##### SQL-like Operations and Data I/O

In [1]:
import pandas as pd
import numpy as np

##### Advanced GROUP BY Operations
Multiple aggregations and complex grouping

In [2]:
# Load tips dataset
tips = pd.read_csv('data/tips.csv')

# Multiple aggregations
print("Tips summary by day:")
print(tips.groupby('day').agg({
    'tip': [np.mean, np.size],
    'total_bill': [np.sum, np.mean]
}))

Tips summary by day:
           tip      total_bill           
          mean size        sum       mean
day                                      
Fri   2.734737   19     325.88  17.151579
Sat   2.993103   87    1778.40  20.441379
Sun   3.255132   76    1627.16  21.410000
Thur  2.771452   62    1096.33  17.682742


  print(tips.groupby('day').agg({
  print(tips.groupby('day').agg({
  print(tips.groupby('day').agg({


In [3]:
# Group by multiple columns
print("Tips summary by smoker and day:")
print(tips.groupby(['smoker', 'day']).agg({
    'tip': [np.size, np.mean]
}))

Tips summary by smoker and day:
             tip          
            size      mean
smoker day                
No     Fri     4  2.812500
       Sat    45  3.102889
       Sun    57  3.167895
       Thur   45  2.673778
Yes    Fri    15  2.714000
       Sat    42  2.875476
       Sun    19  3.516842
       Thur   17  3.030000


  print(tips.groupby(['smoker', 'day']).agg({


##### JOIN Operations
Different types of joins in pandas

In [4]:
# Create sample DataFrames
df1 = pd.DataFrame({
    'key': ['A', 'B', 'C', 'D'],
    'value': np.random.randn(4)
})

df2 = pd.DataFrame({
    'key': ['B', 'D', 'D', 'E'],
    'value': np.random.randn(4)
})

print("DataFrame 1:")
print(df1)
print("\nDataFrame 2:")
print(df2)

DataFrame 1:
  key     value
0   A  0.886876
1   B  0.585386
2   C  1.022575
3   D  0.006838

DataFrame 2:
  key     value
0   B  0.993532
1   D  1.063438
2   D -0.860047
3   E -0.204878


In [5]:
# Inner join
print("Inner join:")
print(pd.merge(df1, df2, on='key'))

# Left join
print("\nLeft join:")
print(pd.merge(df1, df2, on='key', how='left'))

# Right join
print("\nRight join:")
print(pd.merge(df1, df2, on='key', how='right'))

Inner join:
  key   value_x   value_y
0   B  0.585386  0.993532
1   D  0.006838  1.063438
2   D  0.006838 -0.860047

Left join:
  key   value_x   value_y
0   A  0.886876       NaN
1   B  0.585386  0.993532
2   C  1.022575       NaN
3   D  0.006838  1.063438
4   D  0.006838 -0.860047

Right join:
  key   value_x   value_y
0   B  0.585386  0.993532
1   D  0.006838  1.063438
2   D  0.006838 -0.860047
3   E       NaN -0.204878


In [6]:
# Join with index
indexed_df2 = df2.set_index('key')
print("Join DataFrame with indexed DataFrame:")
print(pd.merge(df1, indexed_df2, left_on='key', right_index=True))

Join DataFrame with indexed DataFrame:
  key   value_x   value_y
1   B  0.585386  0.993532
3   D  0.006838  1.063438
3   D  0.006838 -0.860047


##### Data Input/Output
Reading and writing data in different formats

In [7]:
# Create DataFrame from values
df = pd.DataFrame({
    'x': [1, 3, 5],
    'y': [2, 4, 6]
})

print("DataFrame from dictionary:")
print(df)

DataFrame from dictionary:
   x  y
0  1  2
1  3  4
2  5  6


In [8]:
# Write DataFrame to CSV
df.to_csv('sample.csv', index=False)

# Read CSV file
df_read = pd.read_csv('sample.csv')
print("\nDataFrame read from CSV:")
print(df_read)


DataFrame read from CSV:
   x  y
0  1  2
1  3  4
2  5  6


##### Column Operations
Manipulating columns in pandas

In [9]:
# Load tips data
print("Original tips data:")
print(tips.head())

# Modify existing column
tips['total_bill'] = tips['total_bill'] - 2

# Create new column
tips['new_bill'] = tips['total_bill'] / 2

print("\nModified tips data:")
print(tips.head())

# Drop column
tips = tips.drop('new_bill', axis=1)

print("\nAfter dropping new_bill:")
print(tips.head())

Original tips data:
   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4

Modified tips data:
   total_bill   tip     sex smoker  day    time  size  new_bill
0       14.99  1.01  Female     No  Sun  Dinner     2     7.495
1        8.34  1.66    Male     No  Sun  Dinner     3     4.170
2       19.01  3.50    Male     No  Sun  Dinner     3     9.505
3       21.68  3.31    Male     No  Sun  Dinner     2    10.840
4       22.59  3.61  Female     No  Sun  Dinner     4    11.295

After dropping new_bill:
   total_bill   tip     sex smoker  day    time  size
0       14.99  1.01  Female     No  Sun  Dinner     2
1        8.34  1.66    Male     No  Sun  Dinner     3
2       19.01  3.50    Male     No  Sun  Dinner     3
3       2