### Pandas Basics

1. groupby()

In [1]:
import pandas as pd

In [2]:
# Toy frame for the groupby examples: a label column with repeated categories
# and one integer measurement per row.
df = pd.DataFrame(
    dict(
        Category=['A', 'B', 'A', 'B', 'C'],
        Values=[10, 20, 30, 40, 50],
    )
)

In [4]:
# Total of Values within each Category; the result is a Series indexed by
# Category.
(
    df
    .groupby('Category')['Values']
    .sum()
)

Category
A    40
B    60
C    50
Name: Values, dtype: int64

In [8]:
# Several aggregations at once: passing a list to agg() yields one output
# column per function (mean comes back as float, max keeps the integer values).
(
    df
    .groupby('Category')['Values']
    .agg(['mean', 'max'])
)

Unnamed: 0_level_0,mean,max
Category,Unnamed: 1_level_1,Unnamed: 2_level_1
A,20.0,30
B,30.0,40
C,50.0,50


In [9]:
# sum() gives a Series indexed by Category; reset_index() moves that index
# back into a regular column, producing a DataFrame with Category and Values.
df_grouped = (
    df
    .groupby('Category')['Values']
    .sum()
    .reset_index()
)

In [13]:
# Rank categories by total, largest first. The sorted frame is only displayed,
# not assigned, so df_grouped itself keeps its original row order.
df_grouped.sort_values('Values', ascending=False)

Unnamed: 0,Category,Values
1,B,60
2,C,50
0,A,40


2. merge()

Join types (the `how` argument of `merge`): inner, outer, left, right

In [14]:
# Two small frames sharing an 'ID' key. IDs 1 and 2 appear in both,
# ID 3 only in df1 and ID 4 only in df2, so each join type gives a
# different result.
df1 = pd.DataFrame(
    dict(
        ID=[1, 2, 3],
        Name=['Alice', 'Bob', 'Charlie'],
    )
)

df2 = pd.DataFrame(
    dict(
        ID=[1, 2, 4],
        Score=[85, 90, 95],
    )
)


In [15]:
# Inner join on 'ID': keep only the IDs that appear in both df1 and df2.
merged_inner = df1.merge(df2, on='ID', how='inner')

In [16]:
# Display the inner-join result.
merged_inner

Unnamed: 0,ID,Name,Score
0,1,Alice,85
1,2,Bob,90


In [17]:
# Outer Join on 'ID'
merged_inner = pd.merge(df1, df2, on='ID', how='outer')

In [18]:
merged_inner

Unnamed: 0,ID,Name,Score
0,1,Alice,85.0
1,2,Bob,90.0
2,3,Charlie,
3,4,,95.0


In [19]:
# Left join on 'ID': keep every row of df1; IDs with no match in df2 get a
# missing Score.
# Stored under its own name instead of overwriting `merged_inner`.
merged_left = pd.merge(df1, df2, on='ID', how='left')

merged_left

Unnamed: 0,ID,Name,Score
0,1,Alice,85.0
1,2,Bob,90.0
2,3,Charlie,


In [20]:
# Right join on 'ID': keep every row of df2; IDs with no match in df1 get a
# missing Name.
# Stored under its own name instead of overwriting `merged_inner`.
merged_right = pd.merge(df1, df2, on='ID', how='right')

merged_right

Unnamed: 0,ID,Name,Score
0,1,Alice,85
1,2,Bob,90
2,4,,95


3. pivot()

In [21]:
# Long-format sales records: one row per (Date, Product) combination.
data = pd.DataFrame(
    dict(
        Date=['2024-01', '2024-01', '2024-02', '2024-02'],
        Product=['A', 'B', 'A', 'B'],
        Sales=[100, 200, 150, 250],
    )
)


In [22]:
# Display the long-format sales table before reshaping.
data

Unnamed: 0,Date,Product,Sales
0,2024-01,A,100
1,2024-01,B,200
2,2024-02,A,150
3,2024-02,B,250


In [23]:
# Reshape long -> wide: one row per Date, one column per Product, cells filled
# from Sales. pivot() does no aggregation, so each (Date, Product) pair must
# be unique in `data`.
pivoted = data.pivot(
    index='Date',
    columns='Product',
    values='Sales',
)

In [24]:
# Display the wide table: Date as the row index, one column per Product.
pivoted

Product,A,B
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01,100,200
2024-02,150,250
