### DATASET

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Ten sample orders, stored column-wise. Order 205 deliberately has no amount
# (None) so the missing-value questions below have something to exercise.
data = dict(
    order_id=list(range(201, 211)),
    customer=list("ABACBADCBD"),
    region=[
        "East", "West", "East", "North", "West",
        "East", "South", "North", "West", "South",
    ],
    category=[
        "Electronics", "Electronics", "Furniture", "Furniture", "Electronics",
        "Electronics", "Furniture", "Electronics", "Furniture", "Electronics",
    ],
    amount=[1200, 1500, 800, 700, None, 2200, 900, 1600, 1100, 1800],
    quantity=[1, 2, 1, 1, 2, 3, 1, 2, 2, 1],
    is_returned=[
        False, False, True, False, True,
        False, False, True, False, False,
    ],
)

df = pd.DataFrame(data)


#### 01. Total sales amount per region (ignore missing amounts).

In [2]:
# Shortest form (returns a Series indexed by region):
#   df.groupby('region')['amount'].sum()

# Tabular form: keep `region` as an ordinary column and label the total.
# The group-wise sum skips NaN, so the order with no amount is ignored.
(
    df.groupby('region', as_index=False)['amount']
      .sum()
      .rename(columns={'amount': 'region_total'})
)

Unnamed: 0,region,region_total
0,East,4200.0
1,North,2300.0
2,South,2700.0
3,West,2600.0


#### 02. Average order value per customer, sorted highest to lowest.

In [4]:
# Mean order amount per customer (NaN amounts are skipped by `mean`),
# ordered from the highest average to the lowest.
(
    df.groupby('customer')['amount']
      .mean()
      .to_frame('avg_order_val')
      .sort_values(by='avg_order_val', ascending=False)
)

Unnamed: 0_level_0,avg_order_val
customer,Unnamed: 1_level_1
A,1400.0
D,1350.0
B,1300.0
C,1150.0


#### 03. Count of orders per category.

In [9]:
# Number of orders in each category, counted via the non-null order ids.
(
    df.groupby('category', as_index=False)['order_id']
      .count()
      .rename(columns={'order_id': 'total_orders'})
)

Unnamed: 0,category,total_orders
0,Electronics,6
1,Furniture,4


#### 04. Total quantity sold per region and category.

In [14]:
# Units sold for every (region, category) pair; the result is indexed by both keys.
(
    df.groupby(['region', 'category'])['quantity']
      .sum()
      .to_frame('total_qty_sold')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_qty_sold
region,category,Unnamed: 2_level_1
East,Electronics,4
East,Furniture,1
North,Electronics,2
North,Furniture,1
South,Electronics,1
South,Furniture,1
West,Electronics,4
West,Furniture,2


#### 05. Number of returned orders per customer.

In [18]:
# Summing the boolean flag counts the True values, i.e. the returned orders.
(
    df.groupby('customer')['is_returned']
      .sum()
      .to_frame('returns')
)

Unnamed: 0_level_0,returns
customer,Unnamed: 1_level_1
A,1
B,1
C,1
D,0


#### 06. Top 2 regions by total sales.

In [50]:
# Total sales per region, largest first, truncated to the first two rows.
(
    df.groupby('region', as_index=False)['amount']
      .sum()
      .rename(columns={'amount': 'total_sales'})
      .sort_values('total_sales', ascending=False)
      .iloc[:2]
)

Unnamed: 0,region,total_sales
0,East,4200.0
2,South,2700.0


- Only the sales of the region with the 2nd-highest total

In [53]:
# Total sales per region, indexed by region.
sales = df.groupby('region')['amount'].sum().to_frame('total_sales')

# Dense ranking gives tied totals the same rank with no gaps, so every region
# sharing the 2nd-highest total is selected.
sales.loc[sales['total_sales'].rank(method='dense', ascending=False).eq(2)]

Unnamed: 0_level_0,total_sales
region,Unnamed: 1_level_1
South,2700.0


- Another approach (but it doesn't handle ties)

In [52]:
# Positional variant: sort the totals descending and take the row at position 1.
# The list-style indexer keeps the result a one-row DataFrame.
(
    df.groupby('region')['amount']
      .sum()
      .to_frame('total_sales')
      .sort_values(by='total_sales', ascending=False)
      .iloc[[1]]
)

Unnamed: 0_level_0,total_sales
region,Unnamed: 1_level_1
South,2700.0


#### 07. For each category, show min, max, avg amount.

In [63]:
# Minimum, maximum and mean amount per category, with `category` moved back
# into a regular column.
(
    df.groupby('category')['amount']
      .agg(min_amount='min', max_amount='max', avg_amount='mean')
      .reset_index()
)

Unnamed: 0,category,min_amount,max_amount,avg_amount
0,Electronics,1200.0,2200.0,1660.0
1,Furniture,700.0,1100.0,875.0


#### 08. Customers who placed more than 2 orders.

In [60]:
# Count orders per customer, then keep only customers with more than two.
(
    df.groupby('customer')['order_id']
      .count()
      .to_frame('order_count')
      .query('order_count > 2')
)

Unnamed: 0_level_0,order_count
customer,Unnamed: 1_level_1
A,3
B,3


#### 09. Region-wise percentage of returned orders.

In [67]:
# The mean of the boolean flag is the share of returned orders; scale it to a
# percentage, round to two decimals, and list the highest rate first.
(
    df.groupby("region")["is_returned"]
      .mean()
      .to_frame("return_rate")
      .mul(100)
      .round(2)
      .sort_values(by='return_rate', ascending=False)
      .reset_index()
)

Unnamed: 0,region,return_rate
0,North,50.0
1,East,33.33
2,West,33.33
3,South,0.0


#### 10. Category-wise total sales, treating missing amounts as zero.

In [68]:
# Replace missing amounts with 0 on a copy (df itself is untouched),
# then total the amounts per category.
(
    df.assign(amount=lambda orders: orders['amount'].fillna(0))
      .groupby('category')['amount']
      .sum()
      .to_frame('total_sales')
)

Unnamed: 0_level_0,total_sales
category,Unnamed: 1_level_1
Electronics,8300.0
Furniture,3500.0


#### 11. Customer with the highest total purchase value.

In [70]:
# Total spend per customer, largest first; only the first row is kept.
(
    df.groupby('customer')['amount']
      .sum()
      .to_frame('total_spent')
      .sort_values(by='total_spent', ascending=False)
      .iloc[:1]
)

Unnamed: 0_level_0,total_spent
customer,Unnamed: 1_level_1
A,4200.0


- Another Approach

In [74]:
# Total spend per customer, indexed by customer.
highest_spender = df.groupby('customer')['amount'].sum().to_frame('total_spent')

# Dense rank 1 selects every customer tied for the largest total.
highest_spender.loc[
    lambda totals: totals['total_spent'].rank(method='dense', ascending=False) == 1
]

Unnamed: 0_level_0,total_spent
customer,Unnamed: 1_level_1
A,4200.0


#### 12. Region-wise average quantity per order, sorted descending.

In [77]:
# Average units per order in each region, highest first, shown to two decimals.
(
    df.groupby("region", as_index=False)["quantity"]
      .mean()
      .rename(columns={"quantity": "avg_qty"})
      .sort_values(by="avg_qty", ascending=False)
      .round(2)
)

Unnamed: 0,region,avg_qty
3,West,2.0
0,East,1.67
1,North,1.5
2,South,1.0


#### 13. For each category, count distinct customers.

In [91]:
# Three equivalent ways to count distinct customers per category.

# 1) Column selection + nunique: a Series named after the source column.
approach_1 = df.groupby('category')['customer'].nunique()

# 2) Dict-style agg: a DataFrame that keeps the source column name.
approach_2 = df.groupby('category').agg({'customer': 'nunique'})

# 3) Named aggregation: a DataFrame with a descriptive column name.
approach_3 = df.groupby('category').agg(unique_customers=('customer', 'nunique'))

# Print the three results, separated by a dashed rule.
print(
    approach_1,
    "\n--------------------",
    approach_2,
    "\n--------------------",
    approach_3,
    sep="\n",
)

category
Electronics    4
Furniture      4
Name: customer, dtype: int64

--------------------
             customer
category             
Electronics         4
Furniture           4

--------------------
             unique_customers
category                     
Electronics                 4
Furniture                   4


#### 14. Show categories where total sales exceed 3000.

In [92]:
# Total sales per category, keeping only categories above the 3000 threshold.
(
    df.groupby('category')['amount']
      .sum()
      .to_frame('total_sales')
      .query('total_sales > 3000')
)

Unnamed: 0_level_0,total_sales
category,Unnamed: 1_level_1
Electronics,8300.0
Furniture,3500.0


#### 15. Region-wise return count and total orders, sorted by return count.

In [94]:
# Per region: number of returned orders (sum of the boolean flag) next to the
# total number of orders, listed with the most returns first.
(
    df.groupby('region')
      .agg(
          returned_orders=pd.NamedAgg(column='is_returned', aggfunc='sum'),
          total_orders=pd.NamedAgg(column='order_id', aggfunc='count'),
      )
      .sort_values('returned_orders', ascending=False)
)

Unnamed: 0_level_0,returned_orders,total_orders
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,1,3
North,1,2
West,1,3
South,0,2
