In [3]:
import pandas as pd

In [4]:
# Sample order data: one entry per order in every column list.
data = dict(
    order_id=[101, 102, 103, 104, 105, 106],
    customer=["A", "B", "A", "C", "B", "A"],
    city=["Delhi", "Mumbai", "Delhi", "Bangalore", "Mumbai", "Delhi"],
    amount=[500, 700, 300, 900, 400, 200],
    # Parsed up front so the column holds datetimes rather than plain strings.
    order_date=pd.to_datetime(
        [
            "2024-01-01",
            "2024-01-02",
            "2024-01-03",
            "2024-01-05",
            "2024-01-06",
            "2024-01-07",
        ]
    ),
)
df = pd.DataFrame(data)

Calculate the running total of order amounts per customer, in order-date order

In [5]:
# Running total of `amount` for each customer, accumulated in order-date order.
# The cumulative sum is computed on a date-sorted copy; assigning it back
# aligns on the index, so the rows of `df` keep their original order.
amounts_by_customer = df.sort_values(by='order_date').groupby('customer')['amount']
df['running_total'] = amounts_by_customer.cumsum()
df

Unnamed: 0,order_id,customer,city,amount,order_date,running_total
0,101,A,Delhi,500,2024-01-01,500
1,102,B,Mumbai,700,2024-01-02,700
2,103,A,Delhi,300,2024-01-03,800
3,104,C,Bangalore,900,2024-01-05,900
4,105,B,Mumbai,400,2024-01-06,1100
5,106,A,Delhi,200,2024-01-07,1000


Rank orders by amount within each city (dense ranking, highest amount first)

In [6]:
# Dense rank of each order's amount within its city: the largest amount in a
# city gets rank 1, and tied amounts share a rank without leaving gaps.
amounts_by_city = df.groupby('city')['amount']
df['rank'] = amounts_by_city.rank(ascending=False, method='dense')
df

Unnamed: 0,order_id,customer,city,amount,order_date,running_total,rank
0,101,A,Delhi,500,2024-01-01,500,1.0
1,102,B,Mumbai,700,2024-01-02,700,1.0
2,103,A,Delhi,300,2024-01-03,800,2.0
3,104,C,Bangalore,900,2024-01-05,900,1.0
4,105,B,Mumbai,400,2024-01-06,1100,2.0
5,106,A,Delhi,200,2024-01-07,1000,3.0


In [7]:
# Print a summary of the frame: index range, each column's non-null count and
# dtype, and approximate memory usage. Useful here to check the dtypes of the
# derived columns (`rank` comes back as float64).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   order_id       6 non-null      int64         
 1   customer       6 non-null      object        
 2   city           6 non-null      object        
 3   amount         6 non-null      int64         
 4   order_date     6 non-null      datetime64[ns]
 5   running_total  6 non-null      int64         
 6   rank           6 non-null      float64       
dtypes: datetime64[ns](1), float64(1), int64(3), object(2)
memory usage: 468.0+ bytes


In [12]:
# Flag rows whose (customer, order_date) pair occurs more than once.
# keep=False marks every member of a duplicated group, not only the repeats.
is_repeat_order = df.duplicated(subset=['customer', 'order_date'], keep=False)
duplicates = df.loc[is_repeat_order]
duplicates

Unnamed: 0,order_id,customer,city,amount,order_date,running_total,rank


Pivot Table - Total amount per city per customer

In [14]:
# Total order amount for each city (rows) and customer (columns).
# City/customer pairs with no orders are shown as 0 instead of NaN.
df.pivot_table(
    index="city",
    columns="customer",
    values="amount",
    aggfunc="sum",
    fill_value=0,
)

customer,A,B,C
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bangalore,0,0,900
Delhi,1000,0,0
Mumbai,0,1100,0


Explode nested list columns

In [17]:
# Two orders, each carrying a list of items in a single cell.
df2 = pd.DataFrame(
    dict(
        order_id=[1, 2],
        items=[["A", "B"], ["C", "D", "E"]],
    )
)
# One output row per list element; the original index label is repeated
# for every item that came from the same order.
df2.explode(column='items')

Unnamed: 0,order_id,items
0,1,A
0,1,B
1,2,C
1,2,D
1,2,E


In [19]:
# Export the frame to orders.json (pretty-printed with a 4-space indent).
# date_format='iso' writes order_date as ISO 8601 strings; with the default
# setting the datetime column is written as epoch-millisecond integers,
# which are hard to read and easy to misinterpret in an exported file.
df.to_json('orders.json', date_format='iso', indent=4)