In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# Load the cleaned Superstore extract; the path is resolved relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Superstore_clean.csv")

In [3]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,order_id,order_date,order_year,order_month,segment,country,city,state,region,product_id,category,subcategory,product_name,sales,quantity,discount,profit
0,CA-2016-152156,2016-08-11,2016,August,Consumer,United States,Henderson,Kentucky,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.91
1,CA-2016-152156,2016-08-11,2016,August,Consumer,United States,Henderson,Kentucky,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.58
2,CA-2016-138688,2016-12-06,2016,December,Corporate,United States,Los Angeles,California,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,2488915.0
3,US-2015-108966,2015-11-10,2015,November,Consumer,United States,Fort Lauderdale,Florida,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.58,5,0.45,-383.03
4,US-2015-108966,2015-11-10,2015,November,Consumer,United States,Fort Lauderdale,Florida,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.37,2,0.2,1192184.0


In [4]:
# Grand total of the `sales` column, cast to a built-in float for the KPI dict.
total_sales = float(df.loc[:, 'sales'].sum())

In [5]:
# Grand total of the `profit` column, cast to a built-in float for the KPI dict.
total_profit = float(df.loc[:, 'profit'].sum())

In [6]:
# Row-level margin = profit / sales, stored as a new `profit_margin` column.
# A zero `sales` value would otherwise produce +/-inf, and a single inf turns
# the mean below (and every grouped mean that uses this column) into inf.
# Zero denominators are therefore masked to NaN, which `mean()` skips.
df['profit_margin'] = df['profit'] / df['sales'].where(df['sales'] != 0)

# Unweighted mean of the per-row margins (this is not total_profit / total_sales).
avg_profit_margin = float(df['profit_margin'].mean())

# NOTE(review): the df.head() preview shows profits such as 2488915.0 on sales
# of 14.62, which looks like a lost decimal point in the cleaned CSV. If so,
# every profit-based figure in this notebook is inflated — confirm upstream.

In [7]:
# Each row is an order line item (the same order_id appears on several rows),
# so counting rows would measure loss-making *line items*, not orders.
# Roll profit up to one value per order first, matching the
# `orders_count=('order_id', 'nunique')` definition used by the summaries.
# Rows with a missing order_id are excluded by groupby's default dropna.
order_profit = df.groupby('order_id')['profit'].sum()

num_loss_orders = int((order_profit < 0).sum())
total_orders = int(order_profit.shape[0])

# Guard against an empty frame so the KPI is 0.0 instead of a ZeroDivisionError.
percent_loss_orders = (
    float(num_loss_orders / total_orders * 100) if total_orders else 0.0
)

In [8]:
# Headline KPIs gathered into one mapping for the report payload.
kpi = dict(
    total_sales=total_sales,
    total_profit=total_profit,
    avg_profit_margin=avg_profit_margin,
    loss_orders_pct=percent_loss_orders,
)

In [9]:
# Sales and profit totals for every (order_year, order_month) pair, with the
# grouping keys restored as ordinary columns.
monthly_groups = df.groupby(['order_year', 'order_month'])
monthly_summary = monthly_groups.agg(
    total_sales=pd.NamedAgg(column='sales', aggfunc='sum'),
    total_profit=pd.NamedAgg(column='profit', aggfunc='sum'),
).reset_index()

In [10]:
# `order_month` holds English month names (e.g. "August"), so zero-padding the
# raw string would yield periods such as "2016-August". Translate names to
# month numbers first; values that are already numeric pass through unchanged.
month_numbers = {
    "January": 1, "February": 2, "March": 3, "April": 4,
    "May": 5, "June": 6, "July": 7, "August": 8,
    "September": 9, "October": 10, "November": 11, "December": 12,
}

monthly_summary_json = (
    monthly_summary
    .assign(
        # Unrecognised month values stay NaN here, so the int cast below
        # raises instead of silently emitting a malformed period.
        month_number=lambda x: (
            x['order_month'].astype(str).str.strip().str.title()
            .map(month_numbers)
            .fillna(pd.to_numeric(x['order_month'], errors='coerce'))
        )
    )
    .assign(
        period=lambda x: (
            x['order_year'].astype(str)
            + "-"
            + x['month_number'].astype(int).astype(str).str.zfill(2)
        )
    )
    # Grouping by month name orders the rows alphabetically; "YYYY-MM" strings
    # sort chronologically, so order the records by period.
    .sort_values('period')
    [["period", "total_sales", "total_profit"]]
    .to_dict(orient="records")
)

In [11]:
# Per (category, subcategory): sales and profit totals, mean row-level margin,
# and the number of distinct order ids.
subcategory_groups = df.groupby(['category', 'subcategory'])
category_summary = subcategory_groups.agg(
    total_sales=pd.NamedAgg(column='sales', aggfunc='sum'),
    total_profit=pd.NamedAgg(column='profit', aggfunc='sum'),
    avg_profit_margin=pd.NamedAgg(column='profit_margin', aggfunc='mean'),
    orders_count=pd.NamedAgg(column='order_id', aggfunc='nunique'),
).reset_index()

In [12]:
# Round money columns to 2 decimals and the margin to 4, then emit one dict
# per (category, subcategory) row.
category_summary_rounded = category_summary.round(
    decimals={"total_sales": 2, "total_profit": 2, "avg_profit_margin": 4}
)
category_summary_json = category_summary_rounded.to_dict(orient="records")

In [13]:
# Per region: sales and profit totals, mean row-level margin, and the number
# of distinct order ids.
region_groups = df.groupby('region')
region_summary = region_groups.agg(
    total_sales=pd.NamedAgg(column='sales', aggfunc='sum'),
    total_profit=pd.NamedAgg(column='profit', aggfunc='sum'),
    avg_profit_margin=pd.NamedAgg(column='profit_margin', aggfunc='mean'),
    orders_count=pd.NamedAgg(column='order_id', aggfunc='nunique'),
).reset_index()

In [14]:
# Round money columns to 2 decimals and the margin to 4, then emit one dict
# per region row.
region_summary_rounded = region_summary.round(
    decimals={"total_sales": 2, "total_profit": 2, "avg_profit_margin": 4}
)
region_summary_json = region_summary_rounded.to_dict(orient="records")

In [15]:
# Per customer segment: sales and profit totals, mean row-level margin, and
# the number of distinct order ids.
segment_groups = df.groupby('segment')
segment_summary = segment_groups.agg(
    total_sales=pd.NamedAgg(column='sales', aggfunc='sum'),
    total_profit=pd.NamedAgg(column='profit', aggfunc='sum'),
    avg_profit_margin=pd.NamedAgg(column='profit_margin', aggfunc='mean'),
    orders_count=pd.NamedAgg(column='order_id', aggfunc='nunique'),
).reset_index()

In [16]:
# Round money columns to 2 decimals and the margin to 4, then emit one dict
# per segment row.
segment_summary_rounded = segment_summary.round(
    decimals={"total_sales": 2, "total_profit": 2, "avg_profit_margin": 4}
)
segment_summary_json = segment_summary_rounded.to_dict(orient="records")

In [17]:
# Display subcategories ranked from highest to lowest total profit.
category_summary.sort_values(by='total_profit', ascending=False)

Unnamed: 0,category,subcategory,total_sales,total_profit,avg_profit_margin,orders_count
10,Office Supplies,Paper,7945558.41,584605900.0,25583.606732,1191
5,Office Supplies,Art,9175250.65,429709200.0,14474.037454,731
2,Furniture,Furnishings,3959851.12,381191000.0,14143.596941,877
6,Office Supplies,Binders,6622988.6,363953700.0,14101.880896,1316
11,Office Supplies,Storage,966421.47,295668700.0,7443.441764,777
9,Office Supplies,Labels,5029283.33,268925700.0,44719.898941,346
16,Technology,Phones,983176.09,154788500.0,4060.248809,814
13,Technology,Accessories,1497195.0,139456300.0,5433.500324,713
8,Office Supplies,Fasteners,3413481.87,120519200.0,25112.501335,215
7,Office Supplies,Envelopes,1313071.59,107464400.0,20225.336014,249


In [18]:
# Display regions ranked from highest to lowest total profit.
region_summary.sort_values(by='total_profit', ascending=False)

Unnamed: 0,region,total_sales,total_profit,avg_profit_margin,orders_count
3,West,17337512.12,1054135000.0,11495.924004,1611
1,East,12610566.06,916990900.0,13806.219953,1401
0,Central,8519887.69,618013200.0,13166.344424,1174
2,South,6073679.99,498989900.0,12963.926711,822


In [19]:
# Display segments ranked from highest to lowest total profit.
segment_summary.sort_values(by='total_profit', ascending=False)

Unnamed: 0,segment,total_sales,total_profit,avg_profit_margin,orders_count
0,Consumer,22975071.16,1625222000.0,12640.956573,2585
1,Corporate,13422765.25,938196000.0,13227.708219,1514
2,Home Office,8143809.45,524711700.0,12427.369053,909


In [20]:
# Bundle the KPI mapping and the four record lists into a single payload.
report_input = dict(
    kpi=kpi,
    monthly_summary=monthly_summary_json,
    category_summary=category_summary_json,
    region_summary=region_summary_json,
    segment_summary=segment_summary_json,
)

In [21]:
# Serialise the payload to a JSON string indented by two spaces.
report_input_json = json.dumps(obj=report_input, indent=2)