In [3]:
import pandas as pd
import numpy as np

# Load the raw extracts. The date columns are parsed on read so that the
# .dt accessor can be used on sale_date further down.
sales = pd.read_csv("sales.csv", parse_dates=["sale_date"])
inventory = pd.read_csv("inventory.csv", parse_dates=["updated_at"])

# Remove exact duplicate rows BEFORE imputing anything. Doing it afterwards
# (a) lets repeated rows weigh several times in the medians computed below and
# (b) can make a row whose missing price/quantity was filled in identical to a
# genuine row, which would then be dropped by mistake.
# NOTE(review): this treats fully identical rows as accidental repeats --
# confirm that two legitimate sales can never share every column value.
sales = sales.drop_duplicates()

# Fill a missing price with the median price of the same product, then fall
# back to the median of the (already partly filled) price column for products
# that have no observed price at all.
product_median_price = sales.groupby('product_id')['unit_price'].transform('median')
sales['unit_price'] = sales['unit_price'].fillna(product_median_price)
sales['unit_price'] = sales['unit_price'].fillna(sales['unit_price'].median())

# A missing quantity is counted as zero units; the column is stored as int.
sales['quantity'] = sales['quantity'].fillna(0).astype(int)

# Bucket each sale by the first day of its calendar month and value the line.
sales['sale_month'] = sales['sale_date'].dt.to_period('M').dt.to_timestamp()
sales['sales_value'] = sales['quantity'] * sales['unit_price']

# Roll the individual sale lines up to one row per (month, product):
# units_sold is the summed quantity, sales_value the summed line value.
monthly_aggs = {
    'units_sold': ('quantity', 'sum'),
    'sales_value': ('sales_value', 'sum'),
}
by_month_and_product = sales.groupby(['sale_month', 'product_id'])
monthly_sales = by_month_and_product.agg(**monthly_aggs).reset_index()

# Whole-period totals per product, from which the ten largest by units and
# by value are taken.
product_totals = monthly_sales.groupby('product_id')[['units_sold', 'sales_value']].sum()
top_units = product_totals['units_sold'].nlargest(10)
top_value = product_totals['sales_value'].nlargest(10)

# Per-product descriptive statistics of the recorded inventory quantities.
# Despite its name, inv_latest aggregates over every inventory row (it is not
# restricted to the most recent update); its mean column is exposed as
# avg_inventory_qty.
inventory_stats = inventory.groupby('product_id')['quantity'].agg(['mean', 'median', 'min', 'max'])
inv_latest = inventory_stats.reset_index().rename(columns={'mean': 'avg_inventory_qty'})

# Units sold per product across all sales rows.
units_by_product = sales.groupby('product_id')['quantity'].sum()
total_units = units_by_product.reset_index().rename(columns={'quantity': 'total_units_sold'})

# Attach the average stock level; products without any inventory row get NaN.
avg_stock = inv_latest[['product_id', 'avg_inventory_qty']]
turnover = total_units.merge(avg_stock, how='left', on='product_id')

# Turnover = units sold / average stock. A zero average stock is blanked out
# first so the division cannot blow up, and any infinity that still appears in
# the ratio is blanked as well.
turnover['avg_inventory_qty'] = turnover['avg_inventory_qty'].replace(0, np.nan)
turnover_ratio = turnover['total_units_sold'] / turnover['avg_inventory_qty']
turnover['inventory_turnover'] = turnover_ratio.replace([np.inf, -np.inf], np.nan)

# A product is flagged as underperforming when it sold fewer than
# LOW_SALES_UNITS units in total while its average stock exceeds HIGH_STOCK_QTY.
LOW_SALES_UNITS = 10
HIGH_STOCK_QTY = 20

# Whole-period sales totals per product, with the average stock level attached.
# product_summary keeps its original shape: one row per product that has sales.
sales_totals = monthly_sales.groupby('product_id').agg(
    total_units=('units_sold', 'sum'),
    total_value=('sales_value', 'sum'),
).reset_index()
inv_avg = inv_latest[['product_id', 'avg_inventory_qty']]
product_summary = sales_totals.merge(inv_avg, on='product_id', how='left')

# Filtering product_summary directly would miss products that are stocked but
# have no sales row at all, because they never enter a sales-driven left join.
# The outer join brings those products in, and their missing totals are
# counted as zero so they can satisfy the low-sales condition.
stock_vs_sales = sales_totals.merge(inv_avg, on='product_id', how='outer')
stock_vs_sales['total_units'] = (
    stock_vs_sales['total_units'].fillna(0).astype(sales_totals['total_units'].dtype)
)
stock_vs_sales['total_value'] = stock_vs_sales['total_value'].fillna(0)

underperforming = stock_vs_sales[
    (stock_vs_sales['total_units'] < LOW_SALES_UNITS)
    & (stock_vs_sales['avg_inventory_qty'] > HIGH_STOCK_QTY)
]

# Write each result table to its CSV file (relative to the working directory),
# leaving the DataFrame index out of the file.
csv_outputs = {
    "processed_monthly_sales.csv": monthly_sales,
    "inventory_turnover.csv": turnover,
    "product_summary.csv": product_summary,
    "underperforming_products.csv": underperforming,
}
for csv_name, frame in csv_outputs.items():
    frame.to_csv(csv_name, index=False)

# Console report: each heading is printed together with the first rows of its
# table (up to ten for the rankings, the default head() size for the sample).
report_sections = (
    ("Top products by units sold:\n", top_units.head(10)),
    ("\nTop products by sales value:\n", top_value.head(10)),
    ("\nUnderperforming products (sample):\n", underperforming.head()),
)
for heading, table in report_sections:
    print(heading, table)


Top products by units sold:
 product_id
P004    80
P001    63
P002    58
P005    43
P003    35
Name: units_sold, dtype: int64

Top products by sales value:
 product_id
P004    18349.20
P001    13399.37
P002    12099.42
P005     7199.57
P003     6349.65
Name: sales_value, dtype: float64

Underperforming products (sample):
 Empty DataFrame
Columns: [product_id, total_units, total_value, avg_inventory_qty]
Index: []
