In [2]:
import pandas as pd 
import numpy as np

df = pd.read_csv(r"C:\Users\shaif\OneDrive\Desktop\gamezone_orders_data.csv", sep=",", encoding="utf-8",keep_default_na=False, na_values=[""], dtype=str)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21864 entries, 0 to 21863
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   USER_ID                          21864 non-null  object
 1   ORDER_ID                         21864 non-null  object
 2   PURCHASE_TS                      21864 non-null  object
 3   PURCHASE_TS_CLEANED              21863 non-null  object
 4   PURCHASE_YEAR                    21863 non-null  object
 5   PURCHASE_MONTH                   21863 non-null  object
 6   TIME_TO_SHIP                     21863 non-null  object
 7   SHIP_TS                          21864 non-null  object
 8   PRODUCT_NAME                     21864 non-null  object
 9   PRODUCT_NAME_CLEANED             21864 non-null  object
 10  PRODUCT_ID                       21864 non-null  object
 11   USD_PRICE                       21859 non-null  object
 12  PURCHASE_PLATFORM               

In [3]:
# data cleaning 

df.columns = [col.lower() for col in df.columns] # lowering all column names 

df.columns = df.columns.str.strip()  # removes spaces around names

df['purchase_ts_cleaned'] = pd.to_datetime(df['purchase_ts_cleaned'], errors='coerce', dayfirst=True) # converting to datetime

df['ship_ts'] = pd.to_datetime(df['ship_ts'], errors='coerce', dayfirst=True) #  Convert to datetime


df['revenue'] = df['revenue'].replace(r'[\$,]', '', regex=True)
df['revenue'] = pd.to_numeric(df['revenue'], errors='coerce')

df['region'] = df['region'].astype(str).str.strip().replace('', pd.NA)
df['region'].isna().sum()

df['purchase_year'] = df['purchase_year'].fillna(0).astype(int)
df['purchase_month'] = df['purchase_month'].fillna(0).astype(int)
df['time_to_ship'] = df['time_to_ship'].fillna(0).astype(int)

df['country_code'] = df['country_code'].astype(str).str.strip().str.upper() #cleaning the countries 

df['product_name_cleaned'] = df['product_name_cleaned'].str.strip().str.lower() # cleaning product name 

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21864 entries, 0 to 21863
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   user_id                          21864 non-null  object        
 1   order_id                         21864 non-null  object        
 2   purchase_ts                      21864 non-null  object        
 3   purchase_ts_cleaned              21863 non-null  datetime64[ns]
 4   purchase_year                    21864 non-null  int32         
 5   purchase_month                   21864 non-null  int32         
 6   time_to_ship                     21864 non-null  int32         
 7   ship_ts                          21864 non-null  datetime64[ns]
 8   product_name                     21864 non-null  object        
 9   product_name_cleaned             21864 non-null  object        
 10  product_id                       21864 non-null  object   

In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# 1Ô∏è‚É£ Ensure purchase month exists
df['order_month'] = df['purchase_ts_cleaned'].dt.to_period('M').astype(str)

# 2Ô∏è‚É£ Identify top 5 products by revenue
top_products = (
    df.groupby('product_name_cleaned')['revenue']
    .sum()
    .nlargest(5)
    .index
)

# 3Ô∏è‚É£ Filter data for only top 5 products
df_top = df[df['product_name_cleaned'].isin(top_products)]

# 4Ô∏è‚É£ Create pivot table: Rows = month, Columns = product, Values = revenue
pivot_top_products = pd.pivot_table(
    df_top,
    index='order_month',
    columns='product_name_cleaned',
    values='revenue',
    aggfunc='sum',
    fill_value=0
).reset_index()

# 5Ô∏è‚É£ Save to CSV for Excel use (you can later add sparklines manually)
pivot_top_products.to_csv("top_products_monthly_revenue.csv", index=False)

print("‚úÖ Pivot table saved as 'top_products_monthly_revenue.csv'")
print(pivot_top_products.head())


‚úÖ Pivot table saved as 'top_products_monthly_revenue.csv'
product_name_cleaned order_month  27in 4k gaming monitor  \
0                        2019-01                33501.10   
1                        2019-02                26409.22   
2                        2019-03                35169.23   
3                        2019-04                48754.08   
4                        2019-05                38973.22   

product_name_cleaned  jbl quantum 100 gaming headset  lenovo ideapad gaming 3  \
0                                            2894.73                  6524.52   
1                                            1810.60                  7548.12   
2                                            3456.28                 14864.02   
3                                            3369.99                 10771.68   
4                                            3120.32                 12594.76   

product_name_cleaned  nintendo switch  sony playstation 5 bundle  
0                        

In [5]:


# 1Ô∏è‚É£ Extract order month from purchase timestamp
df['order_month'] = df['purchase_ts_cleaned'].dt.to_period('M').astype(str)

# 2Ô∏è‚É£ Find top 5 products by total revenue
top_products = (
    df.groupby('product_name_cleaned')['revenue']
    .sum()
    .nlargest(5)
    .index
)

# 3Ô∏è‚É£ Filter data for only those top 5 products
df_top = df[df['product_name_cleaned'].isin(top_products)]

# 4Ô∏è‚É£ Group by product and order_month, summing revenue
monthly_revenue = (
    df_top.groupby(['product_name_cleaned', 'order_month'])['revenue']
    .sum()
    .reset_index()
)

# 5Ô∏è‚É£ Compute each product‚Äôs average monthly revenue
avg_revenue = (
    monthly_revenue.groupby('product_name_cleaned')['revenue']
    .mean()
    .rename('avg_revenue')
    .reset_index()
)

# 6Ô∏è‚É£ Compute each product‚Äôs December average revenue
dec_revenue = (
    monthly_revenue[monthly_revenue['order_month'].str.endswith('-12')]
    .groupby('product_name_cleaned')['revenue']
    .mean()
    .rename('dec_revenue')
    .reset_index()
)

# 7Ô∏è‚É£ Merge average and December revenue
seasonality_df = avg_revenue.merge(dec_revenue, on='product_name_cleaned', how='left')

# 8Ô∏è‚É£ Calculate % difference (December vs average)
seasonality_df['december_vs_avg_%'] = (
    ((seasonality_df['dec_revenue'] - seasonality_df['avg_revenue']) / seasonality_df['avg_revenue']) * 100
).round(2)

# 9Ô∏è‚É£ Compute overall December spike across top 5
overall_spike = seasonality_df['december_vs_avg_%'].mean().round(2)

# 10Ô∏è‚É£ Build the log insight
log_insight_4 = {
    "Metric": "Seasonality",
    "Dimension": "Time + Product",
    "Observation": f"All top 5 products show a revenue spike in December (+{overall_spike}% vs avg), suggesting holiday shopping.",
    "Stakeholders": "CMO (for campaign planning), COO (for capacity)",
    "Priority": "MEDIUM"
}

# 11Ô∏è‚É£ Display results
print("üìä LOG INSIGHT #4:")
for key, value in log_insight_4.items():
    print(f"{key}: {value}")

seasonality_df


üìä LOG INSIGHT #4:
Metric: Seasonality
Dimension: Time + Product
Observation: All top 5 products show a revenue spike in December (+54.01% vs avg), suggesting holiday shopping.
Stakeholders: CMO (for campaign planning), COO (for capacity)
Priority: MEDIUM


Unnamed: 0,product_name_cleaned,avg_revenue,dec_revenue,december_vs_avg_%
0,27in 4k gaming monitor,75714.051538,114003.505,50.57
1,jbl quantum 100 gaming headset,3562.876667,5099.18,43.12
2,lenovo ideapad gaming 3,28288.713846,49831.155,76.15
3,nintendo switch,63796.832692,92555.22,45.08
4,sony playstation 5 bundle,61124.763462,94817.945,55.12


In [6]:
# Optional: save detailed product-level analysis
seasonality_df.to_csv("top_products_seasonality.csv", index=False)
print("\n‚úÖ Saved product-level seasonality data to 'top_products_seasonality.csv'")


‚úÖ Saved product-level seasonality data to 'top_products_seasonality.csv'


In [7]:
# 1Ô∏è‚É£ Extract month name or month number
df['order_month'] = df['purchase_ts_cleaned'].dt.month_name()

# 2Ô∏è‚É£ Aggregate total revenue by product and month
monthly_revenue = (
    df.groupby(['product_name_cleaned', 'order_month'])['revenue']
    .sum()
    .reset_index()
)

# 3Ô∏è‚É£ Compute average monthly revenue per product
avg_revenue = (
    monthly_revenue.groupby('product_name_cleaned')['revenue']
    .mean()
    .rename('avg_revenue')
    .reset_index()
)

# 4Ô∏è‚É£ Merge average revenue with monthly data
monthly_revenue = monthly_revenue.merge(avg_revenue, on='product_name_cleaned', how='left')

# 5Ô∏è‚É£ Calculate % difference from product‚Äôs average
monthly_revenue['vs_avg_%'] = (
    ((monthly_revenue['revenue'] - monthly_revenue['avg_revenue']) / monthly_revenue['avg_revenue']) * 100
).round(2)

# 6Ô∏è‚É£ For each product, find the month with the highest % spike
top_months = (
    monthly_revenue.loc[monthly_revenue.groupby('product_name_cleaned')['vs_avg_%'].idxmax()]
    .reset_index(drop=True)
    .sort_values('vs_avg_%', ascending=False)
)

# 7Ô∏è‚É£ Find the overall most common ‚Äúspike month‚Äù
common_spike_month = (
    top_months['order_month']
    .value_counts()
    .idxmax()
)
avg_spike_percent = round(top_months['vs_avg_%'].mean(), 2)

# 8Ô∏è‚É£ Build the log insight
log_insight_seasonality = {
    "Metric": "Seasonality",
    "Dimension": "Time + Product",
    "Observation": f"Most top products peak in {common_spike_month} (+{avg_spike_percent}% vs avg). Indicates seasonal demand surge.",
    "Stakeholders": "CMO (campaign planning), COO (capacity planning)",
    "Priority": "MEDIUM"
}

# 9Ô∏è‚É£ Display results
print("üìä LOG INSIGHT #4: SEASONALITY\n")
for k, v in log_insight_seasonality.items():
    print(f"{k}: {v}")


top_months


üìä LOG INSIGHT #4: SEASONALITY

Metric: Seasonality
Dimension: Time + Product
Observation: Most top products peak in December (+54.84% vs avg). Indicates seasonal demand surge.
Stakeholders: CMO (campaign planning), COO (capacity planning)
Priority: MEDIUM


Unnamed: 0,product_name_cleaned,order_month,revenue,avg_revenue,vs_avg_%
1,acer nitro v gaming laptop,December,11432.38,5471.765,108.93
2,dell gaming mouse,December,6158.06,3040.834167,102.51
4,lenovo ideapad gaming 3,December,99662.31,61292.213333,62.6
7,sony playstation 5 bundle,December,189635.89,132436.9875,43.19
0,27in 4k gaming monitor,December,228007.01,164047.111667,38.99
5,nintendo switch,December,185110.44,138226.470833,33.92
3,jbl quantum 100 gaming headset,December,10198.36,8014.6425,27.25
6,razer pro gaming headset,November,153.22,126.318571,21.3


In [8]:
# Optional: save detailed file
top_months.to_csv("seasonality_by_product.csv", index=False)
print("\n‚úÖ Saved product-level seasonal peak data to 'seasonality_by_product.csv'")


‚úÖ Saved product-level seasonal peak data to 'seasonality_by_product.csv'


In [10]:
# ============================================
# üìÖ SEASONALITY ANALYSIS
# ============================================

# 1Ô∏è‚É£ Extract month name and month number (for proper sorting)
df['order_month'] = df['purchase_ts_cleaned'].dt.month_name()
df['order_month_num'] = df['purchase_ts_cleaned'].dt.month

# 2Ô∏è‚É£ Aggregate total revenue by product and month
monthly_revenue = (
    df.groupby(['product_name_cleaned', 'order_month', 'order_month_num'])['revenue']
    .sum()
    .reset_index()
)

# 3Ô∏è‚É£ Compute average monthly revenue per product
avg_revenue = (
    monthly_revenue.groupby('product_name_cleaned')['revenue']
    .mean()
    .rename('avg_revenue')
    .reset_index()
)

# 4Ô∏è‚É£ Merge average revenue with monthly data
monthly_revenue = monthly_revenue.merge(avg_revenue, on='product_name_cleaned', how='left')

# 5Ô∏è‚É£ Calculate % difference from product's average
monthly_revenue['vs_avg_%'] = (
    ((monthly_revenue['revenue'] - monthly_revenue['avg_revenue']) / monthly_revenue['avg_revenue']) * 100
).round(2)

# 6Ô∏è‚É£ For each product, find the month with the highest % spike
top_months = (
    monthly_revenue.loc[monthly_revenue.groupby('product_name_cleaned')['vs_avg_%'].idxmax()]
    .reset_index(drop=True)
    .sort_values('vs_avg_%', ascending=False)
)

# 7Ô∏è‚É£ Find the overall most common "spike month"
common_spike_month = (
    top_months['order_month']
    .value_counts()
    .idxmax()
)
avg_spike_percent = round(top_months['vs_avg_%'].mean(), 2)

# 8Ô∏è‚É£ Identify low-performing months (bottom performers)
bottom_months = (
    monthly_revenue.loc[monthly_revenue.groupby('product_name_cleaned')['vs_avg_%'].idxmin()]
    .reset_index(drop=True)
    .sort_values('vs_avg_%', ascending=True)
)

common_low_month = (
    bottom_months['order_month']
    .value_counts()
    .idxmax()
)
avg_low_percent = round(bottom_months['vs_avg_%'].mean(), 2)

# üìã INSIGHTS & RECOMMENDATIONS

print("\n" + "="*70)
print("üìä LOG INSIGHT #4: SEASONALITY ANALYSIS")
print("="*70)

print(f"\nMetric: Seasonality")
print(f"Dimension: Time + Product")
print(f"Observation: Most products peak in {common_spike_month} (+{avg_spike_percent}% vs avg)")
print(f"             Lowest performance typically in {common_low_month} ({avg_low_percent}% vs avg)")
print(f"Stakeholders: CMO (campaign planning), COO (capacity planning)")
print(f"Priority: MEDIUM")

print("\nüî• TOP PERFORMING MONTHS PER PRODUCT:")
for _, row in top_months.head(5).iterrows():
    print(f"   ‚Ä¢ {row['product_name_cleaned']}: {row['order_month']} (+{row['vs_avg_%']:.1f}%)")

print("\n‚ùÑÔ∏è WEAKEST PERFORMING MONTHS PER PRODUCT:")
for _, row in bottom_months.head(5).iterrows():
    print(f"   ‚Ä¢ {row['product_name_cleaned']}: {row['order_month']} ({row['vs_avg_%']:.1f}%)")

print("\nüí° STRATEGIC RECOMMENDATIONS:")
print("   1. INVENTORY: Stock up on high-demand products before peak months")
print(f"   2. MARKETING: Launch campaigns 1-2 months before {common_spike_month}")
print(f"   3. PROMOTIONS: Offer discounts during {common_low_month} to boost slow periods")
print("   4. STAFFING: Increase customer service capacity during peak seasons")
print("   5. FORECASTING: Use historical patterns for accurate demand planning")

print("\n" + "="*70)

# Save results
monthly_revenue.to_csv("monthly_seasonality_analysis.csv", index=False)
top_months.to_csv("peak_months_by_product.csv", index=False)


üìä LOG INSIGHT #4: SEASONALITY ANALYSIS

Metric: Seasonality
Dimension: Time + Product
Observation: Most products peak in December (+54.84% vs avg)
             Lowest performance typically in October (-32.98% vs avg)
Stakeholders: CMO (campaign planning), COO (capacity planning)
Priority: MEDIUM

üî• TOP PERFORMING MONTHS PER PRODUCT:
   ‚Ä¢ acer nitro v gaming laptop: December (+108.9%)
   ‚Ä¢ dell gaming mouse: December (+102.5%)
   ‚Ä¢ lenovo ideapad gaming 3: December (+62.6%)
   ‚Ä¢ sony playstation 5 bundle: December (+43.2%)
   ‚Ä¢ 27in 4k gaming monitor: December (+39.0%)

‚ùÑÔ∏è WEAKEST PERFORMING MONTHS PER PRODUCT:
   ‚Ä¢ dell gaming mouse: March (-84.0%)
   ‚Ä¢ acer nitro v gaming laptop: April (-61.1%)
   ‚Ä¢ lenovo ideapad gaming 3: October (-24.7%)
   ‚Ä¢ sony playstation 5 bundle: July (-23.5%)
   ‚Ä¢ nintendo switch: October (-21.8%)

üí° STRATEGIC RECOMMENDATIONS:
   1. INVENTORY: Stock up on high-demand products before peak months
   2. MARKETING: Launch campai