In [4]:
import os

import matplotlib.patheffects as path_effects
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


# Location of the cleaned orders export. The original local path remains the
# default, so existing runs behave exactly as before; set the
# GAMEZONE_ORDERS_CSV environment variable to point the notebook at a copy
# stored elsewhere without editing this cell.
file_path = os.environ.get(
    "GAMEZONE_ORDERS_CSV",
    r"E:\Projects\Gamezone Orders Data\Data\Cleaned\gamezone_orders_data_cleaned.csv",
)

# Load the cleaned orders.
# - The two timestamp columns are parsed as datetimes.
# - Year, month and time-to-ship use the nullable Int64 dtype so rows with a
#   missing value do not force the whole column to float.
# - keep_default_na=False together with na_values=[''] means only empty cells
#   become NaN; other tokens pandas would normally treat as missing are kept
#   as text (presumably so codes such as 'NA' survive - confirm against data).
df = pd.read_csv(
    file_path,
    parse_dates=['purchase_ts_cleaned', 'ship_ts'],
    dtype={
        'purchase_year': 'Int64',
        'purchase_month': 'Int64',
        'time_to_ship': 'Int64',
        'revenue': 'float',
    },
    encoding='utf-8',
    keep_default_na=False,
    na_values=[''],  # Only treat empty strings as NaN
)

In [5]:
# --- GEOGRAPHICAL KPIs ---

# 1. Top countries by revenue (every country, ranked)
# Sum revenue per country, then order from highest to lowest revenue.
top_countries = df.groupby('country_code')['revenue'].sum().reset_index()
top_countries = top_countries.sort_values(by='revenue', ascending=False)

# Attach a 1-based rank (row order after the sort) and each country's
# percentage of the summed revenue, rounded to two decimals.
top_countries = top_countries.assign(
    rank=lambda ranked: range(1, len(ranked) + 1),
    **{
        'revenue_share_%': lambda ranked: (
            ranked['revenue'] / ranked['revenue'].sum() * 100
        ).round(2)
    },
)

# 2-3. Country and regional performance share one aggregation recipe.
def _summarize_performance(orders, group_col):
    """Aggregate order KPIs per value of ``group_col``.

    Parameters
    ----------
    orders : pandas.DataFrame
        Order-level rows; must contain ``group_col`` plus the ``revenue``,
        ``order_id`` and ``user_id`` columns.
    group_col : str
        Column to group by (for example ``'country_code'`` or ``'region'``).

    Returns
    -------
    pandas.DataFrame
        One row per group with ``total_revenue`` (sum of revenue),
        ``total_orders`` (count of non-null order ids), ``unique_customers``
        (distinct user ids), ``aov`` (revenue per order),
        ``revenue_per_customer``, ``orders_per_customer`` and
        ``revenue_share_%`` (share of the summed revenue, rounded to two
        decimals). Rows are sorted by ``total_revenue`` descending and keep
        the index they had before sorting; callers reset it if they need to.
    """
    perf = (
        orders.groupby(group_col)
        .agg(
            total_revenue=('revenue', 'sum'),
            total_orders=('order_id', 'count'),
            unique_customers=('user_id', 'nunique'),
        )
        .reset_index()
    )
    perf['aov'] = perf['total_revenue'] / perf['total_orders']
    perf['revenue_per_customer'] = perf['total_revenue'] / perf['unique_customers']
    perf['orders_per_customer'] = perf['total_orders'] / perf['unique_customers']
    perf['revenue_share_%'] = (
        perf['total_revenue'] / perf['total_revenue'].sum() * 100
    ).round(2)
    return perf.sort_values('total_revenue', ascending=False)


# 2. Comprehensive country performance
# The index is reset so row position, index label and rank all line up.
country_perf = _summarize_performance(df, 'country_code').reset_index(drop=True)

# Add rank and top 5 flag (1 for the five highest-revenue countries, else 0)
country_perf['rank'] = range(1, len(country_perf) + 1)
country_perf['top5_flag'] = (country_perf['rank'] <= 5).astype(int)

# 3. Regional performance
# Deliberately keeps the pre-sort index, matching the previously displayed table.
region_perf = _summarize_performance(df, 'region')

# 4. Country x region mapping
# Revenue and order count for every (country, region) pair present in the data.
country_region_map = df.groupby(['country_code', 'region']).agg(
    total_revenue=('revenue', 'sum'),
    total_orders=('order_id', 'count'),
)

# Turn the group keys back into columns and list the largest pairs first.
country_region_map = country_region_map.reset_index().sort_values(
    'total_revenue', ascending=False
)

# 5. Monthly revenue trend by region
# One row per (year, month, region), ordered chronologically within each region.
monthly_region_revenue = (
    df.groupby(['purchase_year', 'purchase_month', 'region'])['revenue']
    .sum()
    .reset_index()
    .sort_values(['region', 'purchase_year', 'purchase_month'])
)

# Month-over-month growth for each region.
# pct_change() compares every row with the previous row of the same region,
# and rows are in year/month order, so this is period-over-period growth -
# NOT year-over-year. If a region has no row for some month, the comparison
# silently spans that gap.
monthly_region_revenue['revenue_growth_%'] = (
    monthly_region_revenue.groupby('region')['revenue']
    .pct_change() * 100
)


def _yoy_growth_pct(monthly):
    """Return year-over-year revenue growth (%) aligned row-for-row with ``monthly``.

    Each row is compared with the row for the same region and calendar month
    one year earlier. Rows with no such prior-year row get NaN.

    Parameters
    ----------
    monthly : pandas.DataFrame
        Frame with ``region``, ``purchase_year``, ``purchase_month`` and
        ``revenue`` columns, one row per (region, year, month).

    Returns
    -------
    numpy.ndarray
        Growth percentages in the same row order as ``monthly``.
    """
    keys = ['region', 'purchase_year', 'purchase_month']
    # Shift every row one year forward so it joins onto the following year.
    prior_year = (
        monthly[keys + ['revenue']]
        .rename(columns={'revenue': 'revenue_prior_year'})
        .assign(purchase_year=lambda prior: prior['purchase_year'] + 1)
    )
    # A left merge keeps the row order of `monthly`; keys are unique on the
    # right-hand side, so no rows are duplicated.
    aligned = monthly[keys].merge(prior_year, on=keys, how='left')
    current = monthly['revenue'].reset_index(drop=True)
    return ((current / aligned['revenue_prior_year'] - 1) * 100).to_numpy()


# True year-over-year growth, added as a separate column so the existing
# 'revenue_growth_%' column keeps its values.
monthly_region_revenue['revenue_yoy_growth_%'] = _yoy_growth_pct(monthly_region_revenue)

# 6. Top 5 countries detail
# Independent copy so later edits to it cannot touch country_perf.
top5_countries_detail = country_perf[country_perf['top5_flag'] == 1].copy()

# 7. Geographic summary
# country_perf and region_perf are sorted by revenue descending, so their
# first row is the top country / top region.
geo_summary = pd.DataFrame({
    'metric': [
        'total_countries',
        'total_regions',
        'top_country_revenue_share_%',
        'top_5_countries_revenue_share_%',
        'top_region_revenue_share_%'
    ],
    'value': [
        country_perf['country_code'].nunique(),
        region_perf['region'].nunique(),
        country_perf.iloc[0]['revenue_share_%'],
        # Computed from raw revenue and rounded once; adding up the five
        # already-rounded per-country shares would accumulate rounding error.
        round(
            top5_countries_detail['total_revenue'].sum()
            / country_perf['total_revenue'].sum() * 100,
            2,
        ),
        region_perf.iloc[0]['revenue_share_%']
    ]
})

# Display results: print each heading, then render its table beneath it.
for heading, table in (
    ("GEOGRAPHIC SUMMARY", geo_summary),
    ("\nREGIONAL PERFORMANCE", region_perf),
    ("\nTOP 5 COUNTRIES", top5_countries_detail),
    ("\nALL COUNTRIES PERFORMANCE", country_perf),
):
    print(heading)
    display(table)

GEOGRAPHIC SUMMARY


Unnamed: 0,metric,value
0,total_countries,152.0
1,total_regions,5.0
2,top_country_revenue_share_%,48.05
3,top_5_countries_revenue_share_%,67.33
4,top_region_revenue_share_%,48.25



REGIONAL PERFORMANCE


Unnamed: 0,region,total_revenue,total_orders,unique_customers,aov,revenue_per_customer,orders_per_customer,revenue_share_%
3,,2968096.26,10330,9388,287.327808,316.158528,1.100341,48.25
1,EMEA,2562575.68,9515,8640,269.319567,296.594407,1.101273,41.66
0,APAC,531226.11,1588,1430,334.525258,371.48679,1.11049,8.64
2,LATAM,85841.4,389,356,220.671979,241.127528,1.092697,1.4
4,unknown,3527.04,42,37,83.977143,95.325405,1.135135,0.06



TOP 5 COUNTRIES


Unnamed: 0,country_code,total_revenue,total_orders,unique_customers,aov,revenue_per_customer,orders_per_customer,revenue_share_%,rank,top5_flag
0,US,2955813.22,10294,9355,287.139423,315.960793,1.100374,48.05,1,1
1,GB,475395.19,1808,1644,262.939817,289.169824,1.099757,7.73,2,1
2,DE,255805.41,855,774,299.187614,330.497946,1.104651,4.16,3,1
3,CA,233775.22,956,850,244.534749,275.029671,1.124706,3.8,4,1
4,JP,220627.68,525,461,420.2432,478.584989,1.138829,3.59,5,1



ALL COUNTRIES PERFORMANCE


Unnamed: 0,country_code,total_revenue,total_orders,unique_customers,aov,revenue_per_customer,orders_per_customer,revenue_share_%,rank,top5_flag
0,US,2955813.22,10294,9355,287.139423,315.960793,1.100374,48.05,1,1
1,GB,475395.19,1808,1644,262.939817,289.169824,1.099757,7.73,2,1
2,DE,255805.41,855,774,299.187614,330.497946,1.104651,4.16,3,1
3,CA,233775.22,956,850,244.534749,275.029671,1.124706,3.80,4,1
4,JP,220627.68,525,461,420.243200,478.584989,1.138829,3.59,5,1
...,...,...,...,...,...,...,...,...,...,...
147,AW,49.98,1,1,49.980000,49.980000,1.000000,0.00,148,0
148,AP,48.00,2,1,24.000000,48.000000,2.000000,0.00,149,0
149,BH,24.00,1,1,24.000000,24.000000,1.000000,0.00,150,0
150,ME,23.18,1,1,23.180000,23.180000,1.000000,0.00,151,0
