In [7]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patheffects as path_effects


file_path = r"E:\Projects\Gamezone Orders Data\Data\Cleaned\gamezone_orders_data_cleaned.csv"
df = pd.read_csv(
    file_path,
    parse_dates=['purchase_ts_cleaned', 'ship_ts'],
    dtype={
        'purchase_year': 'Int64',
        'purchase_month': 'Int64',
        'time_to_ship': 'Int64',
        'revenue': 'float'
    },
    encoding='utf-8',
    keep_default_na=False,
    na_values=['']  # Only treat empty strings as NaN
)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21864 entries, 0 to 21863
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   user_id                          21864 non-null  object        
 1   order_id                         21864 non-null  object        
 2   purchase_ts                      21864 non-null  object        
 3   purchase_ts_cleaned              21863 non-null  datetime64[ns]
 4   purchase_year                    21864 non-null  Int64         
 5   purchase_month                   21864 non-null  Int64         
 6   time_to_ship                     21864 non-null  Int64         
 7   ship_ts                          21864 non-null  datetime64[ns]
 8   product_name                     21864 non-null  object        
 9   product_name_cleaned             21864 non-null  object        
 10  product_id                       21864 non-null  object   

In [9]:
df

Unnamed: 0,user_id,order_id,purchase_ts,purchase_ts_cleaned,purchase_year,purchase_month,time_to_ship,ship_ts,product_name,product_name_cleaned,...,usd_price,purchase_platform,marketing_channel,marketing_channel_cleaned,account_creation_method,account_creation_method_cleaned,country_code,date_check,region,revenue
0,563f3664,fcffccf998a37874,28-02-2021,2021-02-28,2021,2,2,2021-03-02,Dell Gaming Mouse,dell gaming mouse,...,$49.98,mobile app,email,email,desktop,desktop,US,True,,49.98
1,5dc182d2,e22f13ed65b40901,28-02-2021,2021-02-28,2021,2,2,2021-03-02,Dell Gaming Mouse,dell gaming mouse,...,$49.98,mobile app,direct,direct,desktop,desktop,US,True,,49.98
2,c8611dac,da3d8982ca484718,28-02-2021,2021-02-28,2021,2,2,2021-03-02,JBL Quantum 100 Gaming Headset,jbl quantum 100 gaming headset,...,$23.78,mobile app,email,email,desktop,desktop,HK,True,EMEA,23.78
3,ee3a8610,cea650a77cf100629,28-02-2021,2021-02-28,2021,2,1,2021-03-01,JBL Quantum 100 Gaming Headset,jbl quantum 100 gaming headset,...,$23.98,mobile app,direct,direct,desktop,desktop,US,True,,23.98
4,439f68f8,cd78025e15730437,28-02-2021,2021-02-28,2021,2,2,2021-03-02,JBL Quantum 100 Gaming Headset,jbl quantum 100 gaming headset,...,$13.98,mobile app,email,email,desktop,desktop,US,True,,13.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21859,576f4fb0,67615df909638344,01-01-2019,2019-01-01,2019,1,2,2019-01-03,JBL Quantum 100 Gaming Headset,jbl quantum 100 gaming headset,...,$24.90,website,direct,direct,desktop,desktop,GB,True,EMEA,24.90
21860,e12d652c,63c5d09516b95166,01-01-2019,2019-01-01,2019,1,3,2019-01-04,Nintendo Switch,nintendo switch,...,$162.70,website,direct,direct,desktop,desktop,NO,True,EMEA,162.70
21861,4ce3061a,51cb708898534168,01-01-2019,2019-01-01,2019,1,2,2019-01-03,Nintendo Switch,nintendo switch,...,$168.00,website,direct,direct,desktop,desktop,US,True,,168.00
21862,4ce3061a,51cb708898534167,01-01-2019,2019-01-01,2019,1,2,2019-01-03,Nintendo Switch,nintendo switch,...,$168.00,website,direct,direct,desktop,desktop,US,True,,168.00


In [10]:
print(df.columns.tolist())


['user_id', 'order_id', 'purchase_ts', 'purchase_ts_cleaned', 'purchase_year', 'purchase_month', 'time_to_ship', 'ship_ts', 'product_name', 'product_name_cleaned', 'product_id', 'usd_price', 'purchase_platform', 'marketing_channel', 'marketing_channel_cleaned', 'account_creation_method', 'account_creation_method_cleaned', 'country_code', 'date_check', 'region', 'revenue']


In [11]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch
from matplotlib.ticker import FuncFormatter

# ---------------- THEME COLORS ----------------
FIG_BG   = "#0f172a"   # page background
AX_BG    = "#1e293b"   # plot background
TEXT_L   = "#f1f5f9"   # light slate text
TEXT_M   = "#94a3b8"   # subtle x labels
SPINE_B  = "#475569"   # faint axis line

# New distinct light color for cumulative %
CUMM_COLOR = "#f5b67f"   # light aqua / cyan glow

# Gradient palette for ABC
abc_colors_palette = ["#7B1FA2", "#03b2ed", "#FF3399"]
abc_color_map = {"A": abc_colors_palette[0], "B": abc_colors_palette[1], "C": abc_colors_palette[2]}

# ---------------- DATA PREP ----------------
sorted_df = df.sort_values('total_revenue', ascending=False).reset_index(drop=True)

def wrap_and_truncate(name, word_limit=2, char_limit=16):
    words = name.split()
    short = " ".join(words[:word_limit])
    if len(short) > char_limit:
        short = short[:char_limit] + "…"
    return short.replace(" ", "\n")

sorted_df["label"] = sorted_df["product_name_cleaned"].apply(wrap_and_truncate)
colors = [abc_color_map[c] for c in sorted_df['abc_class']]

# ---------------- FIGURE ----------------
fig, ax1 = plt.subplots(figsize=(14, 7))
fig.patch.set_facecolor(FIG_BG)
ax1.set_facecolor(AX_BG)

# ---------------- BAR CHART ----------------
bars = ax1.bar(
    sorted_df['label'],
    sorted_df['total_revenue'] / 1000,
    color=colors,
    edgecolor=AX_BG,
    linewidth=1.2
)

ax1.set_ylabel("Revenue ($K)", fontsize=12, fontweight="bold", color=TEXT_L)

ax1.set_xticks(np.arange(len(sorted_df)))
ax1.set_xticklabels(sorted_df['label'], rotation=0, ha='center', fontsize=10, color=TEXT_M)

ax1.tick_params(axis='y', labelcolor=TEXT_L)
ax1.yaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{int(x)}K'))

# subtle spines
ax1.spines['bottom'].set_color(SPINE_B)
ax1.spines['left'].set_color(SPINE_B)
ax1.spines['top'].set_color(AX_BG)
ax1.spines['right'].set_color(AX_BG)


# ---------------- RPR % LINE (no axis) ----------------
ax2 = ax1.twinx()
ax2.set_facecolor("none")
ax2.plot(
    sorted_df['label'],
    sorted_df['rpr_%'],
    color=TEXT_L,
    marker='o',
    markersize=6,
    linewidth=2,
    label="RPR %"
)

# remove RPR axis entirely
ax2.set_yticks([])
ax2.set_ylabel("")
for spine in ax2.spines.values():
    spine.set_visible(False)


# ---------------- CUMULATIVE % LINE (new light color, no axis) ----------------
ax3 = ax1.twinx()
ax3.set_facecolor("none")
ax3.spines['right'].set_position(("axes", 1.06))

ax3.plot(
    sorted_df['label'],
    sorted_df['cumulative_revenue_%'],
    color=CUMM_COLOR,
    marker='s',
    markersize=6,
    linewidth=2.4,
    linestyle='-',
    label="Cumulative Revenue %"
)

# remove cumulative axis
ax3.set_yticks([])
ax3.set_ylabel("")
for spine in ax3.spines.values():
    spine.set_visible(False)


# ---------------- 80% THRESHOLD ----------------
threshold_idx = np.argmax(sorted_df['cumulative_revenue_%'] >= 80)

ax1.axvline(
    x=threshold_idx,
    color="#FF3399",
    linestyle="--",
    linewidth=2,
    alpha=0.8
)


# ---------------- LEGEND ----------------
legend_elements = [
    Patch(facecolor=abc_colors_palette[0], label='Class A'),
    Patch(facecolor=abc_colors_palette[1], label='Class B'),
    Patch(facecolor=abc_colors_palette[2], label='Class C'),
    plt.Line2D([0], [0], color=TEXT_L, marker='o', linewidth=2, label='RPR %'),
    plt.Line2D([0], [0], color=CUMM_COLOR, marker='s', linewidth=2, label='Cumulative %'),
    plt.Line2D([0], [0], color="#FF3399", linestyle='--', linewidth=2, label='80% Cut-Off')
]

ax1.legend(
    handles=legend_elements,
    loc='upper right',
    fontsize=10,
    framealpha=0.15,
    facecolor=AX_BG,
    edgecolor=SPINE_B,
    labelcolor=TEXT_L
)


plt.title(
    "Pareto Analysis: Revenue + RPR % + Cumulative %",
    fontsize=16,
    fontweight="bold",
    color="white",
    pad=15
)

plt.tight_layout()
plt.show()


KeyError: 'total_revenue'