In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime

# Base folders: the knowledge base root and the CSV directory beneath it
KB = Path("wrestling_sales_kb")
DATA = KB.joinpath("data")
# parents=True also creates KB; exist_ok=True lets the cell be re-run safely
DATA.mkdir(exist_ok=True, parents=True)


In [2]:
# file paths
# Each table is stored as DATA/<table name>.csv; the dict preserves this order.
files = {
    table: DATA / f"{table}.csv"
    for table in (
        "ppvs",
        "events",
        "ppv_sales",        # buys + revenue
        "merch_items",
        "merch_sales",
        "techniques",
        "technique_usage",
    )
}

# helper to write CSV if missing
def maybe_write(df, path):
    """Write `df` to `path` as CSV (without the index) unless `path` already exists.

    An existing file is left untouched, so re-running never overwrites
    data that is already on disk.
    """
    if path.exists():
        return
    df.to_csv(path, index=False)

# --- PPV core list ---
# One row per pay-per-view brand, keyed by ppv_id.
ppvs = pd.DataFrame(
    columns=["ppv_id", "promotion", "name", "description"],
    data=[
        ("ppv_wwe_wm38", "WWE", "WrestleMania 38", "Premier annual WWE PPV"),
        ("ppv_aew_allin2025", "AEW", "All In Texas 2025", "AEW Annual Flagship PPV"),
    ],
)

# --- Event details for PPVs ---
# One row per staged event; ppv_id points back at the ppvs table.
# attendance is left as None (no figure recorded).
events = pd.DataFrame(
    columns=["event_id", "ppv_id", "event_name", "date", "city", "venue", "attendance"],
    data=[
        ("evt_wm38", "ppv_wwe_wm38", "WrestleMania 38", "2022-04-02", "Los Angeles, CA", "SoFi Stadium", None),
        ("evt_allin25", "ppv_aew_allin2025", "AEW All In Texas", "2025-07-19", "Houston, TX", "NRG Stadium", None),
    ],
)

# --- PPV sales (buys + estimated revenue) ---
# revenue_est_usd is None where no revenue figure is recorded.
ppv_sales = pd.DataFrame(
    columns=["event_id", "platform", "buys_estimate", "revenue_est_usd", "source"],
    data=[
        ("evt_allin25", "Digital+Traditional", 175000, None, "wrestling observer estimate"),
    ],
)

# --- Merch items catalog ---
# One row per sellable item, keyed by merch_id.
merch_items = pd.DataFrame(
    columns=["merch_id", "store", "item_name", "category"],
    data=[
        ("m_wweshop_codys", "WWE Shop", "Cody Rhodes Tee", "apparel"),
        ("m_aewshop_page", "AEW Shop", "Hangman Adam Page Shirt", "apparel"),
    ],
)

# --- Merch sales transaction estimates ---
# One row per merch sale figure; event_id and merch_id reference the events
# and merch_items tables defined above.
merch_sales = pd.DataFrame([
    ["sale_wm38_cody","evt_wm38","m_wweshop_codys","venue",900,900*35.00,"USD","estimate","collected estimate"],
    ["sale_allin25_top","evt_allin25","m_aewshop_page","venue",12000,725000,"USD","reported","Meltzer report"],  # $725K merch revenue reported for AEW All In 2025. NOTE(review): that figure reads as event-wide revenue, yet this row attributes it to a single item -- confirm
], columns=[
    "sale_id","event_id","merch_id","channel","units","revenue_usd",
    "currency","confidence","source"
])

# --- Techniques knowledgebase (open PPV move analysis) ---
# One row per move, keyed by technique_id.
techniques = pd.DataFrame(
    columns=["technique_id", "name", "category", "risk_level", "description"],
    data=[
        (
            "t_superkick", "Superkick", "strike", "medium",
            "Front snap kick style strike used frequently in modern pro wrestling",
        ),
        (
            "t_german_suplex", "German Suplex", "suplex", "low",
            "Classic rear waistlock suplex from grappling base",
        ),
    ],
)

# --- Usage of techniques in matches (collaborative) ---
# One row per (event, match, performer, technique) observation.
technique_usage = pd.DataFrame(
    columns=[
        "usage_id", "event_id", "match_label", "performer",
        "technique_id", "count", "finish_spot", "notes",
    ],
    data=[
        ("u1", "evt_wm38", "Title Match", "Cody Rhodes", "t_superkick", 1, "no", "audience observation"),
    ],
)

# write CSVs if absent
# Pair every seed frame with its path by key instead of by position: zipping a
# hand-ordered list against files.values() would silently mis-pair (or drop)
# tables as soon as either side is reordered or extended.
seed_tables = {
    "ppvs": ppvs,
    "events": events,
    "ppv_sales": ppv_sales,
    "merch_items": merch_items,
    "merch_sales": merch_sales,
    "techniques": techniques,
    "technique_usage": technique_usage,
}
for name, df in seed_tables.items():
    # A KeyError here means `files` has no path registered for this table.
    maybe_write(df, files[name])

print("Datasets initialized at:", DATA.resolve())


Datasets initialized at: /Users/adnanaltimeemy/wrestling_sales_kb/data


In [3]:
# Reload every table from disk so later cells work with what is persisted,
# then print a row count and preview the first rows of each one.
dfs = {
    table: pd.read_csv(csv_path)
    for table, csv_path in files.items()
}

for name in dfs:
    df = dfs[name]
    print(f"{name}: {len(df)} rows")
    display(df.head())


ppvs: 2 rows


Unnamed: 0,ppv_id,promotion,name,description
0,ppv_wwe_wm38,WWE,WrestleMania 38,Premier annual WWE PPV
1,ppv_aew_allin2025,AEW,All In Texas 2025,AEW Annual Flagship PPV


events: 2 rows


Unnamed: 0,event_id,ppv_id,event_name,date,city,venue,attendance
0,evt_wm38,ppv_wwe_wm38,WrestleMania 38,2022-04-02,"Los Angeles, CA",SoFi Stadium,
1,evt_allin25,ppv_aew_allin2025,AEW All In Texas,2025-07-19,"Houston, TX",NRG Stadium,


ppv_sales: 1 rows


Unnamed: 0,event_id,platform,buys_estimate,revenue_est_usd,source
0,evt_allin25,Digital+Traditional,175000,,wrestling observer estimate


merch_items: 2 rows


Unnamed: 0,merch_id,store,item_name,category
0,m_wweshop_codys,WWE Shop,Cody Rhodes Tee,apparel
1,m_aewshop_page,AEW Shop,Hangman Adam Page Shirt,apparel


merch_sales: 2 rows


Unnamed: 0,sale_id,event_id,merch_id,channel,units,revenue_usd,currency,confidence,source
0,sale_wm38_cody,evt_wm38,m_wweshop_codys,venue,900,31500.0,USD,estimate,collected estimate
1,sale_allin25_top,evt_allin25,m_aewshop_page,venue,12000,725000.0,USD,reported,Meltzer report


techniques: 2 rows


Unnamed: 0,technique_id,name,category,risk_level,description
0,t_superkick,Superkick,strike,medium,Front snap kick style strike used frequently i...
1,t_german_suplex,German Suplex,suplex,low,Classic rear waistlock suplex from grappling base


technique_usage: 1 rows


Unnamed: 0,usage_id,event_id,match_label,performer,technique_id,count,finish_spot,notes
0,u1,evt_wm38,Title Match,Cody Rhodes,t_superkick,1,no,audience observation


In [4]:
# merge sales with merch catalog + events
sales = dfs["merch_sales"]
items = dfs["merch_items"]
events = dfs["events"]

# Left joins keep every sale. validate="many_to_one" raises if a catalog or
# event key is duplicated, which would otherwise duplicate sale rows and
# inflate the revenue totals below.
merged = (
    sales
    .merge(items, on="merch_id", how="left", validate="many_to_one")
    .merge(events, on="event_id", how="left", validate="many_to_one")
)

# total revenue by event
# dropna=False keeps sales whose event_id matched no event (event_name is NaN)
# visible in the summary instead of silently dropping their revenue.
rev_by_event = (
    merged
    .groupby(["event_name", "channel"], dropna=False)["revenue_usd"]
    .sum()
    .reset_index()
)
rev_by_event


Unnamed: 0,event_name,channel,revenue_usd
0,AEW All In Texas,venue,725000.0
1,WrestleMania 38,venue,31500.0


In [5]:
# Attach the event name to each PPV sales row (default inner join on event_id).
ppvinfo = pd.merge(
    dfs["ppv_sales"],
    dfs["events"].loc[:, ["event_id", "event_name"]],
    on="event_id",
)
ppvinfo


Unnamed: 0,event_id,platform,buys_estimate,revenue_est_usd,source,event_name
0,evt_allin25,Digital+Traditional,175000,,wrestling observer estimate,AEW All In Texas


In [6]:
import uuid

def new_id(prefix):
    """Return `prefix`, an underscore, and the first 8 hex characters of a random UUID4."""
    suffix = uuid.uuid4().hex[:8]
    return "{}_{}".format(prefix, suffix)

def add_merch_sale(event_id, merch_id, channel, units, revenue_usd, source="user_input",
                   confidence="reported"):
    """Append one merch sale to the merch_sales CSV and return the new row.

    Parameters
    ----------
    event_id, merch_id : identifiers stored on the row as given (they are not
        checked against the events / merch_items tables).
    channel : sales channel label stored on the row.
    units : number of units sold.
    revenue_usd : revenue for the sale; the row's currency is always "USD".
    source : free-text provenance of the figure.
    confidence : label for how reliable the figure is. Defaults to "reported",
        the value that was previously hard-coded; pass e.g. "estimate" for
        figures that are not reported numbers.

    Returns
    -------
    dict
        The row that was written, including its generated ``sale_id``.
    """
    row = {
        "sale_id": new_id("sale"),
        "event_id": event_id,
        "merch_id": merch_id,
        "channel": channel,
        "units": units,
        "revenue_usd": revenue_usd,
        "currency":"USD",
        "confidence": confidence,
        "source": source
    }
    path = files["merch_sales"]
    new_rows = pd.DataFrame([row])
    # Tolerate a deleted CSV instead of raising FileNotFoundError.
    existing = pd.read_csv(path) if path.exists() else None
    if existing is None or existing.empty:
        # Nothing to preserve; this also avoids concatenating with an empty frame.
        df = new_rows
    else:
        df = pd.concat([existing, new_rows], ignore_index=True)
    df.to_csv(path, index=False)
    return row

# example usage (each execution appends another row to the merch_sales CSV)
new = add_merch_sale(
    event_id="evt_wm38",
    merch_id="m_wweshop_codys",
    channel="online",
    units=450,
    revenue_usd=450*35.0,
    source="notional",
)
new


{'sale_id': 'sale_d50d8150',
 'event_id': 'evt_wm38',
 'merch_id': 'm_wweshop_codys',
 'channel': 'online',
 'units': 450,
 'revenue_usd': 15750.0,
 'currency': 'USD',
 'confidence': 'reported',
 'source': 'notional'}