In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random


In [3]:
# Fix the RNG state before the first random draw so that
# "Restart Kernel -> Run All" regenerates the same synthetic data every time.
# Every RNG imported by this notebook is seeded: NumPy's legacy global state
# (used by the np.random.* calls) and the stdlib `random` module.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Franchise names; the sales tables sample their "team" column from this list.
teams = [
    "Lakers", "Warriors", "Celtics", "Bulls", "Heat",
    "Knicks", "Nets", "Suns", "Mavericks", "Spurs",
    "76ers", "Bucks", "Raptors", "Nuggets", "Clippers"
]

# One row per team: a 1-based id, the name, and two randomly assigned
# attributes (market size label and an average ticket price).
teams_df = pd.DataFrame({
    "team_id": range(1, len(teams) + 1),
    "team_name": teams,
    "market_size": np.random.choice(["Small", "Medium", "Large"], size=len(teams)),
    # randint's upper bound is exclusive, so prices fall in 65..249.
    "avg_ticket_price": np.random.randint(65, 250, size=len(teams)),
})

teams_df.head()


Unnamed: 0,team_id,team_name,market_size,avg_ticket_price
0,1,Lakers,Small,191
1,2,Warriors,Large,118
2,3,Celtics,Medium,152
3,4,Bulls,Small,200
4,5,Heat,Small,130


In [4]:
# Product categories and points of sale each synthetic sale is drawn from.
merch_categories = [
    "Jerseys", "Hats", "T-Shirts", "Hoodies",
    "Collectibles", "Footwear"
]

sales_channels = ["Online", "Arena Store", "Retail Partner"]

num_records = 2000

# Sale dates are spread uniformly over a window that starts on 2022-01-01;
# the largest day offset is inclusive.
first_sale_date = pd.Timestamp("2022-01-01")
max_day_offset = 900

# One row per sale. Every random column, including the date, is drawn from
# NumPy's global RNG, so a single np.random.seed call controls the whole frame.
merch_sales_df = pd.DataFrame({
    "sale_id": range(1, num_records + 1),
    "team": np.random.choice(teams, num_records),
    "category": np.random.choice(merch_categories, num_records),
    "channel": np.random.choice(sales_channels, num_records),
    # randint's upper bound is exclusive: 1..4 units, prices 25..179.
    "units_sold": np.random.randint(1, 5, num_records),
    "unit_price": np.random.randint(25, 180, num_records),
    # Vectorised replacement for a per-row Python loop: draw all day offsets
    # at once (+1 keeps max_day_offset inclusive) and add them to the start date.
    "sale_date": first_sale_date + pd.to_timedelta(
        np.random.randint(0, max_day_offset + 1, num_records), unit="D"
    ),
})

# Revenue per sale line = quantity x unit price.
merch_sales_df["total_revenue"] = (
    merch_sales_df["units_sold"] * merch_sales_df["unit_price"]
)

merch_sales_df.head()


Unnamed: 0,sale_id,team,category,channel,units_sold,unit_price,sale_date,total_revenue
0,1,Bucks,Jerseys,Retail Partner,3,29,2022-11-24,87
1,2,Knicks,Jerseys,Retail Partner,3,171,2023-06-10,513
2,3,Knicks,T-Shirts,Retail Partner,3,144,2022-03-06,432
3,4,Nuggets,Hats,Arena Store,1,109,2023-01-03,109
4,5,Nets,Hats,Arena Store,1,123,2022-10-14,123


In [5]:
# Players whose branded merchandise is simulated.
star_players = [
    "LeBron James", "Stephen Curry", "Kevin Durant",
    "Giannis Antetokounmpo", "Luka Dončić",
    "Jayson Tatum", "Jimmy Butler"
]

promotion_events = ["None", "Playoffs", "Finals", "All-Star Weekend", "MVP Run"]
num_player_records = 1200

# One row per player-merchandise sale.
# NOTE: "player" and "team" are sampled independently of each other, so the
# resulting player/team pairs are arbitrary combinations, not real rosters.
player_merch_df = pd.DataFrame({
    "player": np.random.choice(star_players, num_player_records),
    "team": np.random.choice(teams, num_player_records),
    "merch_type": np.random.choice(merch_categories, num_player_records),
    "units_sold": np.random.randint(1, 8, num_player_records),   # 1..7
    "price": np.random.randint(60, 250, num_player_records),     # 60..249
    "promotion_event": np.random.choice(promotion_events, num_player_records),
}).assign(
    # Revenue per row = quantity x price.
    revenue=lambda frame: frame["units_sold"] * frame["price"]
)

player_merch_df.head()


Unnamed: 0,player,team,merch_type,units_sold,price,promotion_event,revenue
0,Jimmy Butler,Raptors,Hoodies,1,75,Playoffs,75
1,Jimmy Butler,Suns,Hoodies,4,124,All-Star Weekend,496
2,Jimmy Butler,Nets,Hoodies,1,219,All-Star Weekend,219
3,Luka Dončić,Suns,Collectibles,2,216,MVP Run,432
4,Luka Dončić,Raptors,Hats,2,67,All-Star Weekend,134


In [6]:
# Pay-per-view broadcasts that the synthetic purchase data is drawn from.
ppv_events = [
    "NBA Finals",
    "Conference Finals",
    "Play-In Tournament",
    "Christmas Day Games",
    "All-Star Game"
]

ppv_seasons = ["2021-22", "2022-23", "2023-24"]
ppv_regions = ["North America", "Europe", "Asia", "Global"]
num_ppv_records = 500

# One row per (event, season, region) observation with a random price and
# number of buys; randint's upper bounds are exclusive.
ppv_df = pd.DataFrame({
    "event": np.random.choice(ppv_events, num_ppv_records),
    "season": np.random.choice(ppv_seasons, num_ppv_records),
    "price": np.random.randint(15, 75, num_ppv_records),
    "buys": np.random.randint(50_000, 4_000_000, num_ppv_records),
    "region": np.random.choice(ppv_regions, num_ppv_records),
})

# Revenue per row = price x number of buys.
ppv_df = ppv_df.assign(revenue=ppv_df["price"].mul(ppv_df["buys"]))

ppv_df.head()


Unnamed: 0,event,season,price,buys,region,revenue
0,NBA Finals,2023-24,69,2260216,Asia,155954904
1,Play-In Tournament,2023-24,42,2726249,Global,114502458
2,Play-In Tournament,2022-23,64,2666666,Asia,170666624
3,Christmas Day Games,2023-24,22,2630635,Global,57873970
4,All-Star Game,2022-23,58,636026,Global,36889508


In [7]:
# Total merchandise revenue per team, highest first. The row labels are the
# positions from the grouped (alphabetical) frame, kept as-is after sorting.
revenue_by_team = merch_sales_df.groupby("team", as_index=False)["total_revenue"].sum()
team_revenue_summary = revenue_by_team.sort_values("total_revenue", ascending=False)

team_revenue_summary.head()


Unnamed: 0,team,total_revenue
2,Bulls,37312
5,Heat,36625
7,Lakers,35633
14,Warriors,34516
8,Mavericks,34018


In [8]:
# Total merchandise revenue per sales channel (one row per channel).
channel_performance = (
    merch_sales_df
    .groupby("channel", as_index=False)
    .agg(total_revenue=("total_revenue", "sum"))
)

channel_performance


Unnamed: 0,channel,total_revenue
0,Arena Store,176921
1,Online,155768
2,Retail Partner,159717


In [9]:
# Enrich every sale with its team's attributes by joining on the team name.
# The left join keeps all sales rows; the key column from teams_df is retained
# as "team_name" alongside the original "team" column.
knowledgebase_df = merch_sales_df.merge(
    teams_df,
    left_on="team",
    right_on="team_name",
    how="left",
    # Fail loudly if teams_df ever contains a duplicated team_name, which would
    # otherwise silently duplicate sales rows and inflate revenue totals.
    validate="many_to_one",
)

knowledgebase_df.head()


Unnamed: 0,sale_id,team,category,channel,units_sold,unit_price,sale_date,total_revenue,team_id,team_name,market_size,avg_ticket_price
0,1,Bucks,Jerseys,Retail Partner,3,29,2022-11-24,87,12,Bucks,Small,176
1,2,Knicks,Jerseys,Retail Partner,3,171,2023-06-10,513,6,Knicks,Medium,199
2,3,Knicks,T-Shirts,Retail Partner,3,144,2022-03-06,432,6,Knicks,Medium,199
3,4,Nuggets,Hats,Arena Store,1,109,2023-01-03,109,14,Nuggets,Large,124
4,5,Nets,Hats,Arena Store,1,123,2022-10-14,123,7,Nets,Large,178


In [10]:
# Write the three generated datasets to CSV files in the working directory,
# without the pandas row index.
csv_exports = {
    "nba_merch_sales.csv": merch_sales_df,
    "nba_player_merch.csv": player_merch_df,
    "nba_ppv_revenue.csv": ppv_df,
}
for csv_name, frame in csv_exports.items():
    frame.to_csv(csv_name, index=False)
