### Importing games data from Basketball Reference and generating synthetic customer and sales tables

import pandas as pd
import numpy as np
from faker import Faker 
fake = Faker()

# Set seeds for reproducibility. NumPy's global generator drives the numeric
# columns and the sales draws below; Faker keeps its own random generator
# (used here for customer names) and has to be seeded separately, otherwise
# the generated names change on every run.
np.random.seed(42)
Faker.seed(42)

# --- Games Table ---
# read_excel loads the first worksheet by default; pass sheet_name=... to
# read a different one.
games = pd.read_excel('2023-2024-season.xlsx')

# '@' in H/A marks an away game; blank (NaN) cells are home games.
games['H/A'] = games['H/A'].replace({'@': 'Away'})

# Fill the remaining blank cells: no overtime marker means regulation time,
# and no note means an ordinary regular-season game.
games.fillna({'H/A': "Home", 'Reg/OT': "Reg", 'Notes': "Regular Season"}, inplace=True)

# The sales sections below iterate over games['GameID']; create a sequential
# ID when the workbook does not already provide one.
if 'GameID' not in games.columns:
    games['GameID'] = range(1, len(games) + 1)


def _start_hour_24(start):
    """Return the hour (0-23) of a 'Start (ET)' value.

    Accepts 24-hour strings such as '19:30' as well as 12-hour strings with
    an a/p (or am/pm) suffix such as '7:30p'.
    NOTE(review): the suffixed form is assumed from Basketball Reference
    exports -- confirm against the actual workbook.
    """
    text = str(start).strip().lower()
    hour = int(text.rstrip('apm ').split(':')[0])
    if text.endswith(('p', 'pm')) and hour != 12:
        hour += 12  # 7:30p -> 19
    elif text.endswith(('a', 'am')) and hour == 12:
        hour = 0    # 12:05a -> 0
    return hour


# Add time-based trends: tip-offs at 18:00 or later count as evening games.
games['TimeOfDay'] = games['Start (ET)'].apply(
    lambda x: 'Evening' if _start_hour_24(x) >= 18 else 'Afternoon'
)

# --- Customers Table ---
# 500 synthetic customers. The columns are generated up front, in the same
# order as the random draws they consume, and assembled afterwards.
customer_total = 500
customer_names = [fake.name() for _ in range(customer_total)]       # randomized names
customer_ages = np.random.randint(18, 65, customer_total)           # 18..64 (upper bound exclusive)
customer_spend = np.random.randint(500, 10000, customer_total)      # 500..9999

customers = pd.DataFrame({
    'CustomerID': range(1, customer_total + 1),
    'Name': customer_names,
    'Age': customer_ages,
    'TotalSpend ($)': customer_spend,
})

# Email is derived from the name: spaces become dots, everything lower-cased.
customers['Email'] = [
    full_name.replace(' ', '.').lower() + "@example.com"
    for full_name in customer_names
]

# --- Food Products Table ---
# One (name, category, price in $) record per concession item; FoodItemID is
# the 1-based position of the record in this list.
food_menu = [
    ('Hot Dog', 'Snacks', 5),
    ('Nachos', 'Snacks', 7),
    ('Burger', 'Meals', 10),
    ('Fries', 'Snacks', 4),
    ('Pizza Slice', 'Meals', 8),
    ('Popcorn', 'Snacks', 6),
    ('Pretzel', 'Snacks', 5),
    ('Chicken Tenders', 'Meals', 12),
    ('Soda', 'Drinks', 3),
    ('Beer', 'Drinks', 8),
    ('Wine', 'Drinks', 10),
    ('Water Bottle', 'Drinks', 2),
    ('Candy', 'Snacks', 3),
    ('Ice Cream', 'Dessert', 6),
    ('Coffee', 'Drinks', 4),
]

food_items = pd.DataFrame({
    'FoodItemID': range(1, len(food_menu) + 1),
    'FoodName': [name for name, _, _ in food_menu],
    'Category': [category for _, category, _ in food_menu],
    'Price ($)': [price for _, _, price in food_menu],
})

# --- Merchandise Products Table ---
# One (name, category, price in $) record per product; ProductID is the
# 1-based position of the record in this list.
merchandise_catalog = [
    ('Home Jersey', 'Apparel', 80),
    ('Away Jersey', 'Apparel', 85),
    ('Baseball Cap', 'Accessories', 25),
    ('Scarf', 'Accessories', 20),
    ('Keychain', 'Accessories', 10),
    ('Mug', 'Drinkware', 15),
    ('Sticker Pack', 'Misc', 5),
    ('Autographed Ball', 'Collectibles', 150),
    ('Team Poster', 'Posters', 12),
    ('Backpack', 'Bags', 50),
    ('Hoodie', 'Apparel', 60),
    ('T-Shirt', 'Apparel', 25),
    ('Team Jacket', 'Apparel', 100),
    ('Bobblehead', 'Collectibles', 30),
    ('Phone Case', 'Accessories', 18),
    ('Socks', 'Apparel', 10),
    ('Team Flag', 'Accessories', 20),
    ('Water Bottle', 'Drinkware', 12),
]

merchandise_items = pd.DataFrame({
    'ProductID': range(1, len(merchandise_catalog) + 1),
    'ProductName': [name for name, _, _ in merchandise_catalog],
    'Category': [category for _, category, _ in merchandise_catalog],
    'Price ($)': [price for _, _, price in merchandise_catalog],
})

# --- TicketSales Table ---
# Ticket price ($) for each seating tier.
seat_tiers = {
    "Courtside": 500,
    "Courtside Suites": 450,
    "Lower Bowl": 250,
    "Suites": 350,
    "Upper Bowl": 100
}

# Share of a game's attendance sold in each fixed tier, in the order the
# tickets are laid out. Upper Bowl is not listed: it receives every seat the
# rounded-down tiers leave over, so the tickets always add up to attendance.
tier_shares = (
    ("Courtside", 0.1),
    ("Courtside Suites", 0.05),
    ("Lower Bowl", 0.3),
    ("Suites", 0.2),
)

ticket_sales_data = []
for game_id, attendance in zip(games['GameID'], games['Attendance']):
    seat_counts = {tier: int(attendance * share) for tier, share in tier_shares}
    seat_counts["Upper Bowl"] = attendance - sum(seat_counts.values())

    # One [tier, price] row per ticket, grouped tier by tier.
    ticket_rows = []
    for tier, seats in seat_counts.items():
        ticket_rows.extend([tier, seat_tiers[tier]] for _ in range(seats))

    game_tickets = pd.DataFrame(ticket_rows, columns=['SeatingType', 'TicketPrice ($)'])
    # Every ticket goes to a customer picked at random, with replacement.
    game_tickets['CustomerID'] = np.random.choice(customers['CustomerID'], len(game_tickets))
    game_tickets['GameID'] = [game_id] * len(game_tickets)

    ticket_sales_data.append(game_tickets)

ticket_sales = pd.concat(ticket_sales_data, ignore_index=True)

# --- Food and Beverage Sales Table ---
# Lookups are built once so the per-transaction loop never has to filter the
# food_items DataFrame (previously a boolean-mask scan per purchased item).
food_ids = food_items['FoodItemID'].to_numpy()
food_price_by_id = dict(zip(food_ids.tolist(), food_items['Price ($)'].tolist()))
# (id, name) pairs in table order: purchased names are listed in this order.
food_names_in_menu_order = list(zip(food_ids.tolist(), food_items['FoodName'].tolist()))
customer_ids = customers['CustomerID'].to_numpy()

food_beverage_sales_data = []
for game_id, attendance in zip(games['GameID'], games['Attendance']):
    # Number of transactions: uniform between 20% and 50% of attendance
    # (upper bound exclusive). randint requires low < high, which would fail
    # for an attendance of 0 or 1, so the upper bound is widened if needed.
    low = int(attendance * 0.2)
    high = max(int(attendance * 0.5), low + 1)
    num_transactions = np.random.randint(low, high)

    for _ in range(num_transactions):
        # 1 to 3 distinct items per transaction
        num_items = np.random.randint(1, 4)
        selected_items = np.random.choice(food_ids, num_items, replace=False).tolist()

        # Total cost: unit price times a quantity between 1 and 4 per item
        total_amount = sum(
            food_price_by_id[item] * np.random.randint(1, 5)
            for item in selected_items
        )

        chosen = set(selected_items)
        food_beverage_sales_data.append({
            # Sequential across all games, starting at 1
            'OrderID': len(food_beverage_sales_data) + 1,
            # Any customer may buy; not restricted to ticket holders of the game
            'CustomerID': np.random.choice(customer_ids),
            'GameID': game_id,
            'FoodItemsPurchased': ", ".join(
                name for item_id, name in food_names_in_menu_order if item_id in chosen
            ),
            'AmountSpent ($)': total_amount
        })

# Explicit columns keep the schema stable even when no transaction was generated.
food_beverage_sales = pd.DataFrame(
    food_beverage_sales_data,
    columns=['OrderID', 'CustomerID', 'GameID', 'FoodItemsPurchased', 'AmountSpent ($)']
)


# --- Merchandise Sales Table ---
# Lookups are built once so the per-transaction loop never has to filter the
# merchandise_items DataFrame (previously a boolean-mask scan per purchased item).
product_ids = merchandise_items['ProductID'].to_numpy()
product_price_by_id = dict(zip(product_ids.tolist(), merchandise_items['Price ($)'].tolist()))
# (id, name) pairs in table order: purchased names are listed in this order.
product_names_in_catalog_order = list(
    zip(product_ids.tolist(), merchandise_items['ProductName'].tolist())
)
customer_ids = customers['CustomerID'].to_numpy()

merchandise_sales_data = []
for game_id, attendance in zip(games['GameID'], games['Attendance']):
    # Number of transactions: uniform between 10% and 40% of attendance
    # (upper bound exclusive). randint requires low < high, which would fail
    # for very small attendance values, so the upper bound is widened if needed.
    low = int(attendance * 0.1)
    high = max(int(attendance * 0.4), low + 1)
    num_transactions = np.random.randint(low, high)

    for _ in range(num_transactions):
        # 1 to 3 distinct products per transaction
        num_items = np.random.randint(1, 4)
        selected_items = np.random.choice(product_ids, num_items, replace=False).tolist()

        # Total cost: unit price times a quantity between 1 and 4 per product
        total_amount = sum(
            product_price_by_id[item] * np.random.randint(1, 5)
            for item in selected_items
        )

        chosen = set(selected_items)
        merchandise_sales_data.append({
            # Sequential across all games, starting at 1
            'TransactionID': len(merchandise_sales_data) + 1,
            # Any customer may buy; not restricted to ticket holders of the game
            'CustomerID': np.random.choice(customer_ids),
            'GameID': game_id,
            'MerchandisePurchased': ", ".join(
                name for product_id, name in product_names_in_catalog_order
                if product_id in chosen
            ),
            'AmountSpent ($)': total_amount
        })

# Explicit columns keep the schema stable even when no transaction was generated.
merchandise_sales = pd.DataFrame(
    merchandise_sales_data,
    columns=['TransactionID', 'CustomerID', 'GameID', 'MerchandisePurchased', 'AmountSpent ($)']
)


In [39]:
import pandas as pd
import numpy as np
from faker import Faker
import os

fake = Faker()
# Seed both random sources so repeated runs write identical CSV files:
# NumPy drives the numeric draws, Faker (customer names) has its own generator.
np.random.seed(20)
Faker.seed(20)

# File paths for saving/loading data: each table is stored as
# "<table name>.csv" (relative path), keyed here by the table name.
file_paths = {
    table_name: f"{table_name}.csv"
    for table_name in (
        "games",
        "customers",
        "food_items",
        "merchandise_items",
        "ticket_sales",
        "food_beverage_sales",
        "merchandise_sales",
    )
}

# Helper function to save a DataFrame to CSV
def save_to_csv(df, filename):
    """Write ``df`` to ``filename`` as CSV, without the index column.

    Args:
        df: DataFrame to persist.
        filename: Destination path; an existing file is overwritten.
    """
    df.to_csv(path_or_buf=filename, index=False)

# Function to generate games table
def generate_games():
    """Load the season workbook, normalise blank cells, add GameID and save.

    Returns:
        The cleaned games DataFrame (also written to file_paths['games']).
    """
    print("Generating games table...")
    schedule = pd.read_excel('2023-2024-season.xlsx')  # Replace with your actual file path

    # '@' marks an away game; blank cells are filled with defaults below.
    schedule['H/A'] = schedule['H/A'].replace('@', 'Away')
    blank_defaults = {'H/A': "Home", 'Reg/OT': "Reg", 'Notes': "Regular Season"}
    schedule = schedule.fillna(value=blank_defaults)

    # Sequential 1-based identifier in workbook row order.
    schedule['GameID'] = range(1, len(schedule) + 1)

    save_to_csv(schedule, file_paths['games'])
    print("Finished games table...")
    return schedule

# Function to generate customers table
def generate_customers():
    """Create 500 synthetic customers and save them to file_paths['customers'].

    Names come from Faker; Age is drawn from 18..64 and TotalSpend ($) from
    500..9999 (NumPy upper bounds are exclusive). Email is the lower-cased
    name with spaces replaced by dots, at example.com.

    Returns:
        The customers DataFrame.
    """
    print("Generating customers table...")
    head_count = 500
    names = [fake.name() for _ in range(head_count)]
    ages = np.random.randint(18, 65, head_count)        # first NumPy draw
    spend = np.random.randint(500, 10000, head_count)   # second NumPy draw

    customers = pd.DataFrame({
        'CustomerID': range(1, head_count + 1),
        'Name': names,
        'Age': ages,
        'TotalSpend ($)': spend,
    })
    customers['Email'] = [
        full_name.replace(' ', '.').lower() + "@example.com" for full_name in names
    ]

    save_to_csv(customers, file_paths['customers'])
    print("Finished customers table...")
    return customers

# Function to generate food items table
def generate_food_items():
    """Build the fixed 15-item concession menu and save it to CSV.

    Returns:
        DataFrame with columns FoodItemID (1-based), FoodName and Price ($).
    """
    print("Generating food items table...")
    # (name, price in $); FoodItemID is the 1-based position in this list.
    menu = [
        ('Hot Dog', 5), ('Nachos', 7), ('Burger', 10), ('Fries', 4),
        ('Pizza Slice', 8), ('Popcorn', 6), ('Pretzel', 5),
        ('Chicken Tenders', 12), ('Soda', 3), ('Beer', 8), ('Wine', 10),
        ('Water Bottle', 2), ('Candy', 3), ('Ice Cream', 6), ('Coffee', 4),
    ]
    food_items = pd.DataFrame({
        'FoodItemID': range(1, len(menu) + 1),
        'FoodName': [name for name, _ in menu],
        'Price ($)': [price for _, price in menu],
    })
    save_to_csv(food_items, file_paths['food_items'])
    print("Finished food items table...")
    return food_items

# Function to generate merchandise items table
def generate_merchandise_items():
    """Build the fixed 18-product merchandise catalogue and save it to CSV.

    Returns:
        DataFrame with columns ProductID (1-based), ProductName and Price ($).
    """
    print("Generating merchandise items table...")
    # (name, price in $); ProductID is the 1-based position in this list.
    catalog = [
        ('Home Jersey', 80), ('Away Jersey', 85), ('Baseball Cap', 25),
        ('Scarf', 20), ('Keychain', 10), ('Mug', 15), ('Sticker Pack', 5),
        ('Autographed Ball', 150), ('Team Poster', 12), ('Backpack', 50),
        ('Hoodie', 60), ('T-Shirt', 25), ('Team Jacket', 100),
        ('Bobblehead', 30), ('Phone Case', 18), ('Socks', 10),
        ('Team Flag', 20), ('Water Bottle', 12),
    ]
    merchandise_items = pd.DataFrame({
        'ProductID': range(1, len(catalog) + 1),
        'ProductName': [name for name, _ in catalog],
        'Price ($)': [price for _, price in catalog],
    })
    save_to_csv(merchandise_items, file_paths['merchandise_items'])
    print("Finished merchandise items table...")
    return merchandise_items

# Function to generate ticket sales table
def generate_ticket_sales():
    """Create one ticket row per attendee of every game and save the table.

    Reads the games and customers CSV files, splits each game's attendance
    across the seating tiers, assigns every ticket to a randomly chosen
    customer (with replacement) and numbers the tickets sequentially.

    Returns:
        DataFrame with columns SeatingType, TicketPrice ($), GameID,
        CustomerID and TicketID.

    Raises:
        ValueError: if a game's Attendance is missing (NaN).
    """
    print("Generating ticket sales table...")
    games = pd.read_csv(file_paths['games'])
    customers = pd.read_csv(file_paths['customers'])
    # Ticket price ($) per seating tier.
    seat_tiers = {
        "Courtside": 500, "Courtside Suites": 450, "Lower Bowl": 250,
        "Suites": 350, "Upper Bowl": 100
    }
    # Share of attendance for each fixed tier, in output order. Upper Bowl is
    # not listed because it receives the remainder.
    tier_shares = {
        "Courtside": 0.1, "Courtside Suites": 0.05, "Lower Bowl": 0.3, "Suites": 0.2
    }
    customer_ids = customers['CustomerID'].to_numpy()

    ticket_sales = []
    for game_id, attendance in zip(games['GameID'], games['Attendance']):
        attendance = int(attendance)  # also accepts whole-number floats
        seat_counts = {tier: int(attendance * share) for tier, share in tier_shares.items()}
        # Each tier above is rounded down on its own, so Upper Bowl must take
        # what is actually left. Subtracting int(attendance * 0.65) instead
        # loses up to three tickets per game (e.g. 19 attendees -> 16 tickets).
        seat_counts["Upper Bowl"] = attendance - sum(seat_counts.values())

        tiers = list(seat_counts)
        counts = list(seat_counts.values())
        df = pd.DataFrame({
            'SeatingType': np.repeat(tiers, counts),
            'TicketPrice ($)': np.repeat([seat_tiers[tier] for tier in tiers], counts),
        })
        df['GameID'] = game_id
        df['CustomerID'] = np.random.choice(customer_ids, len(df))
        ticket_sales.append(df)

    if ticket_sales:
        tickets_table = pd.concat(ticket_sales, ignore_index=True)
    else:
        # pd.concat rejects an empty list; keep the schema for an empty season.
        tickets_table = pd.DataFrame(
            columns=['SeatingType', 'TicketPrice ($)', 'GameID', 'CustomerID']
        )
    tickets_table['TicketID'] = range(1, len(tickets_table) + 1)
    save_to_csv(tickets_table, file_paths['ticket_sales'])
    print("Finished ticket sales table...")
    return tickets_table

# Function to generate food and beverage sales table
def generate_food_beverage_sales():
    """Simulate concession orders for every game and save them to CSV.

    For each game a random number of orders is generated (between 20% and
    50% of attendance, upper bound exclusive). Every order contains 1-3
    distinct food items, each bought in a quantity of 1-4, and is assigned to
    a customer drawn from the whole customer table.

    Returns:
        DataFrame with columns OrderID, CustomerID, GameID,
        FoodItemsPurchased and AmountSpent ($).
    """
    print("Generating food and beverage sales table...")
    games = pd.read_csv(file_paths['games'])
    customers = pd.read_csv(file_paths['customers'])
    food_items = pd.read_csv(file_paths['food_items'])

    # Lookups built once: the loop below runs for every order of every game,
    # so filtering the DataFrame per purchased item was the dominant cost.
    item_ids = food_items['FoodItemID'].to_numpy()
    price_by_id = dict(zip(item_ids.tolist(), food_items['Price ($)'].tolist()))
    # (id, name) pairs in table order: purchased names are listed in this order.
    names_in_menu_order = list(zip(item_ids.tolist(), food_items['FoodName'].tolist()))
    customer_ids = customers['CustomerID'].to_numpy()

    food_beverage_sales = []
    for game_id, attendance in zip(games['GameID'], games['Attendance']):
        low = int(attendance * 0.2)
        # randint requires low < high; widen the range for tiny attendances.
        high = max(int(attendance * 0.5), low + 1)
        num_transactions = np.random.randint(low, high)
        for _ in range(num_transactions):
            num_items = np.random.randint(1, 4)
            selected_items = np.random.choice(item_ids, num_items, replace=False).tolist()
            # Unit price times a random quantity (1-4) for each selected item.
            total_amount = sum(
                price_by_id[item] * np.random.randint(1, 5) for item in selected_items
            )
            chosen = set(selected_items)
            food_beverage_sales.append({
                # Sequential across all games, starting at 1.
                'OrderID': len(food_beverage_sales) + 1,
                'CustomerID': np.random.choice(customer_ids),
                'GameID': game_id,
                'FoodItemsPurchased': ", ".join(
                    name for item_id, name in names_in_menu_order if item_id in chosen
                ),
                'AmountSpent ($)': total_amount
            })

    # Explicit columns keep the CSV header stable even with zero orders.
    food_beverage_sales_table = pd.DataFrame(
        food_beverage_sales,
        columns=['OrderID', 'CustomerID', 'GameID', 'FoodItemsPurchased', 'AmountSpent ($)']
    )
    save_to_csv(food_beverage_sales_table, file_paths['food_beverage_sales'])
    print("Finished food and beverage sales table...")
    return food_beverage_sales_table

# Function to generate merchandise sales table
def generate_merchandise_sales():
    """Simulate merchandise purchases for every game and save them to CSV.

    For each game a random number of transactions is generated (between 10%
    and 40% of attendance, upper bound exclusive). Every transaction contains
    1-3 distinct products, each bought in a quantity of 1-4, and is assigned
    to a customer drawn from the whole customer table.

    Returns:
        DataFrame with columns TransactionID, CustomerID, GameID,
        MerchandisePurchased and AmountSpent ($).
    """
    print("Generating merchandise sales table...")
    games = pd.read_csv(file_paths['games'])
    customers = pd.read_csv(file_paths['customers'])
    merchandise_items = pd.read_csv(file_paths['merchandise_items'])

    # Lookups built once: the loop below runs for every transaction of every
    # game, so filtering the DataFrame per purchased product was the dominant cost.
    product_ids = merchandise_items['ProductID'].to_numpy()
    price_by_id = dict(zip(product_ids.tolist(), merchandise_items['Price ($)'].tolist()))
    # (id, name) pairs in table order: purchased names are listed in this order.
    names_in_catalog_order = list(
        zip(product_ids.tolist(), merchandise_items['ProductName'].tolist())
    )
    customer_ids = customers['CustomerID'].to_numpy()

    merchandise_sales = []
    for game_id, attendance in zip(games['GameID'], games['Attendance']):
        low = int(attendance * 0.1)
        # randint requires low < high; widen the range for tiny attendances.
        high = max(int(attendance * 0.4), low + 1)
        num_transactions = np.random.randint(low, high)
        for _ in range(num_transactions):
            num_items = np.random.randint(1, 4)
            selected_items = np.random.choice(product_ids, num_items, replace=False).tolist()
            # Unit price times a random quantity (1-4) for each selected product.
            total_amount = sum(
                price_by_id[item] * np.random.randint(1, 5) for item in selected_items
            )
            chosen = set(selected_items)
            merchandise_sales.append({
                # Sequential across all games, starting at 1.
                'TransactionID': len(merchandise_sales) + 1,
                'CustomerID': np.random.choice(customer_ids),
                'GameID': game_id,
                'MerchandisePurchased': ", ".join(
                    name for product_id, name in names_in_catalog_order
                    if product_id in chosen
                ),
                'AmountSpent ($)': total_amount
            })

    # Explicit columns keep the CSV header stable even with zero transactions.
    merchandise_sales_table = pd.DataFrame(
        merchandise_sales,
        columns=['TransactionID', 'CustomerID', 'GameID', 'MerchandisePurchased', 'AmountSpent ($)']
    )
    save_to_csv(merchandise_sales_table, file_paths['merchandise_sales'])
    print("Finished merchandise sales table...")
    return merchandise_sales_table

# Main function to call all table generation functions
def main():
    """Run every table generator once, in order.

    The base tables (games, customers, food items, merchandise items) come
    first because the three sales generators read the CSV files those write.
    Return values of the generators are discarded.
    """
    generators = (
        generate_games,
        generate_customers,
        generate_food_items,
        generate_merchandise_items,
        generate_ticket_sales,
        generate_food_beverage_sales,
        generate_merchandise_sales,
    )
    for build_table in generators:
        build_table()
    print("All tables have been generated and saved.")


if __name__ == "__main__":
    main()

Generating games table...
Finished games table...
Generating customers table...
Finished customers table...
Generating food items table...
Finished food items table...
Generating merchandise items table...
Finished merchandise items table...
Generating ticket sales table...
Finished ticket sales table...
Generating food and beverage sales table...
Finished food and beverage sales table...
Generating merchandise sales table...
Finished merchandise sales table...
All tables have been generated and saved.


In [None]:
# Inspect the games table: info() prints the column summary, head() returns
# the first rows.
# NOTE(review): main() discards the generators' return values, so `games` here
# is the module-level frame built by the first script section -- confirm.
games.info()
games.head()

In [None]:
# Inspect the customers table: info() prints the column summary, head()
# returns the first rows.
# NOTE(review): `customers` is the module-level frame from the first script
# section, not the CSV written by generate_customers() -- confirm.
customers.info()
customers.head()

In [None]:
# Inspect the ticket sales table: info() prints the column summary, head()
# returns the first rows.
# NOTE(review): `ticket_sales` is the module-level frame from the first script
# section (it has no TicketID column, unlike generate_ticket_sales()) -- confirm.
ticket_sales.info()
ticket_sales.head()

In [None]:
# Inspect the merchandise sales table: info() prints the column summary,
# head() returns the first rows.
# NOTE(review): `merchandise_sales` is the module-level frame from the first
# script section, not the CSV written by generate_merchandise_sales() -- confirm.
merchandise_sales.info()
merchandise_sales.head()

In [None]:
# Inspect the food and beverage sales table: info() prints the column summary,
# head() returns the first rows.
# NOTE(review): `food_beverage_sales` is the module-level frame from the first
# script section, not the CSV written by generate_food_beverage_sales() -- confirm.
food_beverage_sales.info()
food_beverage_sales.head()