In [4]:
import pandas as pd
import numpy as np

# Read the account-level dataset from a CSV file (path is relative to the
# current working directory).
file_path = "AccountLevelFinal.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Function for min-max normalization
def min_max_normalize(series):
    """Rescale a numeric Series linearly onto the [0, 1] interval.

    Args:
        series: Numeric pandas Series. Missing values are skipped when the
            bounds are computed and remain missing in the result.

    Returns:
        A Series with the same index where the smallest value maps to 0 and
        the largest to 1. When all non-missing values are identical (or
        there are none), non-missing entries map to 0.0 rather than NaN.
    """
    lowest = series.min()
    span = series.max() - lowest
    if pd.isna(span) or span == 0:
        # A zero range would evaluate 0 / 0 = NaN for every row, and that
        # NaN would propagate into every score built from this column.
        return (series - lowest).astype(float)
    return (series - lowest) / span

# Scale the numeric account features onto a common 0-1 range.
for target, source in (
    ("AvgSpend_norm", "AvgSpend"),
    ("GamesAttended_norm", "GamesAttended"),
):
    df[target] = min_max_normalize(df[source])
# Per the original note, BasketballPropensity already lies between 0 and
# 1000, so a fixed divisor is used instead of min-max scaling.
df["BasketballPropensity_norm"] = df["BasketballPropensity"].div(1000)

# Translate the categorical engagement label into a numeric weight. Labels
# that are missing or absent from the mapping fall back to 0.3, the same
# weight as "low".
social_media_mapping = {
    "low": 0.3,
    "medium": 0.6,
    "high": 1.0,
}
engagement_weight = df["SocialMediaEngagement"].map(social_media_mapping)
df["SocialMediaEngagement_norm"] = engagement_weight.fillna(0.3)

# Build the two distance features from DistanceToArena.
# Min-max scaling grows with distance (0 = nearest account, 1 = farthest),
# so the scaled value itself is the "far" feature and its complement is the
# "close" feature. Assigning them the other way round would make plans that
# weight DistanceClose_norm reward the most distant accounts.
# NOTE(review): no cap is applied in this cell; a few extreme distances would
# compress everyone else toward 0. Confirm whether DistanceToArena is already
# capped upstream.
distance_scaled = min_max_normalize(df["DistanceToArena"])
df["DistanceClose_norm"] = 1 - distance_scaled
df["DistanceFar_norm"] = distance_scaled

# Attendance-fraction columns: a missing value is taken to mean the account
# attended no games in that category, so it is replaced with 0.
fraction_columns = [
    "TierCD_Weekday_Fraction",
    "TierAB_Weekday_Fraction",
    "WeekendFraction",
    "Tier_AB_Fraction",
    "GiveawayFraction",
]
df[fraction_columns] = df[fraction_columns].fillna(0)

# Placeholder bonus per fan segment (an assumed, predefined mapping).
# Segments that are missing or not listed receive 0.1, the lowest bonus.
fan_segment_bonus_mapping = {
    "A": 1.0,
    "B": 0.8,
    "C": 0.5,
    "D": 0.3,
    "F": 0.1,
}
segment_bonus = df["FanSegment"].map(fan_segment_bonus_mapping)
df["FanSegmentBonus"] = segment_bonus.fillna(0.1)

# Calculate plan scores based on provided formulas

# Value Plan: weighted sum (weights total 1.0) of inverted spend,
# TierCD_Weekday_Fraction, DistanceClose_norm, inverted games attended,
# and the fan-segment bonus.
df["ValuePlan_Score"] = (
    (1 - df["AvgSpend_norm"]).mul(0.25)
    .add(df["TierCD_Weekday_Fraction"].mul(0.25))
    .add(df["DistanceClose_norm"].mul(0.15))
    .add((1 - df["GamesAttended_norm"]).mul(0.15))
    .add(df["FanSegmentBonus"].mul(0.20))
)

# Marquee Opponent Plan: weighted sum (weights total 1.0) of normalized
# spend, Tier_AB_Fraction, normalized basketball propensity, and the
# fan-segment bonus.
df["MarqueePlan_Score"] = (
    df["AvgSpend_norm"].mul(0.30)
    .add(df["Tier_AB_Fraction"].mul(0.30))
    .add(df["BasketballPropensity_norm"].mul(0.20))
    .add(df["FanSegmentBonus"].mul(0.20))
)

# Weekend Plan: weighted sum (weights total 1.0) dominated by
# WeekendFraction, plus DistanceFar_norm, normalized spend, and the
# fan-segment bonus.
df["WeekendPlan_Score"] = (
    df["WeekendFraction"].mul(0.50)
    .add(df["DistanceFar_norm"].mul(0.20))
    .add(df["AvgSpend_norm"].mul(0.10))
    .add(df["FanSegmentBonus"].mul(0.20))
)

# Promotional Giveaway Plan: weighted sum (weights total 1.0) of the social
# media engagement weight, GiveawayFraction, and the fan-segment bonus.
df["PromoPlan_Score"] = (
    df["SocialMediaEngagement_norm"].mul(0.30)
    .add(df["GiveawayFraction"].mul(0.50))
    .add(df["FanSegmentBonus"].mul(0.20))
)

# Display updated results: print the DataFrame, which at this point carries
# the original columns plus the normalized features and plan-score columns
# assigned earlier in this cell.
print(df)


       Season  AccountNumber  SingleGameTickets  PartialPlanTickets  \
0        2023              1                  0                   0   
1        2023              2                  2                   0   
2        2023              3                  3                   0   
3        2023              4                  0                   0   
4        2023              5                  0                   0   
...       ...            ...                ...                 ...   
42011    2024          43024                  3                   0   
42012    2024          43025                  2                   0   
42013    2024          43026                  0                   0   
42014    2024          43027                  0                   0   
42015    2024          43028                  2                   0   

       GroupTickets  AvgSpend  GamesAttended    FanSegment  DistanceToArena  \
0                 0    467.00              0             F          