In [1]:
import pandas as pd
import numpy as np

# Read the cleaned account-level dataset from disk into a DataFrame.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# row index saved by an earlier export — confirm whether it should be kept.
df = pd.read_csv(filepath_or_buffer="accountlevelcleaned.csv")

# Preview the first five rows as a quick sanity check on the load.
df.head(5)


Unnamed: 0,Season,AccountNumber,SingleGameTickets,PartialPlanTickets,GroupTickets,AvgSpend,GamesAttended,FanSegment,DistanceToArena,BasketballPropensity,SocialMediaEngagement,SumGamesAttended,TotNumTicketsPurchased
0,2023,1,0,0,0,467.0,0,F,12.0,872.0,Low,1,24
1,2023,2,2,0,0,116.0,1,A,47.0,485.0,Low,2,4
2,2023,3,3,0,0,107.0,1,B,6.0,896.0,Low,1,3
3,2023,4,0,0,3,27.0,1,C,3.0,467.0,High,1,3
4,2023,5,0,0,2,14.0,1,A,4.0,582.0,Medium,1,2


In [2]:
# Scale AvgSpend relative to the largest spend found in the data, so the
# normalized value is each account's fraction of that maximum.
max_avg_spend = df['AvgSpend'].max()
df['AvgSpend_norm'] = df['AvgSpend'].div(max_avg_spend)


In [3]:
# Numeric score assigned to each categorical engagement level.
engagement_map = dict(Low=0.33, Medium=0.67, High=1)

# Translate the SocialMediaEngagement labels into their scores; labels that
# are absent from the mapping come out as NaN.
engagement_labels = df['SocialMediaEngagement']
df['SocialMediaEngagement_norm'] = engagement_labels.map(engagement_map)


In [4]:
# Create a new column 'DistanceToArena_norm' scoring proximity to the arena:
# - Less than 20               -> 1
# - Between 20 and 50 (incl.)  -> 0.5
# - Over 50                    -> 0
# - Missing distance           -> NaN
# Comparisons against NaN evaluate to False, so without the explicit isna()
# branch a missing distance would fall through both tests and be scored 0,
# i.e. indistinguishable from an account that is more than 50 away. Keeping
# it as NaN matches how the other *_norm columns propagate missing inputs.
distance = df['DistanceToArena']
df['DistanceToArena_norm'] = np.where(
    distance.isna(),
    np.nan,
    np.where(distance < 20, 1.0, np.where(distance <= 50, 0.5, 0.0)),
)


In [5]:
# Rescale BasketballPropensity by a fixed factor of 1000 and store the result
# as 'BasketballPropensity_norm'.
# NOTE(review): presumably the score tops out at 1000 — confirm the scale.
df['BasketballPropensity_norm'] = df['BasketballPropensity'].div(1000.0)


In [6]:
# Preview the top five rows to check that the normalized columns were added
df.head(5)


Unnamed: 0,Season,AccountNumber,SingleGameTickets,PartialPlanTickets,GroupTickets,AvgSpend,GamesAttended,FanSegment,DistanceToArena,BasketballPropensity,SocialMediaEngagement,SumGamesAttended,TotNumTicketsPurchased,AvgSpend_norm,SocialMediaEngagement_norm,DistanceToArena_norm,BasketballPropensity_norm
0,2023,1,0,0,0,467.0,0,F,12.0,872.0,Low,1,24,0.141644,0.33,1.0,0.872
1,2023,2,2,0,0,116.0,1,A,47.0,485.0,Low,2,4,0.035184,0.33,0.5,0.485
2,2023,3,3,0,0,107.0,1,B,6.0,896.0,Low,1,3,0.032454,0.33,1.0,0.896
3,2023,4,0,0,3,27.0,1,C,3.0,467.0,High,1,3,0.008189,1.0,1.0,0.467
4,2023,5,0,0,2,14.0,1,A,4.0,582.0,Medium,1,2,0.004246,0.67,1.0,0.582


In [7]:
# Write the DataFrame, including the new normalized columns, to a new CSV.
# index=False leaves the DataFrame's row index out of the file.
df.to_csv(path_or_buf="AccountLevelNormalized.csv", index=False)
