In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [4]:
accounts = pd.read_csv('../SalvinDirectory/accounts_knn.csv')

In [5]:
print(accounts.shape)
accounts.head()

(44211, 14)


Unnamed: 0,Season,AccountNumber,SingleGameTickets,PartialPlanTickets,GroupTickets,STM,AvgSpend,GamesAttended,FanSegment,DistanceToArena,BasketballPropensity,SocialMediaEngagement,BasketballPropenstiyFill,DistanceToArenaFill
0,2023,1,0,0,0,0,467.0,0,F,12.0,872.0,1,872.0,12.0
1,2023,2,2,0,0,0,116.0,1,A,47.0,485.0,1,485.0,47.0
2,2023,3,3,0,0,0,107.0,1,B,6.0,896.0,1,896.0,6.0
3,2023,4,0,0,3,0,27.0,1,C,3.0,467.0,3,467.0,3.0
4,2023,5,0,0,2,0,14.0,1,A,4.0,582.0,2,582.0,4.0


In [6]:
# accounts['SocialMediaEngagement'] = accounts['SocialMediaEngagement'].str.replace('Low', '0')
# accounts['SocialMediaEngagement'] = accounts['SocialMediaEngagement'].str.replace('Medium', '1')
# accounts['SocialMediaEngagement'] = accounts['SocialMediaEngagement'].str.replace('High', '2')

accounts['PartialPlanTickets'] = np.where(accounts['PartialPlanTickets'] > 0, 1, 0)

In [32]:
train, test = train_test_split(accounts, test_size=0.2, stratify=accounts['PartialPlanTickets'], random_state=42)

train.shape, test.shape

train.dropna(inplace=True)
test.dropna(inplace=True)

In [34]:
random_forest = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

random_forest.fit(train[['AvgSpend','BasketballPropenstiyFill','SingleGameTickets','GroupTickets','STM','GamesAttended','DistanceToArenaFill','SocialMediaEngagement']], train['PartialPlanTickets'])

random_forest_pred = random_forest.predict(test[['AvgSpend','BasketballPropenstiyFill','SingleGameTickets','GroupTickets','STM','GamesAttended','DistanceToArenaFill','SocialMediaEngagement']])

accuracy = accuracy_score(test['PartialPlanTickets'], random_forest_pred)

print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.9677


In [35]:
display(random_forest_pred)
# get accounts where random_forest_pred is 1
partial_ticket_accounts = test[random_forest_pred == 1]
display(partial_ticket_accounts)

array([0, 0, 0, ..., 0, 0, 1])

Unnamed: 0,Season,AccountNumber,SingleGameTickets,PartialPlanTickets,GroupTickets,STM,AvgSpend,GamesAttended,FanSegment,DistanceToArena,BasketballPropensity,SocialMediaEngagement,BasketballPropenstiyFill,DistanceToArenaFill
31342,2024,1537,0,1,0,0,170.500000,5,D,20.0,937.0,1,937.0,20.0
32413,2024,31699,0,1,0,0,310.500000,3,G,315.0,936.0,2,936.0,315.0
20370,2024,20311,0,1,0,0,24.666666,4,D,79.0,515.0,2,515.0,79.0
34906,2024,33742,0,1,0,0,149.000000,5,Limited Data,18.0,621.0,3,621.0,18.0
19350,2024,19306,0,1,0,0,98.680000,9,B,6.0,833.0,2,833.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22651,2024,22567,0,1,0,0,269.454545,6,F,21.0,629.0,2,629.0,21.0
28643,2024,28534,0,1,0,0,154.000000,3,Limited Data,8.0,908.0,2,908.0,8.0
20509,2024,20449,0,0,0,0,9.240000,6,D,21.0,952.0,2,952.0,21.0
43575,2024,42405,0,1,0,0,245.000000,4,E,18.0,892.0,2,892.0,18.0


In [36]:
random_forest.feature_importances_

array([0.0565579 , 0.01707976, 0.22938347, 0.14973243, 0.19966756,
       0.32848542, 0.01810134, 0.00099212])

In [37]:
partial_ticket_accounts.to_csv('partial_ticket_accounts.csv', index=False)

## Working with Seat/Game Data

In [7]:
seats = pd.read_csv('../BucksDatasets/SeatLevel.csv')

seats.head()

Unnamed: 0,Season,AccountNumber,Game,GameDate,GameTier
0,2023,1,2024-01-24 Cleveland Cavaliers,2024-01-24,D
1,2023,1,2024-01-24 Cleveland Cavaliers,2024-01-24,D
2,2023,1,2024-01-24 Cleveland Cavaliers,2024-01-24,D
3,2023,1,2024-01-24 Cleveland Cavaliers,2024-01-24,D
4,2023,1,2024-01-24 Cleveland Cavaliers,2024-01-24,D


In [8]:
games = pd.read_csv('../BucksDatasets/GameLevel.csv')

games.head()

Unnamed: 0,Game,Giveaway
0,2023-10-26 Philadelphia 76ers,
1,2023-10-29 Atlanta Hawks,Cap
2,2023-10-30 Miami Heat,
3,2023-11-03 New York Knicks,
4,2023-11-08 Detroit Pistons,Lunch Bag


In [9]:
games_with_giveaways = games[games['Giveaway'].notna()]
games_with_giveaways.head(19)

Unnamed: 0,Game,Giveaway
1,2023-10-29 Atlanta Hawks,Cap
4,2023-11-08 Detroit Pistons,Lunch Bag
8,2023-11-26 Portland Trail Blazers,Travel Bag
10,2023-12-05 New York Knicks,Nesting Dolls
16,2023-12-21 Orlando Magic,Puzzle
21,2024-01-14 Sacramento Kings,Timepiece
22,2024-01-24 Cleveland Cavaliers,Bucket Cap
26,2024-02-09 Charlotte Hornets,Cap
27,2024-02-12 Denver Nuggets,Antlers
29,2024-02-27 Charlotte Hornets,Belt Bag


In [None]:
# getting accounts that went to games with a giveaway
# can use this data to see which accounts are likely to purchase a 
# 'Promotional Giveaway Inclusive Plan' from our partial ticket plans
seats_with_giveaways = seats[seats['Game'].isin(games_with_giveaways['Game'])]
seats_with_giveaways.head()
seats_with_giveaways.shape

# getting unique accounts
display(seats_with_giveaways['AccountNumber'].nunique())
unique_accounts = seats_with_giveaways['AccountNumber'].unique() # could use with "AccountLevel" data

# showing how many each account purchased
seats_with_giveaways['AccountNumber'].value_counts()



12735

27473    961
19264    664
1522     582
18162    496
21044    416
        ... 
25044      1
36704      1
25072      1
12743      1
1444       1
Name: AccountNumber, Length: 12735, dtype: int64

In [27]:
# getting dates/days of week of games
# could use this data along with seat level data and account data to see if accounts
# are likely to buy partial plan tickets for 'Value Plan' (weekday games) or
# 'Weekend Plan' (weekend games)
games['Date'] = games['Game'].str.split(' ').str[0]

games['Date'] = games['Date'].astype('datetime64')

games['DayOfWeek'] = games['Date'].dt.day_name()
games['DayOfWeekNum'] = games['Date'].dt.weekday

display(games.dtypes)
games.head()


Game                    object
Giveaway                object
Date            datetime64[ns]
DayOfWeek               object
DayOfWeekNum             int64
dtype: object

Unnamed: 0,Game,Giveaway,Date,DayOfWeek,DayOfWeekNum
0,2023-10-26 Philadelphia 76ers,,2023-10-26,Thursday,3
1,2023-10-29 Atlanta Hawks,Cap,2023-10-29,Sunday,6
2,2023-10-30 Miami Heat,,2023-10-30,Monday,0
3,2023-11-03 New York Knicks,,2023-11-03,Friday,4
4,2023-11-08 Detroit Pistons,Lunch Bag,2023-11-08,Wednesday,2


In [None]:
# grouping games by GameTier within 'seats'
# can further extract accounts that are going to the most 'A' and 'B' tier games
# to see if they are interested in 'Marquee Opponent Plan'
a_games = seats[seats['GameTier'] == 'A']
b_games = seats[seats['GameTier'] == 'B']
c_games = seats[seats['GameTier'] == 'C']
d_games = seats[seats['GameTier'] == 'D']

a_games.shape, b_games.shape, c_games.shape, d_games.shape

((65842, 5), (89080, 5), (145515, 5), (193447, 5))