In [1]:
import pandas as pd
import numpy as np

import warnings
# Silence all warnings for the remainder of the session.
warnings.filterwarnings("ignore")

# Load the four CSV inputs from the working directory, in the order listed,
# and bind each resulting DataFrame to its own name.
game_data, player_data, test_set, training_set = [
    pd.read_csv(csv_name)
    for csv_name in ("game_data.csv", "player_data.csv", "test_set.csv", "training_set.csv")
]

In [2]:
# Approach 1: Predict on training set features

# Part 1: Training Set Data Preparation

# Step 1: Create new table w/ Total Viewers column
# Collapse training_set to one row per Game_ID: the game-level fields are
# taken from the first row seen for that game, and Total_Viewers is the sum of
# that game's "Rounded Viewers" values.
#
# This is done with vectorized pandas operations instead of appending one row
# at a time: DataFrame.append was removed in pandas 2.0, and growing a frame
# inside a loop copies the whole frame on every iteration.

# Number of games (lowest Game_IDs first) to keep. Set to None to keep every game.
MAX_GAMES = 10

GAME_COLUMNS = ["Season", "Game_ID", "Game_Date", "Away_Team", "Home_Team"]

# Per-game viewer totals, indexed by Game_ID.
# Note: groupby sum skips missing values rather than propagating NaN.
viewers_per_game = training_set.groupby("Game_ID")["Rounded Viewers"].sum()

# One row per game, ordered by ascending Game_ID.
train_1 = (
    training_set
    .drop_duplicates(subset="Game_ID", keep="first")
    .sort_values("Game_ID")
    .loc[:, GAME_COLUMNS]
)
if MAX_GAMES is not None:
    train_1 = train_1.head(MAX_GAMES)

# Fresh 0..n-1 index, then attach each game's total by looking up its Game_ID.
train_1 = train_1.reset_index(drop=True)
train_1["Total_Viewers"] = train_1["Game_ID"].map(viewers_per_game)

train_1.head()

Unnamed: 0,Away_Team,Game_Date,Game_ID,Home_Team,Season,Total_Viewers
0,NYK,10/25/2016,21600001,CLE,2016-17,57691
1,UTA,10/25/2016,21600002,POR,2016-17,40740
2,SAS,10/25/2016,21600003,GSW,2016-17,60428
3,MIA,10/26/2016,21600004,ORL,2016-17,22756
4,DAL,10/26/2016,21600005,IND,2016-17,33922


In [3]:
# Step 2: One Hot Encode Home and Away Teams
from sklearn.preprocessing import MultiLabelBinarizer

one_hot_encoder = MultiLabelBinarizer()

# Pair up each game's (away, home) teams and encode every pair as a 0/1
# indicator vector with one slot per team label seen during fitting.
# The vectors are stored row by row in a new "Teams" column on train_1.
matchups = list(zip(train_1["Away_Team"], train_1["Home_Team"]))
team_indicators = one_hot_encoder.fit_transform(matchups)
train_1["Teams"] = list(team_indicators)

# The raw team labels and the game identifier are left out of the next
# table; the team information is carried by the encoded "Teams" column.
train_2 = train_1.drop(columns=["Away_Team", "Home_Team", "Game_ID"])
train_2

Unnamed: 0,Game_Date,Season,Total_Viewers,Teams
0,10/25/2016,2016-17,57691,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
1,10/25/2016,2016-17,40740,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,10/25/2016,2016-17,60428,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,10/26/2016,2016-17,22756,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, ..."
4,10/26/2016,2016-17,33922,"[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
5,10/26/2016,2016-17,23947,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
6,10/26/2016,2016-17,14887,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
7,10/26/2016,2016-17,12046,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."
8,10/26/2016,2016-17,21853,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, ..."
9,10/26/2016,2016-17,42455,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
