In [5]:
# Imports
from numpy import unique
import pandas as pd
from keras.layers import Input, Concatenate, Dense, Embedding, Flatten
from keras.models import Model

In [6]:
# Load the regular-season and tournament game tables (paths are relative to
# the notebook's working directory).
games_season, games_tourney = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/basketball_data/games_season.csv',
        'data/basketball_data/games_tourney.csv',
    )
)

In [7]:
# Size the embedding vocabulary from the largest team ID that is ever looked
# up, not from the number of distinct `team_1` values. Keras requires every
# index fed to an Embedding to satisfy 0 <= id < input_dim, and this notebook
# feeds `team_1` and `team_2` from both the season and the tournament tables.
# Counting unique `team_1` values alone is too small if IDs are not contiguous
# from 0 or if a team only appears as `team_2` / in tournament games.
# When IDs are exactly 0..N-1 and all occur in `team_1`, this yields the same
# value as the previous unique-count.
largest_team_id = max(
    games[column].max()
    for games in (games_season, games_tourney)
    for column in ('team_1', 'team_2')
)
n_teams = int(largest_team_id) + 1

# Create an embedding layer that maps each team ID to a single learned scalar
team_lookup = Embedding(input_dim=n_teams,
                        output_dim=1,
                        input_length=1,
                        name='Team-Strength')

In [8]:
# One integer per sample: the team ID to look up
teamid_in = Input(shape=(1,))

# Embed the ID with the shared team-strength layer, then drop the extra
# sequence axis so downstream layers receive a flat tensor
strength_lookup = team_lookup(teamid_in)
strength_lookup_flat = Flatten()(strength_lookup)

# Wrap input -> embedding -> flatten as a model so the same weights can be
# applied to more than one input tensor
team_strength_model = Model(
    inputs=teamid_in,
    outputs=strength_lookup_flat,
    name='Team-Strength-Model',
)

In [9]:

# Three scalar inputs: the two team IDs and the home-vs-away indicator
team_in_1, team_in_2, home_in = (
    Input(shape=(1,), name=input_name)
    for input_name in ('Team-1-In', 'Team-2-In', 'Home-In')
)

# Run both team IDs through the same (weight-sharing) team strength model
team_1_strength, team_2_strength = (
    team_strength_model(team_input)
    for team_input in (team_in_1, team_in_2)
)

# Join the two strengths with the home input, then map to a single output
# through a one-unit Dense layer
out = Concatenate()([team_1_strength, team_2_strength, home_in])
out = Dense(units=1)(out)

In [10]:
# Assemble the three-input, single-output model
model = Model(inputs=[team_in_1, team_in_2, home_in], outputs=out)

# Train with Adam, minimising mean absolute error
model.compile(loss='mean_absolute_error', optimizer='adam')

In [11]:
# Column order must match the order of the model's inputs:
# team 1 ID, team 2 ID, home indicator
input_columns = ('team_1', 'team_2', 'home')

# Train on the regular-season games, holding out 10% for validation
season_inputs = [games_season[column] for column in input_columns]
model.fit(
    season_inputs,
    games_season['score_diff'],
    batch_size=2048,
    epochs=1,
    validation_split=0.1,
    verbose=True,
)

# Report the loss on the tournament games
tourney_inputs = [games_tourney[column] for column in input_columns]
tourney_loss = model.evaluate(
    tourney_inputs,
    games_tourney['score_diff'],
    verbose=False,
)
print(tourney_loss)

11.684514045715332


In [12]:
# Store the model's prediction for every tournament game as a new `pred`
# column on the tournament table.
# NOTE(review): `model` is rebound to a different model in a later cell, so
# this cell presumably must not be re-run after that one - confirm when
# reordering or re-executing cells.
tourney_model_inputs = [
    games_tourney[column] for column in ('team_1', 'team_2', 'home')
]
games_tourney['pred'] = model.predict(tourney_model_inputs)

In [13]:
# Single input carrying three features per sample
input_tensor = Input(shape=(3,))

# One-unit Dense layer applied directly to the three features
output_tensor = Dense(units=1)(input_tensor)

# Rebind `model` to this new three-feature model
model = Model(inputs=input_tensor, outputs=output_tensor)

# Train with Adam, minimising mean absolute error
model.compile(loss='mean_absolute_error', optimizer='adam')

In [14]:
# Split by season: everything before 2017 is for training, 2017 is held out
tourney_season = games_tourney['season']
games_tourney_train = games_tourney.loc[tourney_season.lt(2017)]
games_tourney_test = games_tourney.loc[tourney_season.eq(2017)]

In [15]:
# Train the three-feature model on the pre-2017 tournament games, using the
# home indicator, seed difference and the earlier model's prediction
train_features = games_tourney_train[['home', 'seed_diff', 'pred']]
train_target = games_tourney_train['score_diff']
model.fit(train_features, train_target, epochs=1, verbose=True)



<keras.callbacks.History at 0x1f6824e6ac0>

In [17]:
# Report the loss of the three-feature model on the held-out 2017 games
test_features = games_tourney_test[['home', 'seed_diff', 'pred']]
test_target = games_tourney_test['score_diff']
test_loss = model.evaluate(test_features, test_target, verbose=False)
print(test_loss)

11.233369827270508
