In [64]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from keras.layers import Input, Dense, Dropout, Flatten, Embedding, merge
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.models import Model



In [65]:
df = pd.read_csv('./DataFiles/RegularSeasonDetailedResults.csv')
df.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,10,1104,68,1328,62,N,0,27,58,...,10,16,22,10,22,8,18,9,2,20
1,2003,10,1272,70,1393,63,N,0,26,62,...,24,9,20,20,25,7,12,8,6,16
2,2003,11,1266,73,1437,61,N,0,24,58,...,26,14,23,31,22,9,12,2,5,23
3,2003,11,1296,56,1457,50,N,0,18,38,...,22,8,15,17,20,9,19,4,3,23
4,2003,11,1400,77,1208,71,N,0,30,61,...,16,17,27,21,15,12,10,7,1,14


In [66]:
simple_df_1 = pd.DataFrame()
simple_df_1[['Team', 'Opponent', 'Season']] = df[['WTeamID', 'LTeamID', 'Season']].copy()
simple_df_1['Prediction'] = 1

simple_df_2 = pd.DataFrame()
simple_df_2[['Team', 'Opponent', 'Season']] = df[['LTeamID', 'WTeamID', 'Season']]
simple_df_2['Prediction'] = 0

new_df = pd.concat((simple_df_1, simple_df_2), axis=0)
new_df.tail()

Unnamed: 0,Team,Opponent,Season,Prediction
76631,1458,1276,2017,0
76632,1463,1343,2017,0
76633,1433,1348,2017,0
76634,1153,1374,2017,0
76635,1402,1407,2017,0


In [67]:
n = new_df.Team.nunique()
n

355

In [68]:
trans_dict = {t: i for i, t in enumerate(new_df.Team.unique())}
new_df["Team"] = new_df["Team"].apply(lambda x: trans_dict[x])
new_df["Opponent"] = new_df["Opponent"].apply(lambda x: trans_dict[x])
new_df.head()

Unnamed: 0,Team,Opponent,Season,Prediction
0,0,67,2003,1
1,1,164,2003,1
2,2,93,2003,1
3,3,221,2003,1
4,4,110,2003,1


In [69]:
train = new_df.values
np.random.shuffle(train)

In [70]:
def embedding_input(name, n_in, n_out, reg):
    inp = Input(shape=(1,), dtype="int64", name=name)
    return inp, Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))(inp)

def create_bias(inp, n_in):
    x = Embedding(n_in, 1, input_length=1)(inp)
    return Flatten()(x)

In [71]:
n_factors = 50

team1_in, t1 = embedding_input("team1_in", n, n_factors, 1e-4)
team2_in, t2 = embedding_input("team2_in", n, n_factors, 1e-4)

b1 = create_bias(team1_in, n)
b2 = create_bias(team2_in, n)

  This is separate from the ipykernel package so we can avoid doing imports until


In [72]:
x = merge([t1, t2], mode = 'dot')
x = Flatten()(x)
x = merge([x, b1], mode = 'sum')
x = merge([x, b2], mode = 'sum')
x = Dense(1, activation='sigmoid')(x)
model = Model([team1_in, team2_in], x)
model.compile(Adam(0.001), loss = 'binary_crossentropy')

  """Entry point for launching an IPython kernel.
  name=name)
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


In [73]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
team1_in (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
team2_in (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)        (None, 1, 50)        17750       team1_in[0][0]                   
__________________________________________________________________________________________________
embedding_14 (Embedding)        (None, 1, 50)        17750       team2_in[0][0]                   
__________________________________________________________________________________________________
merge_7 (M

In [74]:
history = model.fit([train[:, 0], train[:, 1]], train[:, 2], batch_size=64, nb_epoch=10, verbose=2)

  """Entry point for launching an IPython kernel.


Epoch 1/10
 - 5s - loss: -2.4595e+04
Epoch 2/10
 - 4s - loss: -3.2031e+04
Epoch 3/10
 - 4s - loss: -3.2031e+04
Epoch 4/10
 - 4s - loss: -3.2031e+04
Epoch 5/10
 - 4s - loss: -3.2031e+04
Epoch 6/10
 - 4s - loss: -3.2031e+04
Epoch 7/10
 - 4s - loss: -3.2031e+04
Epoch 8/10
 - 4s - loss: -3.2031e+04
Epoch 9/10
 - 3s - loss: -3.2031e+04
Epoch 10/10
 - 4s - loss: -3.2032e+04


In [75]:
submission = pd.read_csv('./SampleSubmissionStage1.csv')
submission['Team'] = submission['ID'].apply(lambda x: trans_dict[int(x.split('_')[1])])
submission['Opponent'] = submission['ID'].apply(lambda x: trans_dict[int(x.split('_')[2])])
submission.head()

Unnamed: 0,ID,Pred,Team,Opponent
0,2014_1107_1110,0.5,294,265
1,2014_1107_1112,0.5,294,97
2,2014_1107_1113,0.5,294,25
3,2014_1107_1124,0.5,294,98
4,2014_1107_1140,0.5,294,33


In [76]:
submission['Pred'] = model.predict([submission.Team, submission.Opponent])
submission = submission[['ID', 'Pred']]
submission.head()

Unnamed: 0,ID,Pred
0,2014_1107_1110,1.0
1,2014_1107_1112,1.0
2,2014_1107_1113,1.0
3,2014_1107_1124,1.0
4,2014_1107_1140,1.0


In [78]:
submission.to_csv('keras_submission_2', index=False)