In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
from keras.callbacks import TensorBoard
from keras import regularizers
from keras.wrappers.scikit_learn import KerasClassifier



Using TensorFlow backend.


In [2]:
# get training data loaded in to build classifier
df = pd.read_csv('final_bracket_train.csv')
df.dropna(inplace=True)
df.drop(['Season','MinTeam','MaxTeam'], axis = 1,inplace=True)

In [3]:
y_all = df['MinWin']
x_all = df.drop(['MinWin'], axis = 1)
x_all.shape

(968, 140)

In [4]:
# fit scaler to use later
scaler = preprocessing.StandardScaler().fit(x_all)
x_scale = scaler.transform(x_all)

In [5]:
# split up the data 
x_train, x_test, y_train, y_test = train_test_split(x_scale, y_all, test_size=0.33)

In [23]:
tboard = TensorBoard(log_dir='./logs',write_graph=True)
def build_model():
    """build keras scikitlearn model"""
    model = Sequential()
    model.add(Dense(50,input_shape=(140,),activation='relu', kernel_initializer='glorot_uniform',bias_initializer='glorot_uniform',kernel_regularizer=regularizers.l2()))
    model.add(Dropout(.3))
    model.add(Dense(50,activation='relu', kernel_initializer='glorot_uniform',bias_initializer='glorot_uniform',kernel_regularizer=regularizers.l2()))
    model.add(Dropout(.3))
    model.add(Dense(30,activation='relu', kernel_initializer='glorot_uniform',bias_initializer='glorot_uniform',kernel_regularizer=regularizers.l2()))
    model.add(Dropout(.3))
    model.add(Dense(30,activation='relu', kernel_initializer='glorot_uniform',bias_initializer='glorot_uniform',kernel_regularizer=regularizers.l2()))
    model.add(Dropout(.3))
    model.add(Dense(10,activation='relu', kernel_initializer='glorot_uniform',bias_initializer='glorot_uniform',kernel_regularizer=regularizers.l2()))
    model.add(Dropout(.3))
    model.add(Dense(1,activation='sigmoid'))
    
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    
    return model
    

In [24]:
clf = build_model()
clf.fit(x_train,y_train,epochs=300,batch_size=10,verbose=0,validation_data=(x_test,y_test),callbacks=[tboard])

<keras.callbacks.History at 0x21858b72ef0>

In [25]:
#clf.score(x_test,y_test)
clf.evaluate(x_test,y_test, batch_size = 200)



[1.3182627260684967, 0.737500011920929]

In [26]:
print(confusion_matrix(y_test,clf.predict_classes(x_test)))

[[117  46]
 [ 38 119]]


In [27]:
print(classification_report(y_test,clf.predict_classes(x_test)))

             precision    recall  f1-score   support

          0       0.75      0.72      0.74       163
          1       0.72      0.76      0.74       157

avg / total       0.74      0.74      0.74       320



In [28]:
output = pd.read_csv('final_bracket_predict.csv')
out_teams = output[['MinTeam','MaxTeam']]
output.drop(['Season','MinTeam','MaxTeam','MinWin'],axis=1, inplace=True)
out_teams.head()

Unnamed: 0,MinTeam,MaxTeam
0,1104,1112
1,1104,1438
2,1112,1438
3,1113,1438
4,1116,1438


In [29]:
output.head()

Unnamed: 0,min.g1.OffRtg,min.g1.DefRtg,min.g1.NetRtg,min.g1.AstR,min.g1.TOR,min.g1.TSP,min.g1.eFGP,min.g1.FTAR,min.g1.ORP,min.g1.DRP,...,pom.min.NCSOS_AdjEM,pom.max.AdjEM,pom.max.AdjO,pom.max.AdjD,pom.max.AdjT,pom.max.Luck,pom.max.SOS_AdjEM,pom.max.SOS_OppO,pom.max.SOS_OppD,pom.max.NCSOS_AdjEM
0,99.193044,125.737661,-26.544617,15.10574,18.342685,57.555123,0.548077,0.423077,0.206897,0.707317,...,3.02,19.37,119.0,99.6,67.3,0.025,6.33,108.9,102.6,2.82
1,99.193044,125.737661,-26.544617,15.10574,18.342685,57.555123,0.548077,0.423077,0.206897,0.707317,...,3.02,32.15,116.5,84.4,59.2,0.032,9.99,110.9,100.9,0.22
2,111.575264,96.698562,14.876702,12.146643,14.355124,56.340144,0.526786,0.428571,0.366667,0.766667,...,2.82,32.15,116.5,84.4,59.2,0.032,9.99,110.9,100.9,0.22
3,101.889145,112.377733,-10.488588,13.107722,9.532888,52.37215,0.45614,0.315789,0.166667,0.735294,...,1.53,32.15,116.5,84.4,59.2,0.032,9.99,110.9,100.9,0.22
4,108.131488,130.658881,-22.527393,14.906303,9.582624,50.761421,0.468254,0.285714,0.277778,0.470588,...,1.3,32.15,116.5,84.4,59.2,0.032,9.99,110.9,100.9,0.22


In [30]:
x_pred = scaler.transform(output)

In [31]:
predictions = pd.DataFrame(clf.predict_classes(x_pred),columns=['MinWin'])
predictions.head()

Unnamed: 0,MinWin
0,0
1,0
2,0
3,0
4,1


In [32]:
final_file = pd.concat([out_teams,predictions],axis=1)
final_file.head()

Unnamed: 0,MinTeam,MaxTeam,MinWin
0,1104,1112,0
1,1104,1438,0
2,1112,1438,0
3,1113,1438,0
4,1116,1438,1


In [34]:
final_file.to_csv('keras_predictions.csv')