In [1]:
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Read the prepared dataset and discard its first column, selected by
# position (presumably an index column written by an earlier export --
# TODO confirm against the CSV).
cols = [0]
final_df = (
    pd.read_csv('final_df.csv')
    .pipe(lambda frame: frame.drop(columns=frame.columns[cols]))
)
final_df

Unnamed: 0,home,season_hits_for,season_hits_against,season_runs_for,season_runs_against,season_errors,season_wins,ml_odds,rl_odds,ml_win,rl_win
0,1,0,0,0,0,0,0,110,-165,1,1
1,0,0,0,0,0,0,0,-120,145,0,0
2,1,0,0,0,0,0,0,-119,190,1,1
3,0,0,0,0,0,0,0,109,-230,0,0
4,1,0,0,0,0,0,0,-121,180,0,0
...,...,...,...,...,...,...,...,...,...,...,...
4921,1,716,590,410,262,53,61,-220,-101,1,0
4922,1,821,727,474,357,46,58,141,-131,0,0
4923,0,797,642,453,339,58,50,-156,111,1,1
4924,1,827,740,477,366,47,58,119,-162,0,0


In [3]:
# Display names for the two classes (presumably 0 -> "loss", 1 -> "win";
# verify against how the label columns were encoded).
target_names = ["loss", "win"]

# Label column to predict. To model the other outcome column instead,
# select "ml_win" here.
target = final_df.loc[:, "rl_win"]

In [4]:
# Feature matrix: every column except the two outcome columns, so neither
# label can leak into the model inputs.
data = final_df.drop(columns=['ml_win', 'rl_win'])

# Column labels of the feature matrix, kept for later reference.
feature_names = data.columns

In [5]:
# Hold out part of the rows for evaluation. No test_size is given, so
# scikit-learn's default split applies; the fixed random_state makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    random_state=42,
)

In [6]:
# Hyper-parameter candidates for the random forest: 4 forest sizes x 2 split
# criteria = 8 combinations, each scored by cross-validation.
param_grid = {
    'n_estimators': [50, 100, 150, 200],
    'criterion': ['gini', 'entropy']
}

# Seed the forest so that cross-validation scores (and therefore the winning
# parameter combination) are reproducible across kernel restarts. Without a
# seed the "best" parameters can change from one run to the next.
grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    verbose=True,
    n_jobs=-1,  # use every available core
)

grid.fit(X_train, y_train)

Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:   15.4s finished


GridSearchCV(estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'n_estimators': [50, 100, 150, 200]},
             verbose=True)

In [7]:
# Show the parameter combination that the grid search ranked best.
print(grid.best_params_)

{'criterion': 'entropy', 'n_estimators': 50}


In [8]:
# Build the final forest from whatever the grid search actually selected
# instead of hand-copying the printed values, so this cell cannot drift out
# of sync with the search above. The seed makes the reported score repeatable.
rf = RandomForestClassifier(random_state=42, **grid.best_params_)
rf.fit(X_train, y_train)

# Mean accuracy on the held-out rows.
rf.score(X_test, y_test)

0.549512987012987

In [9]:
from joblib import dump

# Persist the fitted forest to disk; the call returns the list of files
# written, which the notebook displays as the cell output.
dump(rf, filename='rl_rf.joblib')

['rl_rf.joblib']

In [10]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels of both splits, as required by the
# categorical cross-entropy loss used for the network below.
y_train_categorical, y_test_categorical = (
    to_categorical(labels) for labels in (y_train, y_test)
)

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Size the input layer from the training matrix rather than hard-coding the
# column count, so adding or removing a feature column does not break the
# network definition.
n_features = X_train.shape[1]

# Two hidden ReLU layers followed by a 2-unit softmax output, matching the
# two-column one-hot labels.
deep_model = Sequential()
deep_model.add(Dense(units=18, activation='relu', input_dim=n_features))
deep_model.add(Dense(units=10, activation='relu'))
deep_model.add(Dense(units=2, activation='softmax'))

In [12]:
# Configure training: categorical cross-entropy pairs with the softmax
# output / one-hot labels; accuracy is tracked as the reported metric.
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [13]:
# Train the network on the training split for 50 passes over the data,
# reshuffling each epoch and logging one summary line per epoch.
deep_model.fit(
    x=X_train,
    y=y_train_categorical,
    epochs=50,
    verbose=2,
    shuffle=True,
)

Train on 3694 samples
Epoch 1/50
3694/3694 - 1s - loss: 6.1050 - accuracy: 0.5682
Epoch 2/50
3694/3694 - 0s - loss: 3.9924 - accuracy: 0.5447
Epoch 3/50
3694/3694 - 0s - loss: 1.1589 - accuracy: 0.5181
Epoch 4/50
3694/3694 - 0s - loss: 0.7911 - accuracy: 0.5279
Epoch 5/50
3694/3694 - 0s - loss: 0.7617 - accuracy: 0.5284
Epoch 6/50
3694/3694 - 0s - loss: 0.7412 - accuracy: 0.5379
Epoch 7/50
3694/3694 - 0s - loss: 0.7313 - accuracy: 0.5409
Epoch 8/50
3694/3694 - 0s - loss: 0.7174 - accuracy: 0.5460
Epoch 9/50
3694/3694 - 0s - loss: 0.7079 - accuracy: 0.5417
Epoch 10/50
3694/3694 - 0s - loss: 0.7058 - accuracy: 0.5436
Epoch 11/50
3694/3694 - 0s - loss: 0.7001 - accuracy: 0.5455
Epoch 12/50
3694/3694 - 0s - loss: 0.6971 - accuracy: 0.5574
Epoch 13/50
3694/3694 - 0s - loss: 0.6904 - accuracy: 0.5604
Epoch 14/50
3694/3694 - 0s - loss: 0.6937 - accuracy: 0.5525
Epoch 15/50
3694/3694 - 0s - loss: 0.6899 - accuracy: 0.5574
Epoch 16/50
3694/3694 - 0s - loss: 0.6868 - accuracy: 0.5631
Epoch 17/50

<tensorflow.python.keras.callbacks.History at 0x1a3b538f60>

In [14]:
# Score the trained network on the held-out split; evaluate() yields the
# loss followed by the compiled metric (accuracy).
model_loss, model_accuracy = deep_model.evaluate(
    x=X_test,
    y=y_test_categorical,
    verbose=2,
)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1232/1232 - 0s - loss: 0.6812 - accuracy: 0.5714
Deep Neural Network - Loss: 0.6812222623205805, Accuracy: 0.5714285969734192


In [15]:
def save_model(model,
               architecture_path='rl_model_architecture.json',
               weights_path='rl_model_weights.h5'):
    """Persist a model's architecture and weights as two separate files.

    Args:
        model: Object exposing ``to_json()`` (returning a string) and
            ``save_weights(path, overwrite=...)``, e.g. a Keras model.
        architecture_path: Destination for the JSON architecture description.
            Defaults to the file name this notebook has always used.
        weights_path: Destination for the weights file. Defaults to the file
            name this notebook has always used.

    Returns:
        None. Both files are written as a side effect; existing files at
        either path are replaced.
    """
    # Use a context manager so the handle is flushed and closed
    # deterministically instead of being left for the garbage collector.
    with open(architecture_path, 'w') as architecture_file:
        architecture_file.write(model.to_json())

    # Weights are stored separately from the architecture.
    model.save_weights(weights_path, overwrite=True)

In [16]:
# Write the trained network's architecture (JSON) and weights (HDF5) to disk.
save_model(deep_model)