In [27]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from skopt import BayesSearchCV

In [54]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [15]:
file_path = "matchups.csv"

In [16]:
matchups = pd.read_csv(file_path)

In [17]:
matchups.head()

Unnamed: 0.1,Unnamed: 0,TEAM NO,DUNKS FG%,DUNKS SHARE,DUNKS FG%D,DUNKS D SHARE,CLOSE TWOS FG%,BADJ EM_x,BADJ O_x,BADJ D_x,...,YEAR,BY YEAR NO,BY ROUND NO,TEAM,SEED,ROUND,CURRENT ROUND,SCORE,GAME ID,OUTCOME
0,0,1011,88.7,13.0,85.2,5.3,60.7,33.0,121.6,88.6,...,2023,1888,1888.0,Alabama,1,16,64,96.0,74,W
1,1,955,90.0,1.6,97.6,4.1,56.5,-1.2,107.8,109.0,...,2023,1887,1887.0,Texas A&M Corpus Chris,16,64,64,75.0,74,L
2,2,979,80.2,6.7,86.7,5.8,59.9,20.8,118.1,97.3,...,2023,1886,1886.0,Maryland,8,32,64,67.0,75,W
3,3,945,89.7,6.3,92.8,6.9,60.3,21.8,117.4,95.6,...,2023,1885,1885.0,West Virginia,9,64,64,65.0,75,L
4,4,961,87.5,7.8,75.0,5.4,61.5,25.7,113.8,88.1,...,2023,1884,1884.0,San Diego St.,5,2,64,63.0,76,W


In [35]:
rows_with_nan = matchups[matchups.isna().any(axis=1)]
print(rows_with_nan)

Empty DataFrame
Columns: [Unnamed: 0, TEAM NO, DUNKS FG%, DUNKS SHARE, DUNKS FG%D, DUNKS D SHARE, CLOSE TWOS FG%, BADJ EM_x, BADJ O_x, BADJ D_x, BARTHAG_x, BADJ EM_y, BADJ O_y, BADJ D_y, BARTHAG_y, YEAR, BY YEAR NO, BY ROUND NO, TEAM, SEED, ROUND, CURRENT ROUND, SCORE, GAME ID, OUTCOME]
Index: []

[0 rows x 25 columns]


In [18]:
X = matchups.drop(columns=['OUTCOME', "TEAM", "ROUND", "BY YEAR NO", "BY ROUND NO", "SCORE"])
y = matchups['OUTCOME']

In [38]:
print(X.isna().sum())

Unnamed: 0        0
TEAM NO           0
DUNKS FG%         0
DUNKS SHARE       0
DUNKS FG%D        0
DUNKS D SHARE     0
CLOSE TWOS FG%    0
BADJ EM_x         0
BADJ O_x          0
BADJ D_x          0
BARTHAG_x         0
BADJ EM_y         0
BADJ O_y          0
BADJ D_y          0
BARTHAG_y         0
YEAR              0
SEED              0
CURRENT ROUND     0
GAME ID           0
dtype: int64


In [60]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [64]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier

In [71]:
def create_baseline_model():
    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [72]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', KerasClassifier(build_fn=create_baseline_model, verbose=0, dropout_rate=0.0, learning_rate=0.01, hidden_layers=(64,32)))
])

In [89]:
parameters = {
    'clf__batch_size': [16, 32, 64],
    'clf__epochs': [10, 20, 50, 100],
    'clf__hidden_layers': [(64,), (128,), (64, 32), (128, 64)],
    'clf__learning_rate': [0.001, 0.01, 0.1],
    'clf__dropout_rate': [0.0, 0.1, 0.2],
}

In [90]:
grid_search = GridSearchCV(estimator=pipeline, param_grid=parameters, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 1296 candidates, totalling 6480 fits


ValueError: Invalid parameter dropout_rate for estimator KerasClassifier.
This issue can likely be resolved by setting this parameter in the KerasClassifier constructor:
`KerasClassifier(dropout_rate=0.0)`
Check the list of available parameters with `estimator.get_params().keys()`

In [85]:
best_params = grid_search.best_params_
best_score = grid_search.best_score_

In [86]:
print("Best Parameters:", best_params)
print("Best Accuracy:", best_score)

Best Parameters: {'clf__batch_size': 64, 'clf__epochs': 10}
Best Accuracy: 0.6628089263256413


In [87]:
best_model = grid_search.best_estimator_

In [88]:
best_model.fit(X_train, y_train)

# Evaluate the model on the test set
test_accuracy = best_model.score(X_test, y_test)

print("Test Accuracy:", test_accuracy)

  X, y = self._initialize(X, y)


Test Accuracy: 0.7317073170731707


In [55]:
class NN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [56]:
def train_nn(X_train, y_train, X_val, y_val, params):
    input_dim = X_train.shape[1]
    output_dim = len(torch.unique(y_train))

    model = NN(input_dim, params['hidden_dim'], output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

    for epoch in range(params['num_epochs']):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        outputs = model(X_val)
        _, predicted = torch.max(outputs, 1)
        accuracy = accuracy_score(predicted.numpy(), y_val.numpy())
    return accuracy

In [57]:
param_grid = {
    'hidden_dim': [5, 10, 20],
    'learning_rate': [0.001, 0.01, 0.1],
    'num_epochs': [50, 100, 200]
}

In [58]:
grid_search = GridSearchCV(estimator=train_nn, param_grid=param_grid, cv=3)
grid_search.fit(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train))

ValueError: could not determine the shape of object type 'DataFrame'

In [None]:
best_params = grid_search.best_params_
best_accuracy = train_nn(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train),
                         torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val),
                         best_params)
print("Best hyperparameters:", best_params)
print("Validation accuracy with best hyperparameters:", best_accuracy)

In [None]:
grid_search = GridSearchCV(estimator=train_nn, param_grid=param_grid, cv=3)
grid_search.fit(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train))

In [53]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train, y_train)

ValueError: Invalid parameter lr for estimator KerasClassifier.
This issue can likely be resolved by setting this parameter in the KerasClassifier constructor:
`KerasClassifier(lr=0.001)`
Check the list of available parameters with `estimator.get_params().keys()`

In [49]:
model.get_params().keys()

dict_keys(['model', 'build_fn', 'warm_start', 'random_state', 'optimizer', 'loss', 'metrics', 'batch_size', 'validation_batch_size', 'verbose', 'callbacks', 'validation_split', 'shuffle', 'run_eagerly', 'epochs', 'class_weight'])

In [None]:
# Print the best hyperparameters found
print("Best hyperparameters found:")
print(grid_search.best_params_)

# Evaluate the best model on the test set
best_model = grid_search.best_estimator_
test_loss, test_accuracy = best_model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

In [30]:
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from sklearn.model_selection import cross_val_score

In [41]:
keras_model = KerasClassifier(build_fn=nn_classifier, epochs=100, batch_size=32, verbose=0)

In [42]:
param_grid = {
    'neurons_per_layer': [32, 64, 128],
    'lr': [0.001, 0.01, 0.1],
    'reg_rate': [0.001, 0.01, 0.1],
    'num_hidden_layers': [1, 2, 3]
}

In [43]:
grid_search = GridSearchCV(estimator=keras_model, param_grid=param_grid, cv=3, verbose=2)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 81 candidates, totalling 243 fits


ValueError: Invalid parameter lr for estimator KerasClassifier.
This issue can likely be resolved by setting this parameter in the KerasClassifier constructor:
`KerasClassifier(lr=0.001)`
Check the list of available parameters with `estimator.get_params().keys()`

In [None]:
print("Best hyperparameters found:")
print(grid_search.best_params_)

In [None]:
keras_model = KerasClassifier(build_fn=nn_classifier, epochs=100, batch_size=32, verbose=0)

In [None]:
model = nn_classifier(neurons_per_layer=64, lr=0.001)

In [None]:

y_pred = best_model.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on test set:", accuracy)