# Model Training

In [None]:
%pip install tensorflow

### Read In Data

In [None]:
import pandas as pd

# Load the engineered feature table that the rest of this notebook trains on.
full_data = pd.read_csv("EngineeredData")
# Load the projected table as well.
# NOTE(review): `pca_data` is not referenced by the cells visible here — confirm it is still needed.
pca_data = pd.read_csv("ProjectedData")

### Final Processing Work

In [None]:
# Re-encode the target as a binary label: 1 when the 'Winner' value equals the
# 'Home' value for that row, 0 otherwise. A single vectorized comparison gives
# the same result as iterating with iterrows()/.at, without the per-row
# overhead and without passing through an object-dtype column.
full_data['Winner'] = (full_data['Winner'] == full_data['Home']).astype(int)

# Remove the columns that must not be fed to the model as features.
# NOTE(review): presumably 'Unnamed: 0' is a saved CSV index and the goal
# columns would reveal the result — confirm against the data-engineering step.
full_data = full_data.drop(
    columns=['Unnamed: 0', 'Visitor', 'Home', 'v_goals', 'h_goals', 'Date', 'Season']
)



### Normalize Data for Training

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every column of `full_data` with a min-max scaler and wrap the
# resulting array back into a DataFrame that keeps the original column names.
# NOTE(review): the scaler is fit on all rows of `full_data`; if a held-out
# split is made afterwards, its statistics are influenced by held-out rows.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(full_data)
full_data_normalized = pd.DataFrame(scaled_values, columns=full_data.columns)


### Split into Feature Vector and Target

In [None]:
# Build the feature matrix from the min-max scaled frame so the network is
# trained on normalized inputs rather than the raw `full_data` values.
X = full_data_normalized.drop(columns=['Winner'])
# Take the labels from the unscaled frame so they stay integer 0/1 values.
y = full_data['Winner']


In [None]:
from sklearn.model_selection import train_test_split

# Two-stage split of `X` / `y`: first hold out 20% of the rows for testing,
# then carve 25% of the remainder off for validation (0.25 * 0.8 = 20% of all
# rows), leaving roughly 60% for training. Both stages use a fixed seed.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, random_state=42
)

# Report the shape of every resulting partition
for label, part in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_val shape:", X_val),
    ("y_val shape:", y_val),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(label, part.shape)


### Training Basic Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense

def train_nn(X_train, y_train, num_epochs=10, batch_size=32):
    """Fit a baseline binary classifier and return the trained model.

    The network has one 32-unit ReLU hidden layer followed by a single
    sigmoid output unit, and is optimized with Adam on binary cross-entropy
    while tracking accuracy.

    Args:
        X_train: Training features; the input width is read from
            ``X_train.shape[1]``.
        y_train: Training targets passed to ``fit`` alongside ``X_train``.
        num_epochs: Number of epochs to train for.
        batch_size: Mini-batch size used by ``fit``.

    Returns:
        The fitted ``Sequential`` model.
    """
    n_features = X_train.shape[1]

    # Assemble the layer stack in one go: hidden layer, then output unit
    network = Sequential([
        Dense(32, input_dim=n_features, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    network.fit(
        X_train,
        y_train,
        epochs=num_epochs,
        batch_size=batch_size,
    )

    return network


In [None]:
# Fit the baseline network on the training split using train_nn's default
# epoch count and batch size.
model = train_nn(X_train, y_train)

# Evaluate the model on the testing data; the result unpacks into the loss and
# the accuracy metric the model was compiled with.
test_loss, test_acc = model.evaluate(X_test, y_test)

# Print the testing accuracy
print('Testing accuracy:', test_acc)


### Grid Search Training Method to Find Optimal Model

In [None]:
from keras.optimizers import Adam


def train_best_nn(
    X_train,
    y_train,
    X_val,
    y_val,
    *,
    hidden_sizes=(32, 64, 128),
    learning_rates=(0.001, 0.01, 0.1),
    num_epochs=100,
):
    """Grid-search a one-hidden-layer classifier and return the best candidate.

    One model is trained for every (hidden size, learning rate) pair. Each
    model has a ReLU hidden layer and a single sigmoid output, and is compiled
    with Adam on binary cross-entropy. The candidate with the lowest
    validation loss after training is kept.

    Args:
        X_train: Training features; the input width is read from
            ``X_train.shape[1]``.
        y_train: Training targets.
        X_val: Validation features used for model selection.
        y_val: Validation targets used for model selection.
        hidden_sizes: Hidden-layer widths to try.
        learning_rates: Adam learning rates to try.
        num_epochs: Number of epochs each candidate is trained for.

    Returns:
        A ``(best_model, best_history)`` tuple: the fitted model with the
        lowest validation loss and the ``History`` object from its ``fit``
        call. Both are ``None`` if no candidate produced a validation loss
        that compares below infinity (for example, every loss was NaN).

    Raises:
        ValueError: If ``hidden_sizes`` or ``learning_rates`` is empty.
    """
    # Materialize the grids so one-shot iterables survive the nested loops.
    hidden_sizes = tuple(hidden_sizes)
    learning_rates = tuple(learning_rates)
    if not hidden_sizes or not learning_rates:
        raise ValueError(
            "hidden_sizes and learning_rates must each contain at least one value"
        )

    n_features = X_train.shape[1]

    best_loss = float("inf")
    best_model = None
    best_history = None

    # Iterate over hyperparameters to find the best model
    for hidden_size in hidden_sizes:
        for learning_rate in learning_rates:

            # Create model
            model = Sequential()
            model.add(Dense(hidden_size, input_dim=n_features, activation="relu"))
            model.add(Dense(1, activation="sigmoid"))

            # Compile model
            model.compile(
                loss="binary_crossentropy",
                optimizer=Adam(learning_rate=learning_rate),
                metrics=["accuracy"],
            )

            # Train model silently; validation metrics are recorded in history
            history = model.fit(
                X_train,
                y_train,
                epochs=num_epochs,
                validation_data=(X_val, y_val),
                verbose=0,
            )

            # Only the loss drives selection; the accuracy value is not needed
            val_loss, _ = model.evaluate(X_val, y_val, verbose=0)

            # Keep track of best model
            if val_loss < best_loss:
                best_loss = val_loss
                best_model = model
                best_history = history

    return best_model, best_history






In [None]:
# Run the grid search; training uses the train split and model selection uses
# the validation split, so the test split is not seen during the search.
best_model, best_history = train_best_nn(X_train, y_train, X_val, y_val)

In [None]:
# Evaluate the selected model on the test set; the result unpacks into the
# loss and the accuracy metric the model was compiled with.
test_loss, test_acc = best_model.evaluate(X_test, y_test)

# Report the accuracy on the test set
print("Test accuracy:", test_acc)


### Saving the Best Model for Prediction Making

In [None]:
# Persist the selected model under the path "Best_Model" so it can be loaded
# later for making predictions.
# NOTE(review): recent Keras releases expect a `.keras` or `.h5` suffix on the
# save path — confirm this extension-less path works with the installed version.
best_model.save("Best_Model")