# Optuna Trials For Baseline Model

In [87]:
# Standard library imports
import datetime
import os
from collections import deque
import time

# Third-party imports
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary
from tqdm import tqdm

if os.path.exists('/workspace/data'):
    # Load the dictionary of DataFrames from the pickle
    data_path = '/workspace/data/'
else:
    data_path = '../data/'
    
if torch.cuda.is_available() == False:
    RuntimeError("GPU detected: False")
    print("GPU detected: False")
else:
    device = torch.device("cuda")
    print("The GPU is detected.")



The GPU is detected.


## Create the model
We make a basic NN for binary classification that takes as input a list of integers that correspond to the out_features of each linear layer. 

In [88]:
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, in_features, out_features, input_dropout=.2, hidden_dropout=.3):
        """Initializes the model layers.

        Args:
            in_features (int): The number of input features of the dataset.
            out_features (list): The number of units in each linear layer.
        """
        # Call the parent class (nn.Module) initializer first
        super(Model, self).__init__()
        
        layers = []

        # Input dropout layer
        layers.append(nn.Dropout(input_dropout))
    
        # Build layers dynamically
        for out_feature in out_features:
            layers.append(nn.Linear(in_features, out_feature))
            layers.append(nn.BatchNorm1d(out_feature))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(hidden_dropout))
            in_features = out_feature
        
        # Final output layer for binary classification (with 1 output node)
        layers.append(nn.Linear(in_features, 1))
        
        # Store the sequence of layers
        self.sequential = nn.Sequential(*layers)
        
    def forward(self, x):
        """Forward pass of the model."""
        return self.sequential(x)


## Import Data
We create the dataset we are going to train the model on.

In [89]:
df = pd.read_pickle(data_path + 'tournament_sets_with_top_8_df.pkl')
df.head()



Unnamed: 0,key_x,game,tournament_key,winner_id,p1_id,p2_id,p1_score,p2_score,location_names,bracket_name,...,start,p1_rating,p2_rating,p1_updates,p2_updates,top_8,rating_difference,higher_rated_won,more_updates_won,p1_won
45,90101028,melee,s@sh7,Fija,Fija,Sasha,1,0,"[W1, Winners 1, Winners Round 1]",,...,2017-06-13 10:27:01,1667.529088,1500.0,1.0,0.0,False,167.529088,True,True,True
47,90101030,melee,s@sh7,Bird,Empty Spirits,Bird,0,1,"[W1, Winners 1, Winners Round 1]",,...,2017-06-13 10:27:01,1500.0,1622.33761,0.0,2.0,False,122.33761,True,True,False
48,90101031,melee,s@sh7,Stitchface,3551,Stitchface,0,1,"[W1, Winners 1, Winners Round 1]",,...,2017-06-13 10:27:01,1523.49794,1500.0,3.0,0.0,False,23.49794,False,False,False
50,90101033,melee,s@sh7,rodohk,phlops,rodohk,0,1,"[W1, Winners 1, Winners Round 1]",,...,2017-06-13 10:27:01,1252.681917,1500.0,1.0,0.0,False,247.318083,True,False,False
55,90101038,melee,s@sh7,Sorry,Psythr,Sorry,0,1,"[W1, Winners 1, Winners Round 1]",,...,2017-06-13 10:27:01,1400.124736,1500.0,1.0,0.0,False,99.875264,True,False,False


In [90]:
df['rating_difference'].describe()

count    1.676628e+06
mean     2.322831e+02
std      1.905061e+02
min      1.000414e+00
25%      8.691484e+01
50%      1.861117e+02
75%      3.284191e+02
max      1.953188e+03
Name: rating_difference, dtype: float64

In [91]:
df = df[df['top_8'] == False]
# df = df[df['rating_difference'] > 1]
# df = df[df['rating_difference'] < 10]
print(df.shape)
print(df.columns)

features = ['p1_rating', 'p2_rating', 'p1_updates', 'p2_updates']
# features = ['p1_updates', 'p2_updates']
# features = ['p1_rating', 'p2_rating']
print(df.head())
print(df.shape)

# # features = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']

X = df[features].astype(float).values  # Convert to numpy array
y = df['p1_won'].astype(float).values  # Convert to numpy array
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, train_size=.5, random_state=103)

# # Convert the splits to PyTorch tensors and reshape y to be 2D
X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
X_val, y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
df[features].head()

(1182459, 25)
Index(['key_x', 'game', 'tournament_key', 'winner_id', 'p1_id', 'p2_id',
       'p1_score', 'p2_score', 'location_names', 'bracket_name',
       'bracket_order', 'set_order', 'best_of', 'game_data', 'key_y', 'start',
       'p1_rating', 'p2_rating', 'p1_updates', 'p2_updates', 'top_8',
       'rating_difference', 'higher_rated_won', 'more_updates_won', 'p1_won'],
      dtype='object')
       key_x   game tournament_key   winner_id          p1_id       p2_id  \
45  90101028  melee          s@sh7        Fija           Fija       Sasha   
47  90101030  melee          s@sh7        Bird  Empty Spirits        Bird   
48  90101031  melee          s@sh7  Stitchface           3551  Stitchface   
50  90101033  melee          s@sh7      rodohk         phlops      rodohk   
55  90101038  melee          s@sh7       Sorry         Psythr       Sorry   

    p1_score  p2_score                    location_names bracket_name  ...  \
45         1         0  [W1, Winners 1, Winners Round 1] 

Unnamed: 0,p1_rating,p2_rating,p1_updates,p2_updates
45,1667.529088,1500.0,1.0,0.0
47,1500.0,1622.33761,0.0,2.0
48,1523.49794,1500.0,3.0,0.0
50,1252.681917,1500.0,1.0,0.0
55,1400.124736,1500.0,1.0,0.0


In [92]:
df = pd.read_pickle(data_path + 'dataset_mini.pkl')

features = list(df.columns)[:-1]
print(features)

X = df[features].astype(float).values  # Convert to numpy array
y = df['winner'].astype(float).values  # Convert to numpy array
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, train_size=.5, random_state=103)

# # Convert the splits to PyTorch tensors and reshape y to be 2D
X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
X_val, y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
df[features].head()

['p1_elo', 'p2_elo', 'p1_rd', 'p2_rd', 'p1_updates', 'p2_updates', 'p1_m1_usage', 'p1_m2_usage', 'p1_m3_usage', 'p2_m1_usage', 'p2_m2_usage', 'p2_m3_usage', 'p1/m1/m1_elo', 'p1/m1/m1_updates', 'p1/m1/m2_elo', 'p1/m1/m2_updates', 'p1/m1/m3_elo', 'p1/m1/m3_updates', 'p1/m2/m1_elo', 'p1/m2/m1_updates', 'p1/m2/m2_elo', 'p1/m2/m2_updates', 'p1/m2/m3_elo', 'p1/m2/m3_updates', 'p1/m3/m1_elo', 'p1/m3/m1_updates', 'p1/m3/m2_elo', 'p1/m3/m2_updates', 'p1/m3/m3_elo', 'p1/m3/m3_updates', 'p2/m1/m1_elo', 'p2/m1/m1_updates', 'p2/m1/m2_elo', 'p2/m1/m2_updates', 'p2/m1/m3_elo', 'p2/m1/m3_updates', 'p2/m2/m1_elo', 'p2/m2/m1_updates', 'p2/m2/m2_elo', 'p2/m2/m2_updates', 'p2/m2/m3_elo', 'p2/m2/m3_updates', 'p2/m3/m1_elo', 'p2/m3/m1_updates', 'p2/m3/m2_elo', 'p2/m3/m2_updates', 'p2/m3/m3_elo', 'p2/m3/m3_updates']


Unnamed: 0,p1_elo,p2_elo,p1_rd,p2_rd,p1_updates,p2_updates,p1_m1_usage,p1_m2_usage,p1_m3_usage,p2_m1_usage,...,p2/m2/m2_elo,p2/m2/m2_updates,p2/m2/m3_elo,p2/m2/m3_updates,p2/m3/m1_elo,p2/m3/m1_updates,p2/m3/m2_elo,p2/m3/m2_updates,p2/m3/m3_elo,p2/m3/m3_updates
681,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
682,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
683,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
684,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
685,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0


## Overall ELO and match count

In [93]:
df = pd.read_pickle(data_path + 'dataset.pkl')
df = df[df['p1_updates'] > 10]
df = df[df['p2_updates'] > 10]
print(df.columns)
print(df.shape)
df.head()

Index(['p1_elo', 'p2_elo', 'p1_rd', 'p2_rd', 'p1_updates', 'p2_updates',
       'p1_melee/fox_count', 'p1_melee/falco_count', 'p1_melee/marth_count',
       'p1_melee/sheik_count', 'p1_melee/captainfalcon_count',
       'p1_melee/jigglypuff_count', 'p1_melee/peach_count',
       'p1_melee/luigi_count', 'p1_melee/samus_count',
       'p1_melee/ganondorf_count', 'p1_melee/iceclimbers_count',
       'p1_melee/drmario_count', 'p1_melee/yoshi_count',
       'p1_melee/pikachu_count', 'p1_melee/link_count',
       'p1_melee/mrgameandwatch_count', 'p1_melee/donkeykong_count',
       'p1_melee/mario_count', 'p1_melee/zelda_count', 'p1_melee/roy_count',
       'p1_melee/younglink_count', 'p1_melee/kirby_count',
       'p1_melee/ness_count', 'p1_melee/bowser_count', 'p1_melee/pichu_count',
       'p1_melee/random_count', 'p1_melee/mewtwo_count', 'p2_melee/fox_count',
       'p2_melee/falco_count', 'p2_melee/marth_count', 'p2_melee/sheik_count',
       'p2_melee/captainfalcon_count', 'p2_melee/jig

Unnamed: 0,p1_elo,p2_elo,p1_rd,p2_rd,p1_updates,p2_updates,p1_melee/fox_count,p1_melee/falco_count,p1_melee/marth_count,p1_melee/sheik_count,...,p2_melee/zelda_count,p2_melee/roy_count,p2_melee/younglink_count,p2_melee/kirby_count,p2_melee/ness_count,p2_melee/bowser_count,p2_melee/pichu_count,p2_melee/random_count,p2_melee/mewtwo_count,winner
116151,1912.996025,1861.525794,56.212927,56.672353,11.0,12.0,7.0,25.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
113514,2141.73161,1979.768284,64.188421,57.640754,11.0,11.0,43.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
113434,2166.802439,1888.483601,61.638662,59.798796,12.0,11.0,48.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
113436,2166.802439,1979.18556,61.638662,60.960886,12.0,15.0,48.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
113437,2166.802439,1979.18556,61.638662,60.960886,12.0,15.0,48.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [94]:
features = df.columns[0:-1]
print(features)

X = df[features].astype(float).values  # Convert to numpy array
y = df['winner'].astype(float).values  # Convert to numpy array
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, train_size=.5, random_state=103)

# # Convert the splits to PyTorch tensors and reshape y to be 2D
X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
X_val, y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
df[features].head()

Index(['p1_elo', 'p2_elo', 'p1_rd', 'p2_rd', 'p1_updates', 'p2_updates',
       'p1_melee/fox_count', 'p1_melee/falco_count', 'p1_melee/marth_count',
       'p1_melee/sheik_count', 'p1_melee/captainfalcon_count',
       'p1_melee/jigglypuff_count', 'p1_melee/peach_count',
       'p1_melee/luigi_count', 'p1_melee/samus_count',
       'p1_melee/ganondorf_count', 'p1_melee/iceclimbers_count',
       'p1_melee/drmario_count', 'p1_melee/yoshi_count',
       'p1_melee/pikachu_count', 'p1_melee/link_count',
       'p1_melee/mrgameandwatch_count', 'p1_melee/donkeykong_count',
       'p1_melee/mario_count', 'p1_melee/zelda_count', 'p1_melee/roy_count',
       'p1_melee/younglink_count', 'p1_melee/kirby_count',
       'p1_melee/ness_count', 'p1_melee/bowser_count', 'p1_melee/pichu_count',
       'p1_melee/random_count', 'p1_melee/mewtwo_count', 'p2_melee/fox_count',
       'p2_melee/falco_count', 'p2_melee/marth_count', 'p2_melee/sheik_count',
       'p2_melee/captainfalcon_count', 'p2_melee/jig

Unnamed: 0,p1_elo,p2_elo,p1_rd,p2_rd,p1_updates,p2_updates,p1_melee/fox_count,p1_melee/falco_count,p1_melee/marth_count,p1_melee/sheik_count,...,p2_melee/mario_count,p2_melee/zelda_count,p2_melee/roy_count,p2_melee/younglink_count,p2_melee/kirby_count,p2_melee/ness_count,p2_melee/bowser_count,p2_melee/pichu_count,p2_melee/random_count,p2_melee/mewtwo_count
116151,1912.996025,1861.525794,56.212927,56.672353,11.0,12.0,7.0,25.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
113514,2141.73161,1979.768284,64.188421,57.640754,11.0,11.0,43.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
113434,2166.802439,1888.483601,61.638662,59.798796,12.0,11.0,48.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
113436,2166.802439,1979.18556,61.638662,60.960886,12.0,15.0,48.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
113437,2166.802439,1979.18556,61.638662,60.960886,12.0,15.0,48.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [95]:
df = pd.read_pickle(data_path + 'dataset_mini.pkl')

features = list(df.columns)[0:-1]




print(features)

X = df[features].astype(float).values  # Convert to numpy array
y = df['winner'].astype(float).values  # Convert to numpy array
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, train_size=.5, random_state=103)

# # Convert the splits to PyTorch tensors and reshape y to be 2D
X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
X_val, y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
df[features].head()

['p1_elo', 'p2_elo', 'p1_rd', 'p2_rd', 'p1_updates', 'p2_updates', 'p1_m1_usage', 'p1_m2_usage', 'p1_m3_usage', 'p2_m1_usage', 'p2_m2_usage', 'p2_m3_usage', 'p1/m1/m1_elo', 'p1/m1/m1_updates', 'p1/m1/m2_elo', 'p1/m1/m2_updates', 'p1/m1/m3_elo', 'p1/m1/m3_updates', 'p1/m2/m1_elo', 'p1/m2/m1_updates', 'p1/m2/m2_elo', 'p1/m2/m2_updates', 'p1/m2/m3_elo', 'p1/m2/m3_updates', 'p1/m3/m1_elo', 'p1/m3/m1_updates', 'p1/m3/m2_elo', 'p1/m3/m2_updates', 'p1/m3/m3_elo', 'p1/m3/m3_updates', 'p2/m1/m1_elo', 'p2/m1/m1_updates', 'p2/m1/m2_elo', 'p2/m1/m2_updates', 'p2/m1/m3_elo', 'p2/m1/m3_updates', 'p2/m2/m1_elo', 'p2/m2/m1_updates', 'p2/m2/m2_elo', 'p2/m2/m2_updates', 'p2/m2/m3_elo', 'p2/m2/m3_updates', 'p2/m3/m1_elo', 'p2/m3/m1_updates', 'p2/m3/m2_elo', 'p2/m3/m2_updates', 'p2/m3/m3_elo', 'p2/m3/m3_updates']


Unnamed: 0,p1_elo,p2_elo,p1_rd,p2_rd,p1_updates,p2_updates,p1_m1_usage,p1_m2_usage,p1_m3_usage,p2_m1_usage,...,p2/m2/m2_elo,p2/m2/m2_updates,p2/m2/m3_elo,p2/m2/m3_updates,p2/m3/m1_elo,p2/m3/m1_updates,p2/m3/m2_elo,p2/m3/m2_updates,p2/m3/m3_elo,p2/m3/m3_updates
681,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
682,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
683,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
684,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0
685,1500.0,1500.0,350.0,350.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0


In [96]:
df

Unnamed: 0,p1_elo,p2_elo,p1_rd,p2_rd,p1_updates,p2_updates,p1_m1_usage,p1_m2_usage,p1_m3_usage,p2_m1_usage,...,p2/m2/m2_updates,p2/m2/m3_elo,p2/m2/m3_updates,p2/m3/m1_elo,p2/m3/m1_updates,p2/m3/m2_elo,p2/m3/m2_updates,p2/m3/m3_elo,p2/m3/m3_updates,winner
681,1500.000000,1500.000000,350.000000,350.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1.0
682,1500.000000,1500.000000,350.000000,350.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,0.0
683,1500.000000,1500.000000,350.000000,350.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1.0
684,1500.000000,1500.000000,350.000000,350.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1.0
685,1500.000000,1500.000000,350.000000,350.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1489901,1770.163715,1386.695651,72.805474,65.897981,9.0,43.0,21.0,20.0,0.0,136.0,...,0.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1.0
1489902,1625.192442,1485.997649,56.094492,81.581109,45.0,11.0,296.0,20.0,2.0,22.0,...,0.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,1.0
1489903,1784.559717,1770.163715,64.115340,72.805474,18.0,9.0,96.0,17.0,7.0,21.0,...,2.0,1654.006739,3.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,0.0
1489904,1625.192442,1770.163715,56.094492,72.805474,45.0,9.0,296.0,20.0,2.0,21.0,...,1.0,1500.000000,0.0,1500.0,0.0,1500.0,0.0,1500.0,0.0,0.0


In [97]:
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.metrics import accuracy_score


# knn = KNeighborsClassifier(n_neighbors=17, n_jobs=-1)
# knn.fit(X_train, y_train)
# predictions = knn.predict(X_test)
# print('Acc =', accuracy_score(y_test, predictions))

In [98]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, log_loss

model = xgb.XGBClassifier(max_depth=3, n_estimators=100, tree_method='hist')

model.fit(X_train, y_train)

predictions = model.predict(X_test)

print('Acc', accuracy_score(y_test, predictions))
print('LogLoss', log_loss(y_test, predictions))

Acc 0.7723056127034112
LogLoss 8.206937574365645


In [86]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Split your data
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base estimator and AdaBoost parameters
# Define the base estimator and AdaBoost parameters
# base_estimator =  DecisionTreeClassifier(max_depth=1)
model = AdaBoostClassifier()

# Train AdaBoost
model.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")



Accuracy: 0.7590427887467518


In [72]:
import optuna
import xgboost as xgb
from sklearn.metrics import accuracy_score, log_loss

def objective(trial):
    # Suggest hyperparameters using the new `suggest_float` method
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'eta': trial.suggest_float('eta', 0.01, 0.3, log=True),  # learning rate
        'max_depth': trial.suggest_int('max_depth', 3, 9),        # max depth of trees
        'subsample': trial.suggest_float('subsample', 0.5, 1.0), # subsample ratio
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),  # colsample by tree
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),  # L2 regularization
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),    # L1 regularization
        'random_state': 42
    }

    # Create DMatrix for XGBoost
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval = xgb.DMatrix(X_val, label=y_val)

    # Train the model
    evallist = [(dtrain, 'train'), (dval, 'eval')]
    model = xgb.train(params, dtrain, num_boost_round=1000, evals=evallist, early_stopping_rounds=200, verbose_eval=False)

    # Predict on validation set
    y_val_pred = model.predict(dval)
    y_val_pred_binary = [1 if p > 0.5 else 0 for p in y_val_pred]

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_val_pred_binary)

    return accuracy



# Create a study object and optimize the objective function
study = optuna.create_study(direction='maximize')  # We want to maximize accuracy
study.optimize(objective, n_trials=5, show_progress_bar=True)  # You can set n_trials higher for more iterations

# Print the best hyperparameters
print('Best trial:')
trial = study.best_trial
print(f'  Accuracy: {trial.value}')
print('  Best hyperparameters: ', trial.params)

[I 2024-10-31 12:30:18,656] A new study created in memory with name: no-name-735863f1-c33a-4b49-b33f-89bea9eeee25


  0%|          | 0/5 [00:00<?, ?it/s]

[W 2024-10-31 12:30:47,884] Trial 0 failed with parameters: {'eta': 0.021779213173376277, 'max_depth': 7, 'subsample': 0.7786862764529311, 'colsample_bytree': 0.8437810658175637, 'lambda': 0.0011553578016208121, 'alpha': 0.008071252456471213} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_377542/1980045240.py", line 25, in objective
    model = xgb.train(params, dtrain, num_boost_round=1000, evals=evallist, early_stopping_rounds=200, verbose_eval=False)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/training.py", line 182, in train
    if cb_container.after_iteration(bst, i, dtrain, evals):
  File "/usr/local/lib/python3.10/dist-packages/xgboost/callback.py", line 258, in

KeyboardInterrupt: 

## Data Loader


In [28]:
def prepare_data_loaders(X_train, y_train, X_test, y_test, X_val, y_val, batch_size=8, num_workers=16):
    # Convert datasets to TensorDataset (pairs features and labels)
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)
    val_dataset = TensorDataset(X_val, y_val)
    
    # Create DataLoader objects for train, test, and validation datasets
    loaders = {
        "train": DataLoader(train_dataset, batch_size=batch_size, drop_last=True, num_workers=num_workers, shuffle=True, pin_memory=True, persistent_workers=True),
        "test": DataLoader(test_dataset, batch_size=batch_size, drop_last=True, num_workers=num_workers, shuffle=True, pin_memory=True, persistent_workers=True),
        "val": DataLoader(val_dataset, batch_size=batch_size, drop_last=True, num_workers=num_workers, shuffle=True, pin_memory=True, persistent_workers=True),
    }
    return loaders

## Train & Test Functions
Here we have basic train and test functions.

In [29]:
def train_epoch_progress(model, loaders, criterion, optimizer, num_epochs, epoch, device):
    model.train()
    
    # Use tqdm to display progress bar for the training loop
    leave = True
    train_loader_tqdm = tqdm(loaders['train'], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch', leave=leave)
    
    # Our training dataset is has well over a million examples.
    # We expect the loss to change a lot over a single epoch,
    # so we only show the loss of the 10_000 most recent batches.
    running_loss = deque(maxlen=10000)
    
    # Train epoch
    for X_train, y_train in train_loader_tqdm:
        optimizer.zero_grad()
        
        X_train_gpu = X_train.to(device)
        y_train_gpu = y_train.to(device)
        
        output_gpu = model(X_train_gpu)
        
        loss = criterion(output_gpu, y_train_gpu)
        running_loss.append(loss.item())  # Store loss for averaging
        
        loss.backward()
        optimizer.step()
        
        # Calculate and set the average loss for the tqdm progress bar
        avg_loss = sum(running_loss) / len(running_loss) if len(running_loss) > 0 else 0
        
        train_loader_tqdm.set_postfix(loss=f"{avg_loss:.4f}")

    return

def test_model_progress(model, loaders, criterion, device, num_epochs, epoch, loader='test'):
    # Validate epoch:
    model.eval()
    leave = True
    test_loader_tqdm = tqdm(loaders[loader], desc=f'Test {epoch+1}/{num_epochs}', unit='batch', leave=leave)
    test_loss = []
    num_tested = []
    correct_pred = 0
    
    with torch.no_grad():
        for X_test, y_test in test_loader_tqdm:
            X_test_gpu = X_test.to(device)
            y_test_gpu = y_test.to(device)
            
            output_gpu = model(X_test_gpu)
            
            # Accumulate test loss
            test_loss.append(criterion(output_gpu, y_test_gpu).item() * X_test.shape[0])
            num_tested.append(X_test.shape[0])
            
            # Calculate number of correct predictions for binary classification
            correct_pred += torch.sum(((nn.Sigmoid()(output_gpu) > 0.5) == y_test_gpu).float()).item()
            
            test_loader_tqdm.set_postfix(loss=f"{sum(test_loss) / sum(num_tested):.4f}", acc=f"{correct_pred / sum(num_tested):.1%}")
        
        # Calculate average loss and accuracy
        avg_loss = sum(test_loss) / sum(num_tested)
        accuracy = correct_pred / sum(num_tested)
        
    return avg_loss, accuracy

Here we have the same train and test functions as above, but without the progress bars.

In [30]:
def train_epoch(model, loaders, criterion, optimizer, num_epochs, epoch, device):
    model.train()
    
    # Train epoch
    for X_train, y_train in loaders['train']:
        optimizer.zero_grad()
        
        X_train_gpu = X_train.to(device)
        y_train_gpu = y_train.to(device)
        
        output_gpu = model(X_train_gpu)
        
        loss = criterion(output_gpu, y_train_gpu)
        
        loss.backward()
        optimizer.step()
        
    return 

def test_model(model, loaders, criterion, device, num_epochs, epoch, loader='test'):
    # Validate epoch:
    model.eval()
    test_loss = []
    num_tested = []
    correct_pred = 0
    
    with torch.no_grad():
        for X_test, y_test in loaders[loader]:
            X_test_gpu = X_test.to(device)
            y_test_gpu = y_test.to(device)
            
            output_gpu = model(X_test_gpu)
            
            # Accumulate test loss.
            test_loss.append(criterion(output_gpu, y_test_gpu).item() * X_test.shape[0])
            num_tested.append(X_test.shape[0])
            
            # Calculate number of correct predictions for binary classification.
            correct_pred += torch.sum(((nn.Sigmoid()(output_gpu) > 0.5) == y_test_gpu).float()).item()
        
        # Calculate average loss and accuracy
        avg_loss = sum(test_loss) / sum(num_tested)
        accuracy = correct_pred / sum(num_tested)

    return avg_loss, accuracy


## Optuna Study
We create a simple optuna study to find a good model architecture.

In [31]:
def objective(trial, num_layers, min_out, max_out, in_features, loaders, study_name):
    input_dropout = 0
    hidden_dropout = .2
    # input_dropout = trial.suggest_float("input_dropout", 0, .5)
    hidden_dropout = trial.suggest_float("hidden_dropout", 0, .5)
    
    # Generate the output features for each layer using trial suggestions
    out_features = []
    for i in range(num_layers):
        out_features.append(trial.suggest_int(f"out_features_layer_{i}", min_out, max_out))
    
    # Create model and move to device
    model = Model(in_features, out_features, input_dropout, hidden_dropout).to(device)
    
    # Compile the model (not always worth it)
    # model.compile()
    
    # Initialize optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    criterion = nn.BCEWithLogitsLoss()
    
    num_epochs = 1
    
    # Training loop for num_epochs
    for epoch in range(num_epochs):
        train_epoch(model, loaders, criterion, optimizer, num_epochs, epoch, device)
    
    test_loss, test_accuracy = test_model(model, loaders, criterion, device, num_epochs, epoch, loader='test')
    print(f"Accuracy = {test_accuracy:.1%}")
    ## Print results if we want 
    # print(f"Loss={test_loss:0.5f}, Accuracy={test_accuracy:0.1%}")
    
    # Return the test loss to be minimized
    return test_loss

In [32]:
loaders = prepare_data_loaders(X_train, y_train, X_test, y_test, X_val, y_val, batch_size=16, num_workers=1)

# Define the parameters for the study
study_name = "Baseline"
num_layers = 3
min_out = 16
max_out = 1024 * 2
in_features = X_train.shape[1]

# Create the study
study = optuna.create_study(study_name=study_name, direction='minimize')

# Define the objective function and run the optimization
study.optimize(lambda trial: objective(trial, num_layers, min_out, max_out, in_features, loaders, study_name), 
               n_trials=10, show_progress_bar=True)  # You can specify how many trials you want

# Print the best parameters found by the study
print()
print(f"Best parameters: {study.best_params}")
print(f"Best trial: {study.best_trial}")

[I 2024-10-31 12:02:55,543] A new study created in memory with name: Baseline


  0%|          | 0/10 [00:00<?, ?it/s]

Accuracy = 76.6%
[I 2024-10-31 12:04:24,327] Trial 0 finished with value: 0.4872140946651383 and parameters: {'hidden_dropout': 0.39654711497366557, 'out_features_layer_0': 292, 'out_features_layer_1': 572, 'out_features_layer_2': 23}. Best is trial 0 with value: 0.4872140946651383.
[W 2024-10-31 12:05:07,257] Trial 1 failed with parameters: {'hidden_dropout': 0.36177104390061604, 'out_features_layer_0': 247, 'out_features_layer_1': 706, 'out_features_layer_2': 121} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_377542/2175656236.py", line 14, in <lambda>
    study.optimize(lambda trial: objective(trial, num_layers, min_out, max_out, in_features, loaders, study_name),
  File "/tmp/ipykernel_377542/3303981478.py", line 26, in objective
    train_epoch(model, loaders, criterion, optimizer, num

KeyboardInterrupt: 

## Train the best model

In [15]:
# Get parameters of the best study
out_features = list(study.best_params.values())[2:]  # Adjust indexing as needed
input_dropout = study.best_params['input_dropout']   # Example key name
hidden_dropout = study.best_params['hidden_dropout'] # Example key name

# Build the model
model = Model(in_features, out_features, input_dropout, hidden_dropout)
# model = Model(X.shape[1], [128, 64, 32], 0, .25)
# model = Model(X.shape[1], [64, 32, 16], 0, .25)
# model = Model(X.shape[1], [64, 16], 0, .25)
# model = Model(X.shape[1], [128*4], 0, .5)
# model = Model(X.shape[1], [128*8], 0, .75)

loaders = prepare_data_loaders(X_train, y_train, X_test, y_test, X_val, y_val, batch_size=8, num_workers=1)

## Compiling might not be worth it (Cannot save the model if we do.)
# model = torch.compile(model)#, mode = 'max-autotune')

# Move model to the GPU
model.to(device)
# model = torch.compile(model)

# Initialize optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.BCEWithLogitsLoss()

num_epochs = 1
# Training loop for num_epochs
for epoch in range(num_epochs):
    train_epoch_progress(model, loaders, criterion, optimizer, num_epochs, epoch, device)
    test_loss, test_accuracy = test_model_progress(model, loaders, criterion, device, num_epochs, epoch, loader='test')

test_loss, test_accuracy = test_model_progress(model, loaders, criterion, device, num_epochs, epoch, loader='val')
print(f"Val: Loss={test_loss:0.5f}, Accuracy={test_accuracy:0.1%}")

KeyError: 'input_dropout'