In [7]:
import numpy as np
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F


# #config
config = {
    'data_set': '../../../data/filtered-dataset-no-header.csv',
    'model': {
        'log': False,
        'log_location':'../logs',
        'save_location': '../models',
        'layer_sizes': [32, 64, 128],
        'dropout_rate': 0.7,
        'learning_rates': [0.0005, 0.005, 0.01],
    },
    'epochs': [10,15],
    'save_graphs': False,
    'graph_locations': '../graphs',
    'device': 'cpu'
}


device = torch.device(config['device'])

Using CUDA cores


In [8]:
class Net(nn.Module):
    #constructor
    def __init__(self, num_units=10):
        super(Net, self).__init__() #superclass constructor
        self.fc1 = nn.Linear(10,num_units)
        self.fc2 = nn.Linear(num_units,num_units)
        self.fc3 = nn.Linear(num_units,2)
        self.nonlin = nn.ReLU()
        self.dropout = nn.Dropout(p=config['model']['dropout'])
        self.batchnorm = nn.BatchNorm1d(num_units)

    def forward(self, x):
        x = self.nonlin(self.fc1(x))
        x = self.batchnorm(x)
        x = self.nonlin(self.fc2(x))
        x = self.batchnorm(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x


In [9]:
def load_data_set(filename, delimiter=','):
    dataset = []
    with open(filename, 'r') as f:
        reader = csv.reader(f, delimiter=delimiter)
        for row in reader:
            dataset.append(np.array([row]))
    print(f'The size of the entire dataset is {len(dataset)} points')
    return dataset
    
def create_features_and_labels(dataset):
    features = []
    labels = []
    for point in dataset:
        features.append([float(x) for x in point[0][:10]])
        blue_victory = int(point[0][10]) #1 if blue victory, 0 otherwise
        labels.append(blue_victory)
    
    return np.array(features), np.array(labels)


In [10]:
dataset = load_data_set(config['data_set'])

The size of the entire dataset is 820926 points


In [11]:
X, y = create_features_and_labels(dataset)
X = X.astype(np.float32)
y = y.astype(np.int64)

In [12]:
from sklearn.model_selection import GridSearchCV
from skorch import NeuralNetClassifier

net = NeuralNetClassifier(
    Net,
    max_epochs=10,
    lr=0.1,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
)


params = {
    'lr': config['model']['learning_rates'],
    'max_epochs':config['epochs'],
    'module__num_units': config['model']['layer_sizes'],
}

gs = GridSearchCV(net, params, refit=False, scoring='accuracy')

gs.fit(X, y)
print(gs.best_score_, gs.best_params_)

  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1           nan       [32m0.4963[0m           nan  10.3550
      2           nan       [32m0.5018[0m           nan  10.2740
      3           nan       [32m0.5034[0m           nan  10.2450
      4           nan       [32m0.5040[0m           nan  10.2890
      5           nan       0.5040           nan  10.3200
  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1           nan       [32m0.5040[0m           nan  10.2070
      2           nan       0.5028           nan  10.2320
      3           nan       0.4995           nan  10.2200
      4           nan       0.4989           nan  10.2300
      5           nan       0.4998           nan  10.1580
  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1           nan      