In [1]:
import requests
import pandas as pd
import numpy as np
import re
import sys, getopt
import csv
import pickle
import copy
import os

pd.set_option('display.max_rows', 500)

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline  
import seaborn as sns
sns.set_style("darkgrid")

import umap
from sklearn.decomposition import TruncatedSVD as tsvd

def nearZeroVarDropAuto(df,thresh=0.99):
    vVal=df.var(axis=0).values
    cs=pd.Series(vVal).sort_values(ascending=False).cumsum()
    remove=cs[cs>cs.values[-1]*thresh].index.values
    return df.drop(df.columns[remove],axis=1)

%run SodaKick_download_functions.ipynb


import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adagrad, Adam, Adagrad
import torch.nn as nn
import torch.nn.functional as F

In [2]:
#from ray import tune
#from ray.tune import CLIReporter
#from ray.tune.schedulers import ASHAScheduler

from hyperopt import hp, tpe, fmin, Trials
from hyperopt import STATUS_OK, STATUS_FAIL

In [3]:
class EarlyStopping:

    """ Stops the training if loss doesn't improve after a given number of epochs. """

    def __init__(self, patience=3, epsilon=1e-5, keepBest=True, silent=True):

        """
        Args:
            patience (int): Number of epochs without change before stopping the learning (default 3).
            epsilon (float): Minimum change in loss to be considered for early stopping (default 1e-5).
            keepBest (bool): Keep track of the best model (memory consuming).
        """

        self.patience = patience
        self.epsilon = epsilon
        self.counter = 0
        
        self.bestScore = np.inf
     
        self.keepBest = keepBest 
        self.bestModel = None

        self.earlyStop = False
        self.silent = silent

    def __call__(self, loss, model):


        """ Evaluate the loss change between epochs and activates early stop if below epsilon.

        Args:
            loss (float): current loss.
            model (torch model): the current model.
        """

        if loss > self.bestScore - self.epsilon:

            self.counter += 1
            if not self.silent:
                print('EarlyStopping counter: {:d}/{:d}'.format(self.counter,self.patience))

            if self.counter >= self.patience:
                self.earlyStop = True

        else:   

            self.counter = 0
            self.bestScore = loss

            if self.keepBest:
                self.bestModel = copy.deepcopy(model)


In [4]:
class matchesDataset(Dataset):

    """ Extend pytorch Dataset class to include cleaning and training set creation, """
    
    def __init__(self, matches, results):

        self.matches = torch.tensor(matches, dtype=torch.float32)
        self.results = torch.tensor(results, dtype=torch.float32)

    def __len__(self):

        """ Returns the len of the training sample. """
        
        return len(self.matches)
        

    def __getitem__(self, index): 

        """ Returns a word, a context word and a list of negative words for training for a given index. 

        Args:
            index (int): index for the word selection.

        Returns:
            (string, string, list of strings): selected word, context word and a randomly drawn list 
                                               of negative words.

        """
        return self.matches[index], self.results[index]

In [5]:
#https://towardsdatascience.com/quirky-keras-custom-and-asymmetric-loss-functions-for-keras-in-r-a8b5271171fe
#weighted asimmetric square error, errors by going below the value (not seeing a goal when it's there) are weighted more

def WSE(output, target, a=1.5, b=.5):
    loss = torch.mean(a/(a+b)*torch.minimum(torch.zeros(output.shape[1]),output - target)**2+\
                      b/(a+b)*torch.maximum(torch.zeros(output.shape[1]),output - target)**2)      
    return loss

def WSEl1(output, target, a=1.5, b=.5):
    loss = torch.mean(a/(a+b)*torch.abs(torch.minimum(torch.zeros(output.shape[1]),output - target))+\
                      b/(a+b)*torch.abs(torch.maximum(torch.zeros(output.shape[1]),output - target)))      
    return loss

def WSE2(output, target, a=1.5, b=.5):
    loss = np.mean(a/(a+b)*np.minimum(np.zeros(output.shape[0]),output - target)**2+\
                      b/(a+b)*np.maximum(np.zeros(output.shape[0]),output - target)**2)      
    return loss

def WSEl12(output, target, a=1.5, b=.5):
    loss = np.mean(a/(a+b)*np.abs(np.minimum(np.zeros(output.shape[0]),output - target))+\
                      b/(a+b)*np.abs(np.maximum(np.zeros(output.shape[0]),output - target)))      
    return loss

In [6]:
def normalize_mins(vec):
    for i in range(vec.shape[0]):
        vec[i][::8]=vec[i][::8]/90

def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))

def NormalizeMatrix(data):   
    for i in range(data.shape[1]):
        data[:,i] = NormalizeData(data[:,i])

def norm_max(out):
    
    maxes=[]
    for i in range(int(out.shape[1]/8.0)):
        maxes.append(out[:,8*int(i):8*(int(i)+1)].max(axis=0))

        #maxes.append(out.max(axis=1)[8*int(i):8*(int(i)+1):8])
    denominator=np.tile(np.max(maxes,axis=0),int(out.shape[1]/8))
    return out/denominator, denominator 

with open(r'/Users/federico comitani/GitHub/sodakick/data/inp_220223.pkl', 'rb') as pk:
    inp=pickle.load(pk)
with open(r'/Users/federico comitani/GitHub/sodakick/data/out_220223.pkl', 'rb') as pk:
    out=np.array(pickle.load(pk),dtype=float)
    
### skipping norm for now since it's already tsvd 
#NormalizeMatrix(inp)
#np.nan_to_num(inp, copy=False)

from sklearn import preprocessing

scaler = preprocessing.MinMaxScaler()
inp = scaler.fit_transform(inp)

#normalize_mins(out)
out, denominator= norm_max(out)

In [7]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
         inp[:25000], out[:25000], test_size=0.2, random_state=32)

In [8]:
class Net(nn.Module):
    
    def __init__(self, num_layers, num_nodes, scaling_factor, num_nodes_out, final_activation, batch_norm, dropout):
        super(Net, self).__init__()
        
        self.fc = []
        self.lr = []
        self.bn = []
        self.dp = []
        self.fact = final_activation
        self.nl = num_layers
        self.batch_norm = batch_norm
        self.dropout = dropout
        
        power=0
        
        for i in range(self.nl):
            self.fc.append(nn.Linear(int(num_nodes*(scaling_factor**power)), int(num_nodes*(scaling_factor**(power+1)))))
            self.lr.append(nn.LeakyReLU())
            
            if self.batch_norm:
                self.bn.append(nn.BatchNorm1d(int(num_nodes*(scaling_factor**(power+1)))))

            if self.dropout>0.0:
                self.dp.append(nn.Dropout(dropout))
                
            power+=1
        
        self.oupt = nn.Linear(int(num_nodes*(scaling_factor**power)), int(num_nodes_out))
    
    def forward(self, x):
        z = x
        for i in range(self.nl):
            
            z = self.fc[i](z)
            
            if self.batch_norm:
                z = self.bn[i](z)
            
            z = self.lr[i](z)
        
            if self.dropout>0.0:
                z = self.dp[i](z)
                
        if self.fact is not None:
            z = self.fact(z)
        
        z = self.oupt(z)
        
        return z
    
    def clp(self):
        with torch.no_grad():
            for i in range(self.nl):
                self.fc[i].weight.copy_ (self.fc[i].weight.data.clamp(min=0)) 
            self.oupt.weight.copy_ (self.oupt.weight.data.clamp(min=0))

In [9]:
def train(config, model=Net, silent=True, checkpoint_dir=None):
    
    
    try:
        phases = ['train','val']

        #x_train, x_test, y_train, y_test = data[0], data[1], data[2], data[3]

        training_set = matchesDataset(x_train, y_train)
        trainBatch = torch.utils.data.DataLoader(training_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

        validation_set = matchesDataset(x_test, y_test)
        valBatch = torch.utils.data.DataLoader(validation_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

        earlStop = EarlyStopping(patience=config['patience'], keepBest=False)

        net = model(config['num_layers'], config['num_nodes'], config['scaling_factor'], 
                    config['num_nodes_out'], config['final_activation'], config['batch_norm'], config['dropout'])

        device = "cpu"
        if torch.cuda.is_available():
            device = "cuda:0"
            if torch.cuda.device_count() > 1:
                net = nn.DataParallel(net)
        net.to(device)

        if checkpoint_dir:
            model_state, optimizer_state = torch.load(
                os.path.join(checkpoint_dir, "checkpoint"))
            net.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

        if config['optim']=='adam':
            optimizer = Adam(net.parameters(), lr=config['lr'])
        elif config['optim']=='adagrad':
            optimizer = Adagrad(net.parameters(), lr=config['lr'])
        else:
            print('optim error')
            return


        losses=[[],[]]
        mses=[]
        diffs=[]
        exit=False

        #for epoch in tqdm(range(epochs), desc='Epoch'):
        for epoch in range(config['epochs']):

            if exit:
                break

            for phase in phases:
                if phase == 'train':
                    net.train(True) 

                    """ Run the training of the model. """    

                    losses_batch=[]
                    for batchNum, batch in enumerate(trainBatch):

                        x = batch[0]
                        y = batch[1]

                        """ Move batches to GPU if available. """

                        if torch.cuda.is_available():
                            x = x.cuda()
                            y = y.cuda()

                        """ Core of training. """

                        loss = config['loss_f'](net(x), y)

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        if config['clip']:
                            net.clp()

                        losses_batch.append(loss)

                    """ Early stop check. """

                    earlStop(loss, net)
                    finalepoch = epoch

                    if earlStop.earlyStop:

                        if not silent:
                            print('Limit loss improvement reached, stopping the training.')

                        exit=True 

                    #losses[0].append(torch.mean(torch.stack(losses_batch)).detach().cpu().numpy())
                else:
                    net.train(False)
                    net.eval()

                    val_loss=0
                    val_mse=0

                    losses_batch=[]
                    for batchNum, batch in enumerate(valBatch):

                        x = batch[0]
                        y = batch[1]

                        """ Move batches to GPU if available. """

                        if torch.cuda.is_available():
                            x = x.cuda()
                            y = y.cuda()

                        """ Core of training. """

                        optimizer.zero_grad()
                        output=net(x)
                        target=y
                        loss = config['loss_f'](output, target)

                        #losses_batch.append(loss)
                        val_loss+=loss.detach().numpy()
                        val_mse+=nn.MSELoss()(output, target).detach().numpy()

                    #losses[1].append(torch.mean(torch.stack(losses_batch)).detach().cpu().numpy())

                    #with tune.checkpoint_dir(epoch) as checkpoint_dir:
                    #    path = os.path.join(checkpoint_dir, "checkpoint")
                    #    torch.save((net.state_dict(), optimizer.state_dict()), path)

                    #tune.report(loss=(val_loss/batchNum), mse=(val_mse/batchNum))
                    #tune.report(loss=torch.mean(torch.stack(losses_batch)))

        return {'loss': (val_loss/batchNum), 'status': STATUS_OK , 'mse': (val_mse/batchNum)}
    
    except:
        
        return {'loss': np.nan, 'status': STATUS_FAIL, 'mse': np.nan}



In [31]:
def revert_output(output,multiplier=denominator,lineup=None):

    reframe=pd.DataFrame(output.reshape(48,8),
                 columns=['minutes','goals','assists','cards_yellow','cards_red','own_goals','goals_against','saves'])
    
    reframe[reframe<0] = 0
    if lineup is not None:
        reframe.index=lineup
        reframe.drop([x for x in reframe.index if x.startswith('dummy')], axis=0, inplace=True)
    
    
    #reframe['minutes']*=90
    reframe=reframe*denominator[:8]
    byteamframe=pd.concat([reframe.iloc[:24,:].sum(axis=0),reframe.iloc[24:,:].sum(axis=0)], axis=1).T
    
    return reframe, byteamframe[byteamframe.columns[1:]]

In [11]:
print('Baseline WSE: {:.3f}'.format(WSE2(np.array([0]*out[0].shape[0]),out[0])))
print('Baseline WSE l1: {:.3f}'.format(WSEl12(np.array([0]*out[0].shape[0]),out[0])))
print('Baseline MSE: {:.3f}'.format(WSE2(np.array([0]*out[0].shape[0]),out[0], a=1, b=1)))
print('Baseline MSE l1: {:.3f}'.format(WSEl12(np.array([0]*out[0].shape[0]),out[0], a=1, b=1)))

print(np.abs(out[1]-out[10]).sum())
print(np.abs(out[50]-out[60]).sum())
print(np.abs(out[100]-out[110]).sum())

Baseline WSE: 0.039
Baseline WSE l1: 0.051
Baseline MSE: 0.026
Baseline MSE l1: 0.034
35.763398692810455
23.69281045751634
34.116993464052285


In [12]:
def run_hopt(config, num_samples=10):#, gpus_per_trial=2):
    
    trials = Trials()
    result = fmin(
            fn=train,
            space=config,
            algo=tpe.suggest,
            max_evals=num_samples,
            trials=trials,
            show_progressbar=True),
            #early_stop_fn=10,
            #trials_save_file=None)
    
    
    return trials
    #return best_trained_model
    #test_acc = test_accuracy(best_trained_model, device)
    #print("Best trial test set accuracy: {}".format(test_acc))

In [13]:
 """config = {
        "num_layers": hp.choice('num_layers', [1, 2, 3]),
        "num_nodes": inp.shape[1],
        "scaling_factor": hp.uniform('scaling_factor', 0.5, 1.5),
        "num_nodes_out": out.shape[1], 
        "final_activation" : None, #hp.choice('final_activation',[torch.tanh, None]),
        "clip": False, #hp.choice('clip',[True, False]),
        "batch_size": 32, #[16, 32, 64, 128]
        "loss_f": nn.MSELoss(),#hp.choice('loss_f',[WSE, nn.MSELoss()]), #, nn.L1Loss()
        "optim": hp.choice('optim',['adam', 'adagrad']),
        "lr": hp.loguniform('lr', np.exp(np.log(1e-4)), np.exp(np.log(1e-1))),
        "batch_norm": hp.choice('batch_norm',[True, False]),
        "dropout": hp.choice('dropout',[0.0,0.1,0.2,0.3,0.4,0.5]),#hp.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),
        "shuffle": True,
        "num_workers": 4,
        "patience": 10,
        "epochs": 100
    }
    


btm = run_hopt(config, num_samples=100)
#https://docs.ray.io/en/latest/tune/api_docs/suggestion.html#tune-search-alg


results_df=[]

for trial in btm.trials:
    results_df.append([trial['result']['loss'],
    [1,2,3][trial['misc']['vals']["num_layers"][0]],
    #[inp.shape[1]][trial['misc']['vals']["num_nodes"][0]],
    trial['misc']['vals']["scaling_factor"][0],
    #[out.shape[1]][trial['misc']['vals']["num_nodes_out"][0]],
    #[torch.tanh, None][trial['misc']['vals']["final_activation"][0]],
    #[True, False][trial['misc']['vals']["clip"][0]],
    #[16, 32, 64, 128][trial['misc']['vals']["batch_size"][0]],
    #trial['misc']['vals']["loss_f"][0],
    ['adam', 'adagrad'][trial['misc']['vals']["optim"][0]],
    trial['misc']['vals']["lr"][0],
    [True, False][trial['misc']['vals']["batch_norm"][0]],
    [0.0,0.1,0.2,0.3,0.4,0.5][trial['misc']['vals']["dropout"][0]],
    #True][trial['misc']['vals']["shuffle"][0]],
    #[4][trial['misc']['vals']["num_workers"][0]],
    #[10][trial['misc']['vals']["patience"][0]],
    #[50][trial['misc']['vals']["epochs"][0]]])
                       
    ])

    
results_df=pd.DataFrame(results_df,columns=['loss','num_layers','scaling_factor',#'final_activation','clip',
                                 'optim','lr','batch_norm','dropout']).sort_values('loss')
results_df.to_hdf(r'/Users/federico comitani/GitHub/sodakick/data/hp_res1.h5',key='df')"""

'config = {\n       "num_layers": hp.choice(\'num_layers\', [1, 2, 3]),\n       "num_nodes": inp.shape[1],\n       "scaling_factor": hp.uniform(\'scaling_factor\', 0.5, 1.5),\n       "num_nodes_out": out.shape[1], \n       "final_activation" : None, #hp.choice(\'final_activation\',[torch.tanh, None]),\n       "clip": False, #hp.choice(\'clip\',[True, False]),\n       "batch_size": 32, #[16, 32, 64, 128]\n       "loss_f": nn.MSELoss(),#hp.choice(\'loss_f\',[WSE, nn.MSELoss()]), #, nn.L1Loss()\n       "optim": hp.choice(\'optim\',[\'adam\', \'adagrad\']),\n       "lr": hp.loguniform(\'lr\', np.exp(np.log(1e-4)), np.exp(np.log(1e-1))),\n       "batch_norm": hp.choice(\'batch_norm\',[True, False]),\n       "dropout": hp.choice(\'dropout\',[0.0,0.1,0.2,0.3,0.4,0.5]),#hp.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),\n       "shuffle": True,\n       "num_workers": 4,\n       "patience": 10,\n       "epochs": 100\n   }\n   \n\n\nbtm = run_hopt(config, num_samples=100)\n#https://d

In [14]:
""" config = {
        "num_layers": hp.choice('num_layers', [1, 2, 3]),
        "num_nodes": inp.shape[1],
        "scaling_factor": hp.uniform('scaling_factor', 0.5, 1.5),
        "num_nodes_out": out.shape[1], 
        "final_activation" : None, #hp.choice('final_activation',[torch.tanh, None]),
        "clip": False, #hp.choice('clip',[True, False]),
        "batch_size": 32, #[16, 32, 64, 128]
        "loss_f": WSE,#hp.choice('loss_f',[WSE, nn.MSELoss()]), #, nn.L1Loss()
        "optim": hp.choice('optim',['adam', 'adagrad']),
        "lr": hp.loguniform('lr', np.exp(np.log(1e-4)), np.exp(np.log(1e-1))),
        "batch_norm": hp.choice('batch_norm',[True, False]),
        "dropout": hp.choice('dropout',[0.0,0.1,0.2,0.3,0.4,0.5]),#hp.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),
        "shuffle": True,
        "num_workers": 4,
        "patience": 10,
        "epochs": 100
    }
    
btm = run_hopt(config, num_samples=100)

results_df=[]

for trial in btm.trials:
    results_df.append([trial['result']['loss'],
    [1,2,3][trial['misc']['vals']["num_layers"][0]],
    #[inp.shape[1]][trial['misc']['vals']["num_nodes"][0]],
    trial['misc']['vals']["scaling_factor"][0],
    #[out.shape[1]][trial['misc']['vals']["num_nodes_out"][0]],
    #[torch.tanh, None][trial['misc']['vals']["final_activation"][0]],
    #[True, False][trial['misc']['vals']["clip"][0]],
    #[16, 32, 64, 128][trial['misc']['vals']["batch_size"][0]],
    #trial['misc']['vals']["loss_f"][0],
    ['adam', 'adagrad'][trial['misc']['vals']["optim"][0]],
    trial['misc']['vals']["lr"][0],
    [True, False][trial['misc']['vals']["batch_norm"][0]],
    [0.0,0.1,0.2,0.3,0.4,0.5][trial['misc']['vals']["dropout"][0]],
    #True][trial['misc']['vals']["shuffle"][0]],
    #[4][trial['misc']['vals']["num_workers"][0]],
    #[10][trial['misc']['vals']["patience"][0]],
    #[50][trial['misc']['vals']["epochs"][0]]])
                       
    ])

    
results_df=pd.DataFrame(results_df,columns=['loss','num_layers','scaling_factor',#'final_activation','clip',
                                 'optim','lr','batch_norm','dropout']).sort_values('loss')
results_df.to_hdf(r'/Users/federico comitani/GitHub/sodakick/data/hp_res1_wse.h5',key='df')"""

' config = {\n        "num_layers": hp.choice(\'num_layers\', [1, 2, 3]),\n        "num_nodes": inp.shape[1],\n        "scaling_factor": hp.uniform(\'scaling_factor\', 0.5, 1.5),\n        "num_nodes_out": out.shape[1], \n        "final_activation" : None, #hp.choice(\'final_activation\',[torch.tanh, None]),\n        "clip": False, #hp.choice(\'clip\',[True, False]),\n        "batch_size": 32, #[16, 32, 64, 128]\n        "loss_f": WSE,#hp.choice(\'loss_f\',[WSE, nn.MSELoss()]), #, nn.L1Loss()\n        "optim": hp.choice(\'optim\',[\'adam\', \'adagrad\']),\n        "lr": hp.loguniform(\'lr\', np.exp(np.log(1e-4)), np.exp(np.log(1e-1))),\n        "batch_norm": hp.choice(\'batch_norm\',[True, False]),\n        "dropout": hp.choice(\'dropout\',[0.0,0.1,0.2,0.3,0.4,0.5]),#hp.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),\n        "shuffle": True,\n        "num_workers": 4,\n        "patience": 10,\n        "epochs": 100\n    }\n    \nbtm = run_hopt(config, num_samples=100)\n\nre

In [15]:
 config = {
        "num_layers": hp.choice('num_layers', [1, 2, 3]),
        "num_nodes": inp.shape[1],
        "scaling_factor": hp.uniform('scaling_factor', 0.5, 1.5),
        "num_nodes_out": out.shape[1], 
        "final_activation" : hp.choice('final_activation',[torch.tanh, None]),
        "clip": False, #hp.choice('clip',[True, False]),
        "batch_size": 32, #[16, 32, 64, 128]
        "loss_f": nn.MSELoss(),#WSE,#hp.choice('loss_f',[WSE, nn.MSELoss()]), #, nn.L1Loss()
        "optim": 'adam',#hp.choice('optim',['adam', 'adagrad']),
        "lr": hp.choice('lr',[0.0001,0.001,.00001]),#hp.loguniform('lr', np.exp(np.log(1e-4)), np.exp(np.log(1e-1))),
        "batch_norm": hp.choice('batch_norm',[True, False]),
        "dropout": hp.choice('dropout',[0.0,0.1,0.2,0.3]),#hp.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),
        "shuffle": True,
        "num_workers": 4,
        "patience": 10,
        "epochs": 100
    }
    
btm = run_hopt(config, num_samples=100)

100%|██████████| 100/100 [9:41:19<00:00, 348.80s/trial, best loss: 0.006937485976287952]   


In [22]:
results_df=[]

for trial in btm.trials:
    results_df.append([trial['result']['loss'],
    trial['result']['mse'],
    [1,2,3][trial['misc']['vals']["num_layers"][0]],
    #[inp.shape[1]][trial['misc']['vals']["num_nodes"][0]],
    trial['misc']['vals']["scaling_factor"][0],
    #[out.shape[1]][trial['misc']['vals']["num_nodes_out"][0]],
    [torch.tanh, None][trial['misc']['vals']["final_activation"][0]],
    #[True, False][trial['misc']['vals']["clip"][0]],
    #[16, 32, 64, 128][trial['misc']['vals']["batch_size"][0]],
    #trial['misc']['vals']["loss_f"][0],
    #['adam', 'adagrad'][trial['misc']['vals']["optim"][0]],
    [0.0001,0.001,.00001][trial['misc']['vals']["lr"][0]],
    [True, False][trial['misc']['vals']["batch_norm"][0]],
    [0.0,0.1,0.2,0.3,0.4,0.5][trial['misc']['vals']["dropout"][0]],
    #True][trial['misc']['vals']["shuffle"][0]],
    #[4][trial['misc']['vals']["num_workers"][0]],
    #[10][trial['misc']['vals']["patience"][0]],
    #[50][trial['misc']['vals']["epochs"][0]]])
                       
    ])

    
results_df=pd.DataFrame(results_df,columns=['loss',
                                            'mse',
                                            'num_layers',
                                            'scaling_factor',
                                            'final_activation',
                                            #'clip', 
                                            #'optim', 
                                            'lr',
                                            'batch_norm',
                                            'dropout']).sort_values('loss')
results_df.to_hdf(r'/Users/federico comitani/GitHub/sodakick/data/hp_res1_mse.h5',key='df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['final_activation'], dtype='object')]

  encoding=encoding,


In [23]:
results_df.sort_values('loss')

Unnamed: 0,loss,mse,num_layers,scaling_factor,final_activation,lr,batch_norm,dropout
90,0.006937,0.006937,1,1.130029,,0.0001,False,0.0
93,0.006951,0.006951,1,1.172128,<built-in method tanh of type object at 0x1406...,0.0001,False,0.0
94,0.007002,0.007002,1,0.935703,<built-in method tanh of type object at 0x1406...,0.0001,False,0.0
99,0.007013,0.007013,1,0.953396,<built-in method tanh of type object at 0x1406...,0.0001,False,0.0
96,0.007059,0.007059,1,0.89852,<built-in method tanh of type object at 0x1406...,0.0001,False,0.0
80,0.007068,0.007068,1,1.320935,,1e-05,False,0.0
36,0.007084,0.007084,3,1.45853,,0.001,False,0.0
75,0.007088,0.007088,3,1.384867,,0.001,False,0.0
69,0.007094,0.007094,3,1.461717,,0.001,False,0.0
70,0.007107,0.007107,3,1.461395,,0.001,False,0.0


In [41]:
 config = {
        "num_layers": hp.choice('num_layers', [1, 2, 3]),
        "num_nodes": inp.shape[1],
        "scaling_factor": hp.uniform('scaling_factor', 0.5, 1.5),
        "num_nodes_out": out.shape[1], 
        "final_activation" : None, #hp.choice('final_activation',[torch.tanh, None]),
        "clip": False, #hp.choice('clip',[True, False]),
        "batch_size": 32, #[16, 32, 64, 128]
        "loss_f": WSE,#WSE,#hp.choice('loss_f',[WSE, nn.MSELoss()]), #, nn.L1Loss()
        "optim": 'adam',#hp.choice('optim',['adam', 'adagrad']),
        "lr": 0.0001, #hp.choice('lr',[0.0001,0.001,.01,.1]),#hp.loguniform('lr', np.exp(np.log(1e-4)), np.exp(np.log(1e-1))),
        "batch_norm": False, #hp.choice('batch_norm',[True, False]),
        "dropout": 0.0, #hp.choice('dropout',[0.0,0.1,0.2,0.3,0.4,0.5]),#hp.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),
        "shuffle": True,
        "num_workers": 4,
        "patience": 10,
        "epochs": 100
    }
    
btm = run_hopt(config, num_samples=50)


100%|██████████| 50/50 [2:48:15<00:00, 201.92s/trial, best loss: 0.0036771481122391727]  


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['final_activation'], dtype='object')]

  encoding=encoding,


In [49]:
results_df=[]

for trial in btm.trials:
    results_df.append([trial['result']['loss'],
    trial['result']['mse'],
    [1,2,3][trial['misc']['vals']["num_layers"][0]],
    #[inp.shape[1]][trial['misc']['vals']["num_nodes"][0]],
    trial['misc']['vals']["scaling_factor"][0],
    #[out.shape[1]][trial['misc']['vals']["num_nodes_out"][0]],
    #[torch.tanh, None][trial['misc']['vals']["final_activation"][0]],
    #[True, False][trial['misc']['vals']["clip"][0]],
    #[16, 32, 64, 128][trial['misc']['vals']["batch_size"][0]],
    #trial['misc']['vals']["loss_f"][0],
    #['adam', 'adagrad'][trial['misc']['vals']["optim"][0]],
    #[0.0001,0.001,.01,.1][trial['misc']['vals']["lr"][0]],
    #[True, False][trial['misc']['vals']["batch_norm"][0]],
    #[0.0,0.1,0.2,0.3,0.4,0.5][trial['misc']['vals']["dropout"][0]],
    #True][trial['misc']['vals']["shuffle"][0]],
    #[4][trial['misc']['vals']["num_workers"][0]],
    #[10][trial['misc']['vals']["patience"][0]],
    #[50][trial['misc']['vals']["epochs"][0]]])
                       
    ])

 

results_df=pd.DataFrame(results_df,columns=['loss',
                                            'mse',
                                            'num_layers',
                                            'scaling_factor',
                                            #'final_activation',
                                            #'clip', 
                                            #'optim', 
                                            #'lr',
                                            #'batch_norm',
                                            #'dropout'
                                            ]).sort_values('loss')
results_df.to_hdf(r'/Users/federico comitani/GitHub/sodakick/data/hp_res1_wse.h5',key='df')

In [52]:
#results_df.sort_values('loss')

In [217]:
 config = {
        "num_layers": hp.choice('num_layers', [1, 2, 3]),
        "num_nodes": inp.shape[1],
        "scaling_factor": hp.uniform('scaling_factor', 0.5, 1.5),
        "num_nodes_out": out.shape[1], 
        "final_activation" : None, #hp.choice('final_activation',[torch.tanh, None]),
        "clip": False, #hp.choice('clip',[True, False]),
        "batch_size": 32, #[16, 32, 64, 128]
        "loss_f": WSE,#WSE,#hp.choice('loss_f',[WSE, nn.MSELoss()]), #, nn.L1Loss()
        "optim": 'adam',#hp.choice('optim',['adam', 'adagrad']),
        "lr": hp.choice('lr',[0.0001,0.001,.00001]),#hp.loguniform('lr', np.exp(np.log(1e-4)), np.exp(np.log(1e-1))),
        "batch_norm": False,#hp.choice('batch_norm',[True, False]),
        "dropout": hp.choice('dropout',[0.0,0.1,0.2,0.3]),#hp.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),
        "shuffle": True,
        "num_workers": 4,
        "patience": 10,
        "epochs": 100
    }
    
btm = run_hopt(config, num_samples=50)

100%|██████████| 50/50 [4:24:36<00:00, 317.53s/trial, best loss: 0.03656742031662128]  


In [220]:
results_df=[]

for trial in btm.trials:
    results_df.append([trial['result']['loss'],
    [1,2,3][trial['misc']['vals']["num_layers"][0]],
    #[inp.shape[1]][trial['misc']['vals']["num_nodes"][0]],
    trial['misc']['vals']["scaling_factor"][0],
    #[out.shape[1]][trial['misc']['vals']["num_nodes_out"][0]],
    #[torch.tanh, None][trial['misc']['vals']["final_activation"][0]],
    #[True, False][trial['misc']['vals']["clip"][0]],
    #[16, 32, 64, 128][trial['misc']['vals']["batch_size"][0]],
    #trial['misc']['vals']["loss_f"][0],
    #['adam', 'adagrad'][trial['misc']['vals']["optim"][0]],
    [0.0001,0.001,.00001][trial['misc']['vals']["lr"][0]],
    #[True, False][trial['misc']['vals']["batch_norm"][0]],
    [0.0,0.1,0.2,0.3,0.4,0.5][trial['misc']['vals']["dropout"][0]],
    #True][trial['misc']['vals']["shuffle"][0]],
    #[4][trial['misc']['vals']["num_workers"][0]],
    #[10][trial['misc']['vals']["patience"][0]],
    #[50][trial['misc']['vals']["epochs"][0]]])
                       
    ])

    
results_df=pd.DataFrame(results_df,columns=['loss','num_layers','scaling_factor',#'final_activation','clip', #'optim', #'batch_norm',
                                 'lr','dropout']).sort_values('loss')
results_df.to_hdf(r'/Users/federico comitani/GitHub/sodakick/data/hp_res1_wse_adam.h5',key='df')

In [221]:
results_df.sort_values('loss')

Unnamed: 0,loss,num_layers,scaling_factor,lr,dropout
25,0.036567,3,1.490137,0.0001,0.0
21,0.036576,3,1.488431,0.0001,0.0
5,0.036726,3,1.442904,0.0001,0.0
30,0.036774,3,1.499753,0.0001,0.0
20,0.036937,3,1.470998,0.0001,0.0
36,0.036961,1,1.425598,0.0001,0.1
42,0.036978,1,1.318707,0.0001,0.1
27,0.037003,3,1.483998,0.0001,0.0
22,0.037089,3,1.376107,0.0001,0.0
49,0.037104,2,1.363401,0.0001,0.0


In [24]:
import copy

conf_final=copy.deepcopy(config)

for key,value in results_df.sort_values('loss').iloc[0].to_dict().items():
    if key in conf_final:
        conf_final[key]=value
        
conf_final

{'num_layers': 1,
 'num_nodes': 816,
 'scaling_factor': 1.1300291662687787,
 'num_nodes_out': 384,
 'final_activation': None,
 'clip': False,
 'batch_size': 32,
 'loss_f': MSELoss(),
 'optim': 'adam',
 'lr': 0.0001,
 'batch_norm': False,
 'dropout': 0.0,
 'shuffle': True,
 'num_workers': 4,
 'patience': 10,
 'epochs': 100}

In [25]:
from tqdm import tqdm

def train_alone(config, model=Net, silent=True, checkpoint_dir=None):
    
    
    phases = ['train','val']

    #x_train, x_test, y_train, y_test = data[0], data[1], data[2], data[3]

    training_set = matchesDataset(x_train, y_train)
    trainBatch = torch.utils.data.DataLoader(training_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

    validation_set = matchesDataset(x_test, y_test)
    valBatch = torch.utils.data.DataLoader(validation_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

    earlStop = EarlyStopping(patience=int(config['patience']), keepBest=True)

    net = model(int(config['num_layers']), int(config['num_nodes']), config['scaling_factor'], 
                int(config['num_nodes_out']), config['final_activation'], config['batch_norm'], config['dropout'])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    if config['optim']=='adam':
        optimizer = Adam(net.parameters(), lr=config['lr'])
    elif config['optim']=='adagrad':
        optimizer = Adagrad(net.parameters(), lr=config['lr'])
    else:
        print('optim error')
        return


    losses=[[],[]]
    mses=[]
    diffs=[]
    exit=False

    for epoch in tqdm(range(config['epochs']), desc='Epoch'):
    #for epoch in range(config['epochs']):

        if exit:
            break

        for phase in phases:
            if phase == 'train':
                net.train(True) 

                """ Run the training of the model. """    

                losses_batch=[]
                for batchNum, batch in enumerate(trainBatch):

                    x = batch[0]
                    y = batch[1]

                    """ Move batches to GPU if available. """

                    if torch.cuda.is_available():
                        x = x.cuda()
                        y = y.cuda()

                    """ Core of training. """

                    loss = config['loss_f'](net(x), y)

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if config['clip']:
                        net.clp()

                    losses_batch.append(loss)

                """ Early stop check. """

                earlStop(loss, net)
                finalepoch = epoch

                if earlStop.earlyStop:

                    if not silent:
                        print('Limit loss improvement reached, stopping the training.')

                    exit=True 

                #losses[0].append(torch.mean(torch.stack(losses_batch)).detach().cpu().numpy())
            else:
                net.train(False)
                net.eval()

                val_loss=0
                val_mse=0

                losses_batch=[]
                for batchNum, batch in enumerate(valBatch):

                    x = batch[0]
                    y = batch[1]

                    """ Move batches to GPU if available. """

                    if torch.cuda.is_available():
                        x = x.cuda()
                        y = y.cuda()

                    """ Core of training. """

                    optimizer.zero_grad()
                    output=net(x)
                    target=y
                    loss = config['loss_f'](output, target)

                    #losses_batch.append(loss)
                    val_loss+=loss.detach().numpy()
                    val_mse+=nn.MSELoss()(output, target).detach().numpy()

                #losses[1].append(torch.mean(torch.stack(losses_batch)).detach().cpu().numpy())

                #with tune.checkpoint_dir(epoch) as checkpoint_dir:
                #    path = os.path.join(checkpoint_dir, "checkpoint")
                #    torch.save((net.state_dict(), optimizer.state_dict()), path)

                #tune.report(loss=(val_loss/batchNum), mse=(val_mse/batchNum))
                #tune.report(loss=torch.mean(torch.stack(losses_batch)))

    return net, val_loss/batchNum


In [26]:
net,loss=train_alone(conf_final, model=Net, silent=True, checkpoint_dir=None)

Epoch:  24%|██▍       | 24/100 [01:14<03:54,  3.08s/it]


In [27]:
pred=net(torch.Tensor(inp)).detach().cpu().numpy()

In [39]:
i=15000
cats=['minutes','goals','assists','cards_yellow','cards_red','own_goals']+['goals_against','saves']

reframe, byteamframe = revert_output(pred[i])
print(byteamframe.astype(int))
reframe, byteamframe = revert_output(out[i])
print(byteamframe.astype(int))

   goals  assists  cards_yellow  cards_red  own_goals  goals_against  saves
0      1        1             2          0          0              2      4
1      2        1             2          0          0              2      4
   goals  assists  cards_yellow  cards_red  own_goals  goals_against  saves
0      3        3             4          0          0              4      3
1      4        2             3          0          0              3      1


In [267]:
results_df=pd.read_hdf(r'/Users/federico comitani/GitHub/sodakick/data/hp_res1_adam.h5',key='df')

import copy

conf_final=copy.deepcopy(config)

for key,value in results_df.sort_values('loss').iloc[0].to_dict().items():
    if key in conf_final:
        conf_final[key]=value
        
conf_final

net,loss=train_alone(conf_final, model=Net, silent=True, checkpoint_dir=None)

Epoch:  25%|██▌       | 25/100 [01:51<05:35,  4.47s/it]


In [268]:
pred=net(torch.Tensor(inp)).detach().cpu().numpy()

In [273]:
i=-20000
cats=['minutes','goals','assists','cards_yellow','cards_red','own_goals']+['goals_against','saves']

reframe, byteamframe = revert_output(pred[i])
print(byteamframe.astype(int))
reframe, byteamframe = revert_output(out[i])
print(byteamframe.astype(int))

   goals  assists  cards_yellow  cards_red  own_goals  goals_against  saves
0      3        2             6          0          0              4      6
1      5        4             5          0          0              3      5
   goals  assists  cards_yellow  cards_red  own_goals  goals_against  saves
0      1        1             4          0          0              1      6
1      1        1             1          0          0              1      1


In [53]:
results_df=pd.read_hdf(r'/Users/federico comitani/GitHub/sodakick/data/hp_res1_wse.h5',key='df')

import copy

conf_final=copy.deepcopy(config)

for key,value in results_df.sort_values('loss').iloc[0].to_dict().items():
    if key in conf_final:
        conf_final[key]=value
        
conf_final

net,loss=train_alone(conf_final, model=Net, silent=True, checkpoint_dir=None)

Epoch:  31%|███       | 31/100 [02:55<06:31,  5.67s/it]


In [54]:
pred=net(torch.Tensor(inp)).detach().cpu().numpy()

In [59]:
i=10000
cats=['minutes','goals','assists','cards_yellow','cards_red','own_goals']+['goals_against','saves']

reframe, byteamframe = revert_output(pred[i])
print(byteamframe.astype(int))
reframe, byteamframe = revert_output(out[i])
print(byteamframe.astype(int))

   goals  assists  cards_yellow  cards_red  own_goals  goals_against  saves
0      4        3             5          0          0              2      4
1      2        2             5          0          0              3      6
   goals  assists  cards_yellow  cards_red  own_goals  goals_against  saves
0      3        2             2          0          0              0      1
1      0        0             0          0          0              3      2
