In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import sys, getopt
import csv
import pickle
import copy
import os

from tqdm import tqdm


pd.set_option('display.max_rows', 500)

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline  
import seaborn as sns
sns.set_style("darkgrid")

import umap
from sklearn.decomposition import TruncatedSVD as tsvd

def nearZeroVarDropAuto(df,thresh=0.99):
    vVal=df.var(axis=0).values
    cs=pd.Series(vVal).sort_values(ascending=False).cumsum()
    remove=cs[cs>cs.values[-1]*thresh].index.values
    return df.drop(df.columns[remove],axis=1)

%run SodaKick_download_functions.ipynb


import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adagrad, Adam, Adagrad
import torch.nn as nn
import torch.nn.functional as F

In [2]:
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [3]:
class EarlyStopping:

    """ Stops the training if loss doesn't improve after a given number of epochs. """

    def __init__(self, patience=3, epsilon=1e-5, keepBest=True, silent=True):

        """
        Args:
            patience (int): Number of epochs without change before stopping the learning (default 3).
            epsilon (float): Minimum change in loss to be considered for early stopping (default 1e-5).
            keepBest (bool): Keep track of the best model (memory consuming).
        """

        self.patience = patience
        self.epsilon = epsilon
        self.counter = 0
        
        self.bestScore = np.inf
     
        self.keepBest = keepBest 
        self.bestModel = None

        self.earlyStop = False
        self.silent = silent

    def __call__(self, loss, model):


        """ Evaluate the loss change between epochs and activates early stop if below epsilon.

        Args:
            loss (float): current loss.
            model (torch model): the current model.
        """

        if loss > self.bestScore - self.epsilon:

            self.counter += 1
            if not self.silent:
                print('EarlyStopping counter: {:d}/{:d}'.format(self.counter,self.patience))

            if self.counter >= self.patience:
                self.earlyStop = True

        else:   

            self.counter = 0
            self.bestScore = loss

            if self.keepBest:
                self.bestModel = copy.deepcopy(model)


In [4]:
class matchesDataset(Dataset):

    """ Extend pytorch Dataset class to include cleaning and training set creation, """
    
    def __init__(self, matches, results):

        self.matches = torch.tensor(matches, dtype=torch.float32)
        self.results = torch.tensor(results, dtype=torch.float32)

    def __len__(self):

        """ Returns the len of the training sample. """
        
        return len(self.matches)
        

    def __getitem__(self, index): 

        """ Returns a word, a context word and a list of negative words for training for a given index. 

        Args:
            index (int): index for the word selection.

        Returns:
            (string, string, list of strings): selected word, context word and a randomly drawn list 
                                               of negative words.

        """
        return self.matches[index], self.results[index]

In [5]:
#https://towardsdatascience.com/quirky-keras-custom-and-asymmetric-loss-functions-for-keras-in-r-a8b5271171fe
#weighted asimmetric square error, errors by going below the value (not seeing a goal when it's there) are weighted more

def WSE(output, target, a=1.5, b=.5):
    loss = torch.mean(a*torch.minimum(torch.zeros(output.shape[1]),output - target)**2+\
                      b*torch.maximum(torch.zeros(output.shape[1]),output - target)**2)      
    return loss

def WSEl1(output, target, a=1.5, b=.5):
    loss = torch.mean(a*torch.abs(torch.minimum(torch.zeros(output.shape[1]),output - target))+\
                      b*torch.abs(torch.maximum(torch.zeros(output.shape[1]),output - target)))      
    return loss

def WSE2(output, target, a=1.5, b=.5):
    loss = np.mean(a*np.minimum(np.zeros(output.shape[0]),output - target)**2+\
                      b*np.maximum(np.zeros(output.shape[0]),output - target)**2)      
    return loss

def WSEl12(output, target, a=1.5, b=.5):
    loss = np.mean(a*np.abs(np.minimum(np.zeros(output.shape[0]),output - target))+\
                      b*np.abs(np.maximum(np.zeros(output.shape[0]),output - target)))      
    return loss

In [6]:
def normalize_mins(vec):
    for i in range(vec.shape[0]):
        vec[i][::8]=vec[i][::8]/90

def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))

def NormalizeMatrix(data):   
    for i in range(data.shape[1]):
        data[:,i] = NormalizeData(data[:,i])

    
with open(r'/Users/federico comitani/GitHub/sodakick/data/inp_220223.pkl', 'rb') as pk:
    inp=pickle.load(pk)
with open(r'/Users/federico comitani/GitHub/sodakick/data/out_220223.pkl', 'rb') as pk:
    out=np.array(pickle.load(pk),dtype=float)
    
### skipping norm for now since it's already tsvd 
#NormalizeMatrix(inp)
#np.nan_to_num(inp, copy=False)

normalize_mins(out)

In [7]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
         inp, out, test_size=0.2, random_state=32)

In [8]:
class Net(nn.Module):
    
    def __init__(self, num_layers, num_nodes, scaling_factor, num_nodes_out, final_activation, batch_norm, dropout):
        super(Net, self).__init__()
        
        self.fc = []
        self.lr = []
        self.bn = []
        self.dp = []
        self.fact = final_activation
        self.nl = num_layers
        power=0
        
        for i in range(self.nl):
            self.fc.append(nn.Linear(int(num_nodes*(scaling_factor**power)), int(num_nodes*(scaling_factor**(power+1)))))
            self.lr.append(nn.LeakyReLU())
            
            if batch_norm:
                self.bn.append(nn.BatchNorm(int(num_nodes*(scaling_factor**(power+1)))))

            if dropout>0.0:
                self.dp.append(nn.Dropout(dropout))
                
            power+=1
        
        self.oupt = nn.Linear(int(num_nodes*(scaling_factor**power)), int(num_nodes_out))
    
    def forward(self, x):
        z = x
        for i in range(self.nl):
            
            z = self.fc[i](z)
            
            if batch_norm:
                z = self.bn[i](z)
            
            z = self.lr[i](z)
        
            if dropout>0.0:
                z = self.dp[i](z)
                
        if self.fact is not None:
            z = self.fact(z)
        
        z = self.oupt(z)
        
        return z
    
    def clp(self):
        with torch.no_grad():
            for i in range(self.nl):
                self.fc[i].weight.copy_ (self.fc[i].weight.data.clamp(min=0)) 
            self.oupt.weight.copy_ (self.oupt.weight.data.clamp(min=0))

In [9]:
def train(config, model=Net, silent=True, checkpoint_dir=None):
    
    phases = ['train','val']
    
    #x_train, x_test, y_train, y_test = data[0], data[1], data[2], data[3]
    
    training_set = matchesDataset(x_train, y_train)
    trainBatch = torch.utils.data.DataLoader(training_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

    validation_set = matchesDataset(x_test, y_test)
    valBatch = torch.utils.data.DataLoader(validation_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

    earlStop = EarlyStopping(patience=config['patience'], keepBest=False)
    
    net = model(config['num_layers'], config['num_nodes'], config['scaling_factor'], 
                config['num_nodes_out'], config['final_activation'], config['batch_norm'], config['dropout'])
    
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)
    
    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    if config['optim']=='adam':
        optimizer = Adam(net.parameters(), lr=config['lr'])
    elif config['optim']=='adagrad':
        optimizer = Adagrad(net.parameters(), lr=config['lr'])
    else:
        print('optim error')
        return
    
    
    losses=[[],[]]
    mses=[]
    diffs=[]
    exit=False
    
    #for epoch in tqdm(range(epochs), desc='Epoch'):
    for epoch in range(config['epochs']):

        if exit:
            break
            
        for phase in phases:
            if phase == 'train':
                net.train(True) 

                """ Run the training of the model. """    

                losses_batch=[]
                for batchNum, batch in enumerate(trainBatch):

                    x = batch[0]
                    y = batch[1]

                    """ Move batches to GPU if available. """

                    if torch.cuda.is_available():
                        x = x.cuda()
                        y = y.cuda()

                    """ Core of training. """
                    
                    loss = config['loss_f'](net(x), y)

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if config['clip']:
                        net.clp()

                    losses_batch.append(loss)

                """ Early stop check. """

                earlStop(loss, net)
                finalepoch = epoch
                
                if earlStop.earlyStop:

                    if not silent:
                        print('Limit loss improvement reached, stopping the training.')
                        
                    exit=True 
                
                #losses[0].append(torch.mean(torch.stack(losses_batch)).detach().cpu().numpy())
            else:
                net.train(False)
                net.eval()
                
                val_loss=0
                val_mse=0
      
                losses_batch=[]
                for batchNum, batch in enumerate(valBatch):

                    x = batch[0]
                    y = batch[1]

                    """ Move batches to GPU if available. """

                    if torch.cuda.is_available():
                        x = x.cuda()
                        y = y.cuda()

                    """ Core of training. """

                    optimizer.zero_grad()
                    output=net(x)
                    target=y
                    loss = config['loss_f'](output, target)

                    #losses_batch.append(loss)
                    val_loss+=loss.detach().numpy()
                    val_mse+=nn.MSELoss()(output, target).detach().numpy()
                    
                #losses[1].append(torch.mean(torch.stack(losses_batch)).detach().cpu().numpy())
                
                with tune.checkpoint_dir(epoch) as checkpoint_dir:
                    path = os.path.join(checkpoint_dir, "checkpoint")
                    torch.save((net.state_dict(), optimizer.state_dict()), path)

                tune.report(loss=(val_loss/batchNum), mse=(val_mse/batchNum))
                #tune.report(loss=torch.mean(torch.stack(losses_batch)))

In [10]:
def revert_output(output,lineup=None):

    reframe=pd.DataFrame(output.reshape(48,8),
                 columns=['minutes','goals','assists','cards_yellow','cards_red','own_goals','goals_against','saves'])
    
    reframe[reframe<0] = 0
    if lineup is not None:
        reframe.index=lineup
        reframe.drop([x for x in reframe.index if x.startswith('dummy')], axis=0, inplace=True)
        
    reframe['minutes']*=90
    byteamframe=pd.concat([reframe.iloc[:24,:].sum(axis=0),reframe.iloc[24:,:].sum(axis=0)], axis=1).T
    
    return reframe, byteamframe[byteamframe.columns[1:]]

In [11]:
print('Baseline WSE: {:.3f}'.format(WSE2(np.array([0]*out[0].shape[0]),out[0])))
print('Baseline WSE l1: {:.3f}'.format(WSEl12(np.array([0]*out[0].shape[0]),out[0])))
print('Baseline MSE: {:.3f}'.format(WSE2(np.array([0]*out[0].shape[0]),out[0], a=1, b=1)))
print('Baseline MSE l1: {:.3f}'.format(WSEl12(np.array([0]*out[0].shape[0]),out[0], a=1, b=1)))

print(np.abs(out[1]-out[10]).sum())
print(np.abs(out[50]-out[60]).sum())
print(np.abs(out[100]-out[110]).sum())

Baseline WSE: 0.302
Baseline WSE l1: 0.176
Baseline MSE: 0.201
Baseline MSE l1: 0.117
73.33333333333334
49.13333333333334
70.8


In [12]:
def run_rtune(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    
    config = {
        "num_layers": tune.choice([1, 2, 3]),
        "num_nodes": inp.shape[1],
        "scaling_factor": tune.sample_from(lambda _: np.random.uniform(low=0.5, high=1.0001)),
        "num_nodes_out": out.shape[1], 
        "final_activation" : tune.choice([torch.tanh, None]),
        "clip": tune.choice([True, False]),
        "batch_size": tune.choice([16, 32, 64, 128]),
        "loss_f": tune.choice([WSE, nn.MSELoss()]), #, nn.L1Loss()
        "optim": tune.choice(['adam', 'adagrad']),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_norm": tune.choice([True, False]),
        "dropout": tune.choice([0.0,0.1,0.2,0.3,0.4,0.5]),#tune.sample_from(lambda _: np.random.uniform(low=0.0, high=.6)),
        "shuffle": True,
        "num_workers": 4,
        "patience": 10,
        "epochs": 100
    }
    
    scheduler = ASHAScheduler(
        metric="mse",#"loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    
    reporter = CLIReporter(
        metric_columns=["loss", "mse", "training_iteration"])
    
    result = tune.run(
        train,
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation mse: {}".format(
        best_trial.last_result["mse"]))

    best_trained_model = Net(best_trial.config['num_layers'], 
                             best_trial.config['num_nodes'], best_trial.config['scaling_factor'], 
                             best_trial.config['num_nodes_out'], best_trial.config['final_activation'])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    return best_trained_model
    #test_acc = test_accuracy(best_trained_model, device)
    #print("Best trial test set accuracy: {}".format(test_acc))

In [13]:
btm = run_rtune(num_samples=50, max_num_epochs=50, gpus_per_trial=0)
#https://docs.ray.io/en/latest/tune/api_docs/suggestion.html#tune-search-alg

2022-02-27 11:39:13,148	INFO services.py:1247 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


ConnectionError: Error 41 while writing to socket. Protocol wrong type for socket.

In [22]:
pred=btm(torch.Tensor(inp)).detach().cpu().numpy()[3]
cats=['minutes','goals','assists','cards_yellow','cards_red','own_goals']+['goals_against','saves']

reframe, byteamframe = revert_output(pred)
print(byteamframe)
reframe, byteamframe = revert_output(out[3])
print(byteamframe)

      goals   assists  cards_yellow  cards_red  own_goals  goals_against  \
0  0.524458  0.038081      0.173317   0.519007   0.509316       1.307664   
1  0.169901  0.518472      0.627050   0.271117   0.254911       1.080686   

      saves  
0  1.536534  
1  0.516705  
   goals  assists  cards_yellow  cards_red  own_goals  goals_against  saves
0    4.0      4.0           4.0        0.0        0.0            1.0    2.0
1    1.0      1.0           2.0        0.0        0.0            4.0    2.0
