In [1]:
import copy
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from ray import tune
from torch.optim import SGD, Adagrad, Adam, RMSprop, Adadelta
from torch.utils.data import DataLoader, Dataset

In [3]:
class EarlyStopping:

    """ Stops the training if loss doesn't improve after a given number of epochs. """

    def __init__(self, patience=3, epsilon=1e-5, keepBest=True, silent=True):

        """
        Args:
            patience (int): Number of consecutive calls without improvement before
                            ``earlyStop`` is raised (default 3).
            epsilon (float): Minimum decrease of the loss that counts as an improvement (default 1e-5).
            keepBest (bool): Keep a deep copy of the best model seen so far (memory consuming).
            silent (bool): If False, print the patience counter each time the loss fails to improve.
        """

        self.patience = patience
        self.epsilon = epsilon
        self.counter = 0

        # Best (lowest) loss seen so far; +inf guarantees the first finite loss is an improvement.
        self.bestScore = np.inf

        self.keepBest = keepBest
        self.bestModel = None

        self.earlyStop = False
        self.silent = silent

    def __call__(self, loss, model):

        """ Evaluate the loss change between epochs and activates early stop if below epsilon.

        Args:
            loss (float or scalar tensor): current loss.
            model (torch model): the current model, deep-copied when it is the best so far
                                 and ``keepBest`` is True.
        """

        # Store a plain float: a tensor loss would keep its whole autograd graph alive
        # for as long as it is referenced by ``bestScore``.
        loss = float(loss)

        # The comparison is written in the "improved" direction so that a NaN loss
        # (for which every comparison is False) counts as "no improvement" instead of
        # silently replacing bestScore and disabling early stopping for good.
        improved = loss <= self.bestScore - self.epsilon

        if improved:

            self.counter = 0
            self.bestScore = loss

            if self.keepBest:
                self.bestModel = copy.deepcopy(model)

        else:

            self.counter += 1
            if not self.silent:
                print('EarlyStopping counter: {:d}/{:d}'.format(self.counter,self.patience))

            if self.counter >= self.patience:
                self.earlyStop = True


In [4]:
class matchesDataset(Dataset):

    """ Pytorch Dataset that pairs each row of ``matches`` with the same-index row of ``results``. """

    def __init__(self, matches, results):

        """ Store both inputs as float32 tensors.

        Args:
            matches: array-like accepted by ``torch.tensor`` (the model inputs).
            results: array-like accepted by ``torch.tensor`` (the targets).
        """

        as_float = torch.float32
        self.matches = torch.tensor(matches, dtype=as_float)
        self.results = torch.tensor(results, dtype=as_float)

    def __len__(self):

        """ Returns the number of samples, i.e. the length of ``matches`` along its first axis. """

        n_samples = len(self.matches)
        return n_samples

    def __getitem__(self, index):

        """ Returns the input/target pair stored at ``index``.

        Args:
            index (int): position of the sample.

        Returns:
            (tensor, tensor): ``matches[index]`` and ``results[index]``.

        """
        features = self.matches[index]
        target = self.results[index]
        return features, target

In [13]:
class LogCoshLoss(nn.Module):

    """ Mean log-cosh loss: ``mean(log(cosh(y_t - y_prime_t)))``. """

    def __init__(self):
        super().__init__()

    def forward(self, y_t, y_prime_t):

        """ Compute the mean log-cosh of the element-wise difference.

        Args:
            y_t (tensor): first operand of the difference.
            y_prime_t (tensor): second operand, broadcastable against ``y_t``.

        Returns:
            tensor: scalar loss.
        """

        abs_err = torch.abs(y_t - y_prime_t)
        # cosh() overflows float32 once |x| exceeds ~89, which turned the naive
        # log(cosh(x)) into inf. Use the algebraically identical, overflow-free form
        #   log(cosh(x)) = |x| + log(1 + exp(-2|x|)) - log(2).
        # (The former 1e-12 offset was below float32 resolution and is dropped.)
        log_two = 0.6931471805599453
        return torch.mean(abs_err + F.softplus(-2.0 * abs_err) - log_two)

In [5]:
#https://towardsdatascience.com/quirky-keras-custom-and-asymmetric-loss-functions-for-keras-in-r-a8b5271171fe
def WSE(output, target, a=1.5, b=.5):
    """ Asymmetric (weighted) mean squared error for torch tensors.

    With ``diff = output - target``, negative differences are weighted by ``a``
    and positive differences by ``b`` before averaging over all elements.

    Args:
        output (tensor): predictions.
        target (tensor): targets, broadcastable against ``output``.
        a (float): weight of the squared negative part of ``diff`` (default 1.5).
        b (float): weight of the squared positive part of ``diff`` (default 0.5).

    Returns:
        tensor: scalar loss.
    """
    diff = output - target
    # clamp() stays on diff's device/dtype and works for any rank; the previous
    # torch.zeros(output.shape[1]) was always a CPU float32 tensor and required 2-D input.
    negative_part = torch.clamp(diff, max=0)
    positive_part = torch.clamp(diff, min=0)
    loss = torch.mean(a * negative_part ** 2 + b * positive_part ** 2)
    return loss

def WSEl1(output, target, a=1.5, b=.5):
    """ Asymmetric (weighted) mean absolute error for torch tensors.

    With ``diff = output - target``, negative differences are weighted by ``a``
    and positive differences by ``b`` before averaging over all elements.

    Args:
        output (tensor): predictions.
        target (tensor): targets, broadcastable against ``output``.
        a (float): weight of the absolute negative part of ``diff`` (default 1.5).
        b (float): weight of the absolute positive part of ``diff`` (default 0.5).

    Returns:
        tensor: scalar loss.
    """
    diff = output - target
    # clamp() stays on diff's device/dtype and works for any rank; the previous
    # torch.zeros(output.shape[1]) was always a CPU float32 tensor and required 2-D input.
    negative_part = torch.clamp(diff, max=0)
    positive_part = torch.clamp(diff, min=0)
    loss = torch.mean(a * torch.abs(negative_part) + b * torch.abs(positive_part))
    return loss

def WSE2(output, target, a=1.5, b=.5):
    """ Numpy counterpart of ``WSE``: asymmetric (weighted) mean squared error.

    With ``diff = output - target``, negative differences are weighted by ``a``
    and positive differences by ``b`` before averaging over all elements.

    Args:
        output (ndarray): predictions.
        target (ndarray): targets, broadcastable against ``output``.
        a (float): weight of the squared negative part of ``diff`` (default 1.5).
        b (float): weight of the squared positive part of ``diff`` (default 0.5).

    Returns:
        float: scalar loss.
    """
    diff = output - target
    # Compare against a scalar zero: np.zeros(output.shape[0]) only broadcast
    # correctly for 1-D inputs (or 2-D inputs whose two dimensions happened to match).
    negative_part = np.minimum(diff, 0.0)
    positive_part = np.maximum(diff, 0.0)
    loss = np.mean(a * negative_part ** 2 + b * positive_part ** 2)
    return loss

def WSEl12(output, target, a=1.5, b=.5):
    """ Numpy counterpart of ``WSEl1``: asymmetric (weighted) mean absolute error.

    With ``diff = output - target``, negative differences are weighted by ``a``
    and positive differences by ``b`` before averaging over all elements.

    Args:
        output (ndarray): predictions.
        target (ndarray): targets, broadcastable against ``output``.
        a (float): weight of the absolute negative part of ``diff`` (default 1.5).
        b (float): weight of the absolute positive part of ``diff`` (default 0.5).

    Returns:
        float: scalar loss.
    """
    diff = output - target
    # Compare against a scalar zero: np.zeros(output.shape[0]) only broadcast
    # correctly for 1-D inputs (or 2-D inputs whose two dimensions happened to match).
    negative_part = np.minimum(diff, 0.0)
    positive_part = np.maximum(diff, 0.0)
    loss = np.mean(a * np.abs(negative_part) + b * np.abs(positive_part))
    return loss

In [6]:
class Net(nn.Module):

    """ Fully connected network whose layer widths follow a geometric progression.

    Hidden layer ``k`` maps ``int(num_nodes * scaling_factor**k)`` features to
    ``int(num_nodes * scaling_factor**(k+1))`` features and is followed by an optional
    BatchNorm1d, a LeakyReLU and an optional Dropout. A final linear layer maps to
    ``num_nodes_out`` features.
    """

    def __init__(self, num_layers, num_nodes, scaling_factor, num_nodes_out, final_activation,
                 dropout_percent=0, batchnorm=False):

        """
        Args:
            num_layers (int): number of hidden blocks.
            num_nodes (int): number of input features.
            scaling_factor (float): width ratio between consecutive layers.
            num_nodes_out (int): number of output features.
            final_activation (callable or None): applied to the last hidden activation
                                                 before the output layer (see ``forward``).
            dropout_percent (float): dropout probability; no dropout layers when <= 0 (default 0).
            batchnorm (bool): insert a BatchNorm1d after every hidden linear layer (default False).
        """

        super(Net, self).__init__()

        self.fact = final_activation
        self.nl = num_layers

        # nn.ModuleList (not a plain list) so the layers are registered: their weights then
        # show up in parameters()/state_dict(), follow .to(device) and react to train()/eval().
        self.fc = nn.ModuleList()
        self.lr = nn.ModuleList()
        self.bn = nn.ModuleList()
        self.dp = nn.ModuleList()

        power = 0
        for _ in range(int(self.nl)):
            width_in = int(num_nodes*(scaling_factor**power))
            width_out = int(num_nodes*(scaling_factor**(power+1)))
            self.fc.append(nn.Linear(width_in, width_out))
            self.lr.append(nn.LeakyReLU())
            if batchnorm:
                self.bn.append(nn.BatchNorm1d(width_out))
            if dropout_percent>0:
                self.dp.append(nn.Dropout(dropout_percent))
            power += 1

        self.oupt = nn.Linear(int(num_nodes*(scaling_factor**power)), int(num_nodes_out))

    def reset_weights(self):

        """ Re-initialise the hidden linear weights with Xavier-uniform (the output layer is left untouched). """

        for f in self.fc:
            nn.init.xavier_uniform_(f.weight, gain=1)

    def forward(self, x):

        """ Run ``x`` through the hidden blocks and the output layer. """

        use_bn = len(self.bn) > 0
        use_dp = len(self.dp) > 0

        z = x
        for i, layer in enumerate(self.fc):
            z = layer(z)
            if use_bn:
                z = self.bn[i](z)
            z = self.lr[i](z)
            if use_dp:
                z = self.dp[i](z)

        # NOTE(review): despite its name, the "final" activation is applied *before* the
        # output layer, not after it. Behaviour kept as-is - confirm this is intended.
        if self.fact is not None:
            z = self.fact(z)
        return self.oupt(z)

    def clp(self):

        """ Clamp all linear weights (hidden and output) to be non-negative, in place. """

        with torch.no_grad():
            for layer in self.fc:
                layer.weight.clamp_(min=0)
            self.oupt.weight.clamp_(min=0)

In [6]:
class AE(nn.Module):

    """ Autoencoder-shaped fully connected network.

    ``num_layers`` encoder blocks interpolate the width linearly from ``num_nodes`` to
    ``embed_dim``; ``num_layers - 1`` decoder blocks plus a final linear layer interpolate
    from ``embed_dim`` to ``num_nodes_out``. Every block is Linear -> optional BatchNorm1d
    -> LeakyReLU -> optional Dropout.
    """

    def __init__(self, num_layers, num_nodes, embed_dim, num_nodes_out, final_activation,
                 dropout_percent=0, batchnorm=False, verbose=False):

        """
        Args:
            num_layers (int): number of encoder blocks (must be >= 1).
            num_nodes (int): number of input features.
            embed_dim (int): width of the bottleneck.
            num_nodes_out (int): number of output features.
            final_activation (callable or None): applied to the last hidden activation
                                                 before the output layer (see ``forward``).
            dropout_percent (float): dropout probability; no dropout layers when <= 0 (default 0).
            batchnorm (bool): insert a BatchNorm1d after every hidden linear layer (default False).
            verbose (bool): print the (in, out) width of every linear layer while building
                            (default False; replaces the former unconditional debug prints).
        """

        super(AE, self).__init__()

        self.fact = final_activation
        self.nl = num_layers

        # nn.ModuleList (not a plain list) so the layers are registered: their weights then
        # show up in parameters()/state_dict(), follow .to(device) and react to train()/eval().
        self.fc = nn.ModuleList()
        self.lr = nn.ModuleList()
        self.bn = nn.ModuleList()
        self.dp = nn.ModuleList()

        def width(base, step):
            # Linear interpolation between `base` (step 0) and `embed_dim` (step nl).
            return int(base+step*1.0*(embed_dim-base)/(self.nl))

        def add_block(width_in, width_out):
            self.fc.append(nn.Linear(width_in, width_out))
            if verbose:
                print(width_in, width_out)
            self.lr.append(nn.LeakyReLU())
            if batchnorm:
                self.bn.append(nn.BatchNorm1d(width_out))
            if dropout_percent>0:
                self.dp.append(nn.Dropout(dropout_percent))

        # Encoder: num_nodes -> embed_dim.
        for i in range(int(self.nl)):
            add_block(width(num_nodes, i), width(num_nodes, i+1))

        if verbose:
            print()

        # Decoder: embed_dim -> one step above num_nodes_out (the last step is self.oupt).
        for i in reversed(range(int(self.nl) - 1)):
            add_block(width(num_nodes_out, i+2), width(num_nodes_out, i+1))

        self.oupt = nn.Linear(width(num_nodes_out, 1), int(num_nodes_out))
        if verbose:
            print(width(num_nodes_out, 1), int(num_nodes_out))

    def reset_weights(self):

        """ Re-initialise the hidden linear weights with Xavier-uniform (the output layer is left untouched). """

        for f in self.fc:
            nn.init.xavier_uniform_(f.weight, gain=1)

    def forward(self, x):

        """ Run ``x`` through the encoder, the decoder and the output layer. """

        use_bn = len(self.bn) > 0
        use_dp = len(self.dp) > 0

        z = x
        for i, layer in enumerate(self.fc):
            z = layer(z)
            if use_bn:
                z = self.bn[i](z)
            z = self.lr[i](z)
            if use_dp:
                z = self.dp[i](z)

        # NOTE(review): despite its name, the "final" activation is applied *before* the
        # output layer, not after it. Behaviour kept as-is - confirm this is intended.
        if self.fact is not None:
            z = self.fact(z)
        return self.oupt(z)

    def clp(self):

        """ Clamp all linear weights (encoder, decoder and output) to be non-negative, in place. """

        with torch.no_grad():
            # Iterate over every hidden layer: self.fc holds 2*nl-1 layers, so the former
            # range(self.nl) left the decoder weights unclamped.
            for layer in self.fc:
                layer.weight.clamp_(min=0)
            self.oupt.weight.clamp_(min=0)

In [None]:
from numpy import floor

def conv_out_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    """ Output size of a convolution along one or two spatial axes.

    Applies ``floor((size + 2*pad - dilation*(kernel - 1) - 1) / stride + 1)`` per axis.

    Args:
        h_w (number or list/tuple of two numbers): input size; a scalar for a single axis,
            or ``(height, width)`` for two axes.
        kernel_size (int or list/tuple of two ints): kernel size; a scalar is used for both axes.
        stride (int): stride, shared by both axes (default 1).
        pad (int): padding on each side, shared by both axes (default 0).
        dilation (int): dilation, shared by both axes (default 1).

    Returns:
        A single floored value for scalar ``h_w``, otherwise a ``(height, width)`` tuple.
        Values are whatever ``numpy.floor`` returns (floats), so cast with ``int`` if needed.
    """

    def axis_size(size, kernel):
        return floor(((size + (2 * pad) - (dilation * (kernel - 1)) - 1) / stride) + 1)

    # Tuples are accepted alongside lists: previously a tuple h_w fell through to the
    # scalar branch and a list kernel_size was wrapped instead of unpacked (both TypeError).
    if isinstance(h_w, (list, tuple)):
        if not isinstance(kernel_size, (list, tuple)):
            kernel_size = (kernel_size, kernel_size)
        return axis_size(h_w[0], kernel_size[0]), axis_size(h_w[1], kernel_size[1])

    return axis_size(h_w, kernel_size)

In [12]:
class CNet(nn.Module):

    """ 1-D convolutional network followed by a fully connected head.

    Each conv block is Conv1d -> optional MaxPool1d -> optional BatchNorm1d -> LeakyReLU.
    The result is flattened and fed to dense blocks (Linear -> optional BatchNorm1d ->
    LeakyReLU -> optional Dropout) and a final linear layer.
    The first convolution has ``in_channels = 1``.
    """

    def __init__(self, inp_nodes, conv_layers, conv_nodes, conv_scaling, pooling, conv_kernels, conv_stride,
                 dense_layers, dense_nodes, dense_scaling, out_nodes, final_activation, dropout_percent, batchnorm):

        """
        Args:
            inp_nodes (int): length of the input signal.
            conv_layers (int): number of conv blocks.
            conv_nodes (int): output channels of the first conv block.
            conv_scaling (float): channel ratio between consecutive conv blocks.
            pooling (int): max-pool window; values <= 1 disable pooling.
            conv_kernels (int): convolution kernel size.
            conv_stride (int): convolution stride.
            dense_layers (int): number of dense blocks.
            dense_nodes (int): width of the first dense block.
            dense_scaling (float): width ratio between consecutive dense blocks.
            out_nodes (int): number of output features.
            final_activation (callable or None): applied to the last hidden activation
                                                 before the output layer (see ``forward``).
            dropout_percent (float): dropout probability; no dropout layers when <= 0.
            batchnorm (bool): insert batch normalisation in every conv and dense block.
        """

        super(CNet, self).__init__()

        self.fact = final_activation
        self.cl = conv_layers
        self.dl = dense_layers

        # nn.ModuleList (not a plain list) so the layers are registered: their weights then
        # show up in parameters()/state_dict(), follow .to(device) and react to train()/eval().
        self.cv = nn.ModuleList()
        self.lr_cv = nn.ModuleList()
        self.pl = nn.ModuleList()
        self.bn_cv = nn.ModuleList()
        self.fc = nn.ModuleList()
        self.lr_fc = nn.ModuleList()
        self.bn_fc = nn.ModuleList()
        self.dp = nn.ModuleList()

        if pooling<=0:
            pooling=1

        # Track the running channel count and signal length so that every layer is sized
        # from what the previous one actually produces. The former code sized block k>0 as
        # conv_nodes*scaling**(k-1) -> conv_nodes*scaling**(k-1), which never applied the
        # scaling within a block, broke for >=3 blocks with scaling != 1, and always used
        # conv_nodes for BatchNorm and the flatten size.
        channels = 1
        cos = inp_nodes
        for power in range(self.cl):
            out_channels = int(conv_nodes*(conv_scaling**power))
            self.cv.append(nn.Conv1d(in_channels = channels,
                                     out_channels = out_channels,
                                     kernel_size = conv_kernels,
                                     stride = conv_stride))
            # Length after the convolution, then after the (floor-dividing) max-pool.
            cos = int(conv_out_shape(cos,
                                     kernel_size = conv_kernels,
                                     stride = conv_stride)/pooling)

            self.lr_cv.append(nn.LeakyReLU())
            if pooling>1:
                self.pl.append(nn.MaxPool1d(pooling))
            if batchnorm:
                self.bn_cv.append(nn.BatchNorm1d(out_channels))

            channels = out_channels

        self.flat = nn.Flatten()

        # Width entering the dense head = flattened conv output (also correct when there
        # are no conv blocks or no dense blocks at all).
        features = int(cos*channels)
        for power in range(self.dl):
            out_features = int(dense_nodes*(dense_scaling**power))
            self.fc.append(nn.Linear(in_features = features,
                                     out_features = out_features))

            self.lr_fc.append(nn.LeakyReLU())

            if batchnorm:
                self.bn_fc.append(nn.BatchNorm1d(out_features))
            if dropout_percent>0:
                self.dp.append(nn.Dropout(dropout_percent))

            features = out_features

        self.oupt = nn.Linear(features, int(out_nodes))

    def reset_weights(self):

        """ Re-initialise conv and hidden dense weights with Xavier-uniform (the output layer is left untouched). """

        for f in self.cv:
            nn.init.xavier_uniform_(f.weight, gain=1)
        for f in self.fc:
            nn.init.xavier_uniform_(f.weight, gain=1)

    def forward(self, x):

        """ Run ``x`` through the conv blocks, flatten, then the dense head.

        ``x`` must be laid out as Conv1d expects, with a single input channel.
        """

        use_pool = len(self.pl) > 0
        use_bn_cv = len(self.bn_cv) > 0
        use_bn_fc = len(self.bn_fc) > 0
        use_dp = len(self.dp) > 0

        z = x
        for i, conv in enumerate(self.cv):
            z = conv(z)
            if use_pool:
                z = self.pl[i](z)
            if use_bn_cv:
                z = self.bn_cv[i](z)
            z = self.lr_cv[i](z)

        z = self.flat(z)

        for i, dense in enumerate(self.fc):
            z = dense(z)
            if use_bn_fc:
                z = self.bn_fc[i](z)
            z = self.lr_fc[i](z)
            if use_dp:
                z = self.dp[i](z)

        # NOTE(review): despite its name, the "final" activation is applied *before* the
        # output layer, not after it. Behaviour kept as-is - confirm this is intended.
        if self.fact is not None:
            z = self.fact(z)
        return self.oupt(z)

    def clp(self):

        """ Clamp the dense weights (hidden and output) to be non-negative, in place.

        Convolution weights are not touched.
        """

        with torch.no_grad():
            # Iterate the layers directly: this class has no `nl` attribute, so the
            # former range(self.nl) raised AttributeError.
            for layer in self.fc:
                layer.weight.clamp_(min=0)
            self.oupt.weight.clamp_(min=0)

In [7]:
def train(config, model=Net, silent=True, checkpoint_dir=None):
    """ Ray Tune trainable: train ``model`` and report validation metrics every epoch.

    The data are read from the notebook globals ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``, which must be defined before this function is called.

    Args:
        config (dict): hyper-parameters. Keys read here:
            'batch_size', 'shuffle', 'num_workers', 'patience', 'num_layers', 'num_nodes',
            'num_nodes_out', 'final_activation', one of 'scaling_factor' / 'embed_dim'
            (third constructor argument), 'optim' ('adam', 'adagrad', 'adadelta',
            'rmsprop' or 'sgd'), 'lr', 'epochs', 'loss_f' (callable ``(output, target)``)
            and optionally 'clip', 'dropout_percent', 'batchnorm'.
        model (class): network class, built as ``model(num_layers, num_nodes, <third arg>,
            num_nodes_out, final_activation, dropout_percent, batchnorm)``.
        silent (bool): if False, print a message when early stopping triggers.
        checkpoint_dir (str or None): directory holding a "checkpoint" file to resume from.

    Raises:
        ValueError: if config has neither 'scaling_factor' nor 'embed_dim', or names an
            unknown optimizer.
    """

    training_set = matchesDataset(x_train, y_train)
    trainBatch = torch.utils.data.DataLoader(training_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

    validation_set = matchesDataset(x_test, y_test)
    valBatch = torch.utils.data.DataLoader(validation_set, batch_size=config['batch_size'], shuffle=config['shuffle'], num_workers=config['num_workers'])

    earlStop = EarlyStopping(patience=config['patience'], keepBest=False)

    # --- model -------------------------------------------------------------------------
    if 'scaling_factor' in config:
        third_arg = config['scaling_factor']
    elif 'embed_dim' in config:
        third_arg = config['embed_dim']
    else:
        raise ValueError("config must contain either 'scaling_factor' or 'embed_dim'")

    # The network constructors take dropout and batchnorm settings as well; they were
    # previously omitted from the call, which raised TypeError.
    net = model(config['num_layers'], config['num_nodes'], third_arg,
                config['num_nodes_out'], config['final_activation'],
                config.get('dropout_percent', 0), config.get('batchnorm', False))

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    # clp() lives on the wrapped network when DataParallel is in use.
    core_net = net.module if isinstance(net, nn.DataParallel) else net

    # --- optimizer ---------------------------------------------------------------------
    optimizers = {'adam': Adam, 'adagrad': Adagrad, 'adadelta': Adadelta,
                  'rmsprop': RMSprop, 'sgd': SGD}
    if config['optim'] not in optimizers:
        # Fail loudly instead of printing and returning without reporting anything.
        raise ValueError('optim error: unknown optimizer {!r}'.format(config['optim']))
    optimizer = optimizers[config['optim']](net.parameters(), lr=config['lr'])

    # Restore state only now that the optimizer exists (it used to be referenced before
    # being created).
    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    loss_f = config['loss_f']
    mse_f = nn.MSELoss()
    clip = config.get('clip', False)

    stop_requested = False

    for epoch in range(config['epochs']):

        if stop_requested:
            break

        # --- training phase ------------------------------------------------------------
        net.train(True)

        last_train_loss = None
        for x, y in trainBatch:

            x = x.to(device)
            y = y.to(device)

            loss = loss_f(net(x), y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if clip:
                core_net.clp()

            # Keep a float only, so the batch's autograd graph can be freed.
            last_train_loss = loss.item()

        # Early stop check.
        # NOTE(review): the signal is the *last training minibatch* loss (unchanged from
        # the original), not a validation or epoch-mean loss - confirm this is intended.
        if last_train_loss is not None:
            earlStop(last_train_loss, net)

        if earlStop.earlyStop:

            if not silent:
                print('Limit loss improvement reached, stopping the training.')

            # The validation phase of this epoch still runs; the loop exits afterwards.
            stop_requested = True

        # --- validation phase ----------------------------------------------------------
        net.eval()

        val_loss = 0.0
        val_mse = 0.0
        n_batches = 0

        with torch.no_grad():
            for x, y in valBatch:

                x = x.to(device)
                y = y.to(device)

                output = net(x)

                # .item() works for CPU and CUDA tensors alike.
                val_loss += loss_f(output, y).item()
                val_mse += mse_f(output, y).item()
                n_batches += 1

        with tune.checkpoint_dir(epoch) as epoch_checkpoint_dir:
            path = os.path.join(epoch_checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        # Average over the number of batches; the former divisor was the last batch
        # *index* (count - 1), i.e. off by one and zero for a single batch.
        if n_batches:
            tune.report(loss=(val_loss/n_batches), mse=(val_mse/n_batches))
        else:
            tune.report(loss=float('nan'), mse=float('nan'))
                #tune.report(loss=torch.mean(torch.stack(losses_batch)))

In [8]:
def revert_output(output,lineup=None,dropdummy=False):
    """ Turn a flat model output into a per-row stats table and a per-team summary.

    ``output`` is reshaped to 48 rows x 8 columns (minutes, goals, assists, cards_yellow,
    cards_red, own_goals, goals_against, saves). Negative values are set to 0, 'minutes'
    is multiplied by 90 (presumably it is predicted as a fraction of a 90-minute match -
    TODO confirm) and a 'team' column is added: 0 for the first 25 rows, 1 for the rest.

    Args:
        output: array-like exposing ``reshape``; must hold 48*8 values.
        lineup (sequence or None): 48 labels used as the table index.
        dropdummy (bool): with ``lineup`` given, drop rows whose label starts with 'dummy'.

    Returns:
        (DataFrame, DataFrame): the per-row table, and the per-team column sums without
        the 'minutes' and 'team' columns (row 0 = team 0, row 1 = team 1).
    """

    stat_columns = ['minutes','goals','assists','cards_yellow','cards_red','own_goals','goals_against','saves']

    # Work on a copy so the caller's array is never modified.
    reframe = pd.DataFrame(copy.deepcopy(output.reshape(48,8)), columns=stat_columns)

    reframe = reframe.clip(lower=0)

    # Build the team column in one go. The former chained assignment
    # `reframe['team'].iloc[25:] = 1` may write to a temporary copy (and has no effect
    # at all under pandas copy-on-write).
    # NOTE(review): 48 rows split at position 25 gives 25 + 23 rows per team rather than
    # 24 + 24 - kept as in the original, confirm the intended split.
    reframe['team'] = np.where(np.arange(len(reframe)) >= 25, 1, 0)
    reframe['minutes'] = reframe['minutes'] * 90

    if lineup is not None:
        reframe.index = lineup
        if dropdummy:
            dummy_rows = [x for x in reframe.index if x.startswith('dummy')]
            reframe = reframe.drop(dummy_rows, axis=0)

    team_totals = [reframe[reframe['team']==team].sum(axis=0) for team in (0, 1)]
    byteamframe = pd.concat(team_totals, axis=1).T

    # Drop the first ('minutes') and last ('team') columns from the team summary.
    return reframe, byteamframe[byteamframe.columns[1:-1]]