In [1]:
import pandas as pd
import torch
import numpy as np
import random
from random import shuffle

import wget
import os
import aiohttp
import asyncio
from fpl import FPL
from torch.utils.data import TensorDataset, DataLoader
from player import Player
from team import Team
from data_processor import get_fpl, get_players, get_teams, get_training_datasets, get_all_player_features
from matplotlib import pyplot as plt
import seaborn as sns
import torch.nn as nn
import math
import torch.optim as optim

In [2]:
fpl = await get_fpl()
team_feature_names = ["npxGA"]
teams = get_teams(team_feature_names=team_feature_names, visualize=False)
player_feature_names = ["total_points", "ict_index", "clean_sheets", "saves", "assists", "yellow_cards"]
players = await get_players(player_feature_names, team_feature_names, visualize=False, num_players=640)

In [40]:
def get_timeseries_dataset(players, input_window=3, input_features=len(player_feature_names)):
    '''
        Args
            players - List of players 
            input_window - input window 
            input_features - number of input features
        Returns
            Time series dataset that is a numpy array of shape (N, input_window, input_features)
    '''
    X, Y = [], []
    for player in players:
        player_points = player.player_features # (input_features, timesteps)
        for i in range(player_points.shape[1] - input_window):
            x = player_points[:,i:i+input_window]
            y = player_points[0, i+input_window]
            X.append(x), Y.append(y)
    X, Y = np.array(X).astype(float), np.array(Y).astype(float)
    X, Y = torch.tensor(X), torch.tensor(Y)
    return X, Y
 

X, Y =  get_timeseries_dataset(players)
X.shape, Y.shape # (N, input_features, context_window)

(torch.Size([25415, 6, 3]), torch.Size([25415]))

In [41]:
def normalize(x, epislon=1e-6, is_scalar=False):
    '''
    Args
        x - numpy array of shape (N, L, D)
    Returns
        normalized_x - normalized numpy array of shape (N, L, D). Normalized along dimension D
    '''
    if is_scalar:
        return (x - x.mean()) / (x.std())
    means = torch.mean(x, axis=[0, 1])
    stds = torch.std(x, axis=[0, 1])
    return (x - means) / stds

X = X.permute((0, 2, 1))
X, Y = normalize(X), normalize(Y, is_scalar=True)
X = X.permute((0, 2, 1))
X.shape, Y.shape

(torch.Size([25415, 6, 3]), torch.Size([25415]))

# Dynamic data augmentation
- Input window is x [0:7]. 2 elements are set to 0 at random
- Target is sampled form x[7:9]
- Why - I want an nearly infinite dataset

In [42]:
class AvgModel(nn.Module):
    def __init__(self):
        pass 
    def forward(self, x):
        # shape of x is (Batch_size, L, D)
        return x[:,:, 0].mean(dim=-1)

class PrevModel(nn.Module):
    def __init__(self):
        pass 
    def forward(self, x):
        # shape of x is (Batch_size, L, D)
        return x[:,-1, 0]

class LinearModel(nn.Module):
    def __init__(self, input_window):
        super(LinearModel, self).__init__()
        self.input_window = input_window
        self.fc1 = nn.Linear(input_window, 1).double()
    
    def forward(self, x):
        # shape of x is (Batch_size, L * D)
        return self.fc1(x).view((-1, ))


class NonLinearModel(nn.Module):
    def __init__(self, input_window):
        super(NonLinearModel, self).__init__()
        self.model = nn.Sequential(*[nn.Linear(input_window, 100).double(),
                                     nn.ReLU(),
                                     nn.Linear(100, 100).double(),
                                     nn.ReLU(),
                                     nn.Linear(100, 1).double()])
    def forward(self, x):
        # shape of x is (Batch_size, L * D)
        return self.model(x)


input_tensor = torch.tensor([4, 6, 8]).double().reshape((1, 3, 1))
print(input_tensor)

avg_model = AvgModel()
print(avg_model.forward(input_tensor))

prev_model = PrevModel()
print(prev_model.forward(input_tensor))

linear_model = LinearModel(input_window=3)
print(linear_model.forward((input_tensor.reshape((-1, 3)))))

non_linear_model = NonLinearModel(input_window=3)
print(non_linear_model.forward((input_tensor.reshape((-1, 3)))))

tensor([[[4.],
         [6.],
         [8.]]], dtype=torch.float64)
tensor([6.], dtype=torch.float64)
tensor([8.], dtype=torch.float64)
tensor([7.9072], dtype=torch.float64, grad_fn=<ViewBackward>)
tensor([[0.8767]], dtype=torch.float64, grad_fn=<AddmmBackward>)


In [43]:

train_indices = int(0.8 * len(X))
X_train, X_test = X[:train_indices], X[train_indices:]
Y_train, Y_test = Y[:train_indices], Y[train_indices:]

train_loader = DataLoader(TensorDataset(X_train, Y_train), batch_size=25)
test_loader = DataLoader(TensorDataset(X_test, Y_test), batch_size=25)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7fac066aa070>

In [44]:
def fit(model, train_loader, epochs=10, input_window=3, len_features=len(player_feature_names)):
    optimizer = optim.Adam(model.parameters(), 1e-3)
    for epoch in range(epochs): 
        for (inputs, outputs) in train_loader:
            optimizer.zero_grad()
            if isinstance(model, LinearModel) or isinstance(model, NonLinearModel):
                inputs = inputs.reshape((-1, input_window * len_features))
            predictions = model.forward(inputs)
            residual = (predictions - outputs)
            loss = (residual * residual).sum() 
            loss.backward()
            optimizer.step()

def eval(model, test_loader,input_window = 3, len_features=len(player_feature_names)):
    sum_loss = 0
    count_loss = 0
    
    for (inputs, outputs)  in test_loader:
        if isinstance(model, LinearModel) or isinstance(model, NonLinearModel):
            inputs = inputs.reshape((-1, input_window * len_features))
        predictions = model.forward(inputs)
        sum_loss += (predictions - outputs).abs().mean().item()
        count_loss += 1

    return sum_loss / count_loss

In [46]:
input_window = 3
avg_losses = []
prev_losses = []
linear_losses = []
non_linear_losses = []
for seed in [5, 10, 25, 35]:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    avg_model = AvgModel()
    prev_model = PrevModel()
    linear_model = LinearModel(input_window * len(player_feature_names))
    fit(linear_model, train_loader)
    avg_losses.append(eval(avg_model, test_loader))
    prev_losses.append(eval(prev_model, test_loader))
    linear_losses.append(eval(linear_model, test_loader))
print(sum(avg_losses) / len(avg_losses))
print(sum(prev_losses) / len(prev_losses))
print(sum(linear_losses) / len(linear_losses))
'asd'

0.533986998801052
0.7013903232227673
0.48374184837916046


'asd'