In [1]:
import pandas as pd
import torch
import numpy as np
import random
from random import shuffle
from matplotlib import pyplot as plt
import seaborn as sns
from torch.utils.data import TensorDataset, DataLoader
import wget
import os
import aiohttp
import asyncio
np.random.seed(17)
random.seed(17)
torch.manual_seed(17)
from fpl import FPL
from player import Player
from team import Team
from data_processor import get_fpl, get_players, get_teams, get_training_datasets
from models import PreviousScoreModel, PlayerAvgScoreModel, LinearModel, HierarchialLinearModel, NonLinearModel
import torch.nn as nn
import torch.optim as optim
seed = 5
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f1449c5b850>

In [2]:
fpl = await get_fpl()
opponent_feature_names = ["npxG","npxGA"]
player_feature_names = ["total_points", "ict_index", "clean_sheets", "saves", "assists", "goals_scored"]
teams = get_teams(team_feature_names=opponent_feature_names, visualize=False)
players = await get_players(player_feature_names, opponent_feature_names, visualize=False, num_players=580)
len(players)

579

In [5]:
if torch.cuda.device_count() >= 1:
    torch.set_default_tensor_type(torch.cuda.DoubleTensor)
else:
    torch.set_default_tensor_type(torch.DoubleTensor)

In [39]:
def normalize(x, is_scalar=False):
    '''
        Args 
            x - input 
        Returns
            normalized input
    '''
    if is_scalar:
        x = x.reshape((-1, )).double()
        means = torch.mean(x)
        stds = torch.std(x)
        return (x - means) / stds
    else:
        x = x.double()
        # (N, D, L)
        x = x.permute(0, 2, 1)
        input_means = torch.mean(x, dim=(0, 1))
        input_stds = torch.std(x, dim=(0, 1))
        x = (x - input_means) / (input_stds)
        x = x.permute(0, 2, 1)
        return x

def get_masked_training_datasets(players, teams, window_size=7, batch_size=50):
    '''
        Args
            players - list of players
            teams - list of teams
        Returns
            Train and test data loaders
            Input - (Batch_size, num_features, window)

            Build up datasets in numpy
            Make test train splits and feed it to pytorch
    '''
    X = []
    for player in players:
        for i in range(player.player_features.shape[1] - window_size):
            x = player.player_features[:,i:i+window_size]
            X.append(x)

    X = np.array(X).astype(float)
    indices = np.random.permutation(range(len(X)))
    train_length = int(0.8 * len(X))
    X = torch.tensor(X).double()
    X = normalize(X)
    X_train, X_test = X[indices[:train_length]], X[indices[train_length:]] 
    train_loader = DataLoader(TensorDataset(X_train,), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(X_test,), batch_size=batch_size)
    return train_loader, test_loader


train_loader, test_loader = get_masked_training_datasets(players, teams)

In [40]:
normalize(torch.tensor([0, 1, 2, 3]), is_scalar=True)

tensor([-1.1619, -0.3873,  0.3873,  1.1619])

In [41]:
class AvgModel(nn.Module):
    def forward(self, x):
        '''
            Args 
                x - (N, D, L)
        '''
        return x[:,0,:].mean(dim=1).reshape((-1, ))


class PrevModel(nn.Module):
    def forward(self, x):
        '''
        '''
        return x[:,0,-1].reshape((-1, ))

class LinearModel(nn.Module):
    def __init__(self, window_size=4, num_features=5):
        super(LinearModel, self).__init__()
        self.dim = window_size * num_features
        self.fc1 = nn.Linear(self.dim, 1)
    
    def forward(self, x):
        x = x.reshape((x.shape[0], self.dim))
        return self.fc1(x).reshape((-1, ))

class RNNModel(nn.Module):
    def __init__(self, window_size=4, num_features=5, hidden_dim=128):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(num_features, hidden_dim, 5).double()
        self.fc = nn.Linear(hidden_dim, 1).double()

    def forward(self, x):
        '''
            Args
                x - shape (N, D, L)
        '''
        x = x.permute(2, 0, 1)
        h = self.rnn(x)
        o = self.fc(h[-1][-1])
        return o.reshape((-1, ))


input_tensor = torch.tensor([[0, 2, 4, 6], [1, 3, 5, 7]]).reshape((2, 1, 4)).double()
prev_model = PrevModel().double()
avg_model = AvgModel().double()
linear_model = LinearModel(num_features=1).double()
rnn_model = RNNModel(num_features=1)
print(avg_model.forward(input_tensor))
print(prev_model.forward(input_tensor))
print(linear_model.forward(input_tensor))
print(rnn_model.forward(input_tensor).shape)
print(len(train_loader))
sum(p.numel() for p in rnn_model.parameters())

tensor([3., 4.])
tensor([6., 7.])
tensor([-2.3222, -3.4954], grad_fn=<ViewBackward>)
torch.Size([2])
432


148993

In [42]:
next(linear_model.parameters()).is_cuda

True

In [45]:
def fit(model, train_loader, fixed_window=False, epochs=100):
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    model.train()
    for epoch in range(epochs):
        for (x,) in train_loader:
            optimizer.zero_grad()
            # inputs shape (Batch, D, window_size)
            input_window = np.random.choice([3, 4, 5, 6])
            if fixed_window:
                input_window = fixed_window
            inputs = x[:,:,:input_window]
            outputs = x[:,0,input_window]
            predictions = model.forward(inputs)
            residual = (predictions - outputs)
            loss = (residual * residual).sum()
            loss.backward()
            optimizer.step()
    
def eval(model, test_loader, input_window=4):
    sum_loss, count_loss = 0, 0
    sum_corr, count_corr = 0, 0
    model.eval()
    for (x,) in train_loader:
        inputs = x[:,:,:input_window]
        outputs = x[:,0,input_window]
        predictions = (model.forward(inputs))
        assert(predictions.shape == outputs.shape)
        residual = (predictions - outputs)
        loss = (residual * residual).mean().item()
        outputs_numpy = outputs.detach().cpu().numpy()
        predictions_numpy = predictions.detach().cpu().numpy()
        corr = np.corrcoef(predictions_numpy, outputs_numpy)[0, 1]
        sum_loss += loss 
        count_loss += 1
        sum_corr += corr 
        count_corr += 1

    return sum_loss / count_loss, sum_corr / count_corr

avg_model = AvgModel().double()
prev_model = PrevModel().double()
linear_model = LinearModel(num_features=6).double()
rnn_model = RNNModel(num_features=6)
rnn_model2 = RNNModel(num_features=6)
print(next(rnn_model.parameters()).is_cuda)

fit(linear_model, train_loader, fixed_window=4)
fit(rnn_model, train_loader)

print(eval(avg_model, test_loader))
print(eval(prev_model, test_loader,))
print(eval(linear_model, test_loader))
for input_window in [3, 4, 5, 6]:
    fit(rnn_model2, train_loader, fixed_window=input_window)
    print(input_window, eval(rnn_model, test_loader, input_window))
    print(input_window, eval(rnn_model2, test_loader, input_window))

True
(0.8714682340672647, 0.4509088131740236)
(1.2929235676353332, 0.3619696974957797)
(0.7630330976300981, 0.49506311577399)
3 (0.7570025913699335, 0.5046315872929207)
3 (1.08212627509715, 0.39539576175620655)
4 (0.7522344274871899, 0.5107519022044266)
4 (0.5485834867494361, 0.6642931134073217)
5 (0.7550590677992967, 0.5182318750131822)
5 (0.9050983446480304, 0.42470192300774523)
6 (0.743435864836893, 0.5191839308616802)
6 (0.89453653174908, 0.4366920956012712)
