In [70]:
import pandas as pd
import torch
import numpy as np
import random
from random import shuffle
from matplotlib import pyplot as plt
import seaborn as sns
from torch.utils.data import TensorDataset, DataLoader
import wget
import os
import aiohttp
import asyncio
np.random.seed(17)
random.seed(17)
torch.manual_seed(17)
from fpl import FPL
from player import Player
from team import Team
from data_processor import get_fpl, get_players, get_teams, get_training_datasets
from models import PreviousScoreModel, PlayerAvgScoreModel, LinearModel, HierarchialLinearModel, NonLinearModel
import torch.nn as nn
import torch.optim as optim
seed = 5
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f34efdde5f0>

In [71]:
fpl = await get_fpl()
opponent_feature_names = ["npxG","npxGA"]
player_feature_names = ["total_points", "ict_index", "clean_sheets", "saves", "assists", "goals_scored"]
teams = get_teams(team_feature_names=opponent_feature_names, visualize=False)
players = await get_players(player_feature_names, opponent_feature_names, visualize=False, num_players=580)
len(players)

579

In [72]:
def normalize(x, is_scalar=False):
    '''
        Args 
            x - input 
        Returns
            normalized input
    '''
    if is_scalar:
        x = torch.tensor(np.array(x).astype(float).reshape((-1, ))).double()
        means = torch.mean(x)
        stds = torch.std(x)
        return (x - means) / stds
    else:
        x = torch.tensor(np.array(x).astype(float)).double()
        # (N, D, L)
        x = x.permute(0, 2, 1)
        input_means = torch.mean(x, dim=(0, 1))
        input_stds = torch.std(x, dim=(0, 1))
        x = (x - input_means) / (input_stds)
        x = x.permute(0, 2, 1)
        return x


def get_lean_training_datasets(players, teams, window_size=4, batch_size=50):
    '''
        Args
            players - list of players
            teams - list of teams
        Returns
            Train and test data loaders
            Input - (Batch_size, num_features, window)
            Output - (Batch_size, )

            Build up datasets in numpy
            Make test train splits and feed it to pytorch
    '''
    X, Y = [], []
    for player in players:
        for i in range(player.player_features.shape[1] - window_size):
            x = player.player_features[:,i:i+window_size]
            y = player.player_features[0, i+window_size]
            X.append(x)
            Y.append(y)
    X, Y = np.array(X).astype(float), np.array(Y).astype(float)
    indices = np.random.permutation(range(len(X)))
    train_length = int(0.8 * len(X))
    X, Y = torch.tensor(X).double(), torch.tensor(Y).double()
    X = normalize(X)
    Y = normalize(Y, is_scalar=True)
    X_train, X_test = X[indices[:train_length]], X[indices[train_length:]] 
    Y_train, Y_test = Y[indices[:train_length]], Y[indices[train_length:]] 
    train_loader = DataLoader(TensorDataset(X_train, Y_train), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(X_test, Y_test), batch_size=batch_size)
    return train_loader, test_loader


train_loader, test_loader = get_lean_training_datasets(players, teams)

In [73]:
normalize([0, 1, 2, 3], is_scalar=True).shape

torch.Size([4])

In [77]:
class AvgModel(nn.Module):
    def forward(self, x):
        '''
            Args 
                x - (N, D, L)
        '''
        return x[:,0,:].mean(dim=1).reshape((-1, ))


class PrevModel(nn.Module):
    def forward(self, x):
        '''
        '''
        return x[:,0,-1].reshape((-1, ))

class LinearModel(nn.Module):
    def __init__(self, window_size=4, num_features=5):
        super(LinearModel, self).__init__()
        self.dim = window_size * num_features
        self.fc1 = nn.Linear(self.dim, 1)
    
    def forward(self, x):
        x = x.reshape((x.shape[0], self.dim))
        return self.fc1(x).reshape((-1, ))

class RNNModel(nn.Module):
    def __init__(self, window_size=4, num_features=5, hidden_dim=128):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(num_features, hidden_dim, 4).double()
        self.fc = nn.Linear(hidden_dim, 1).double()

    def forward(self, x):
        '''
            Args
                x - shape (N, D, L)
        '''
        x = x.permute(2, 0, 1)
        h = self.rnn(x)
        o = self.fc(h[-1][-1])
        return o.reshape((-1, ))


input_tensor = torch.tensor([[0, 2, 4, 6], [1, 3, 5, 7]]).reshape((2, 1, 4)).double()
prev_model = PrevModel().double()
avg_model = AvgModel().double()
linear_model = LinearModel(num_features=1).double()
rnn_model = RNNModel(num_features=1)
print(avg_model.forward(input_tensor))
print(prev_model.forward(input_tensor))
print(linear_model.forward(input_tensor))
print(rnn_model.forward(input_tensor).shape)

tensor([3., 4.], dtype=torch.float64)
tensor([6., 7.], dtype=torch.float64)
tensor([-1.8409, -2.5804], dtype=torch.float64, grad_fn=<ViewBackward>)
torch.Size([2])


In [78]:
def fit(model, train_loader):
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    model.train()
    for epochs in range(30):
        for (inputs, outputs) in train_loader:
            optimizer.zero_grad()
            predictions = model.forward(inputs)
            residual = (predictions - outputs)
            loss = (residual * residual).sum()
            loss.backward()
            optimizer.step()

def eval(model, test_loader):
    sum_loss, count_loss = 0, 0
    for (inputs, outputs) in test_loader:
        predictions = (model.forward(inputs))
        assert(predictions.shape == outputs.shape)
        residual = (predictions - outputs)
        loss = (residual * residual).mean().item()
        sum_loss += loss 
        count_loss += 1 
    return sum_loss / count_loss

avg_model = AvgModel().double()
prev_model = PrevModel().double()
linear_model = LinearModel(num_features=6).double()
rnn_model = RNNModel(num_features=6)

fit(linear_model, train_loader)
fit(rnn_model, train_loader)

print(eval(avg_model, test_loader))
print(eval(prev_model, test_loader))
print(eval(linear_model, test_loader))
print(eval(rnn_model, test_loader))

0.8240670763787781
1.221983089450963
0.7154671937232996
0.7080206609708273
