In [2]:
import pandas as pd
import torch
import numpy as np
import random
from random import shuffle
np.random.seed(17)
random.seed(17)
torch.manual_seed(17)
import wget
import os
import aiohttp
import asyncio
from fpl import FPL
from torch.utils.data import TensorDataset, DataLoader
from player import Player
from team import Team
from data_processor import get_fpl, get_current_squad, get_teams, get_players, get_training_datasets
from agent import Agent
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping

# Why pytorch lightning 
- Better engineering for deep learning
    - Organises code, removes boiler plate, reduces errors, allows reproducibility. 

## Blocks of pytorch lightning
- Model
- Optimizer
- Data 
- Training loop
- Validation loop

## Notes
- Lightning automatically identifies and uses available GPUs. cuda() conversions are not needed. 
- Trainer object allows us to fit
    - Trainer.fit, default max epochs is 1000
    - To adopt lightning to GPU, pass gpus argument as 1 inside trainer. 
    - num_nodes argument provides you easy distributed training across 32 nodes. 
    - provdes 16 bit truncation. 
- Lightning provides a metrics module that allows you to measure accuracy etc easily. 
- Lightning takes care of doing backward poss, clearing optimizers. 
- Lightning_logs/ tracks hyper parameter, losses and models. 
    - Checkpointing is automatically setup. 


In [3]:
opponent_feature_names = ["npxG","npxGA"]
player_feature_names = ["total_points", "ict_index", "clean_sheets", "saves", "assists"]
window = 4
teams = get_teams(team_feature_names=opponent_feature_names, visualize=False, window=window)
players = await get_players(player_feature_names, opponent_feature_names, visualize=False, num_players=680, window=window)
train_loader, test_loader, _ = get_training_datasets(players, teams)
train_loader, test_loader

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_player_features["total_points"] = all_player_features["total_points"].clip(0, max_player_points)


(<torch.utils.data.dataloader.DataLoader at 0x7fefb7b7d0a0>,
 <torch.utils.data.dataloader.DataLoader at 0x7ff0b076c940>)

In [3]:
class LinearModel(nn.Module):
    def __init__(self, window_size=4, num_features=7):
        super().__init__()
        self.window_size = window_size
        self.num_features = num_features
        self.dim = window_size*num_features
        self.fc1 = nn.Linear(self.dim, 1)
    
    def forward(self, x):
        x = x.reshape((x.shape[0], self.dim))
        return self.fc1(x).reshape((-1, ))


class RNNModel(nn.Module):
    def __init__(self, window_size=4, num_features=7):
        super().__init__()
        self.num_features = num_features
        self.fc1 = nn.RNN(num_features, num_features, num_layers=5)
        self.fc2 = nn.Linear(num_features, 1)
    def forward(self, x):
        x = x.permute(dims=(0, 2, 1))
        i, _ = self.fc1(x)
        o = self.fc2(i[:,-1, :])
        return o


class LightningModel(pl.LightningModule):
    def __init__(self, window_size=4, num_features=7, use_opponent_features=True, len_opponent_features=2, model_type='linear'):
        super().__init__()
        self.window_size = window_size
        self.dim = window_size * num_features
        self.use_opponent_features = use_opponent_features
        self.len_opponent_features = len_opponent_features
        if model_type == 'linear':
            self.model = LinearModel(window_size, num_features)
        else:
            self.model = RNNModel(window_size, num_features)
        self.model_type = model_type

    def forward(self, x):
        return self.model.forward(x)
    
    def training_step(self, batch, batch_idx):
        x = batch[0]
        inputs = x[:,:,:self.window_size]
        outputs = x[:,0,self.window_size]
        predictions = self.model.forward(inputs)
        loss = nn.MSELoss()(predictions, outputs)
        self.log(f'{self.model_type} = train_loss', loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        loss = self.training_step(batch, batch_idx)
        self.log(f'{self.model_type} = val_loss', loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

model = RNNModel()
t = torch.zeros((5,7,4))
model.forward(t).shape

torch.Size([5, 1])

In [4]:
for model_type in ['linear','rnn']:
    model = LightningModel(model_type=model_type, use_opponent_features=False, len_opponent_features=True)
    condition = f'{model_type} = val_loss'
    trainer = pl.Trainer(max_epochs=50)
    trainer.fit(model, train_loader, test_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name  | Type        | Params
--------------------------------------
0 | model | LinearModel | 29    
--------------------------------------
29        Trainable params
0         Non-trainable params
29        Total params
0.000     Total estimated model params size (MB)


                                                                      

  rank_zero_warn(


Epoch 49: 100%|██████████| 62/62 [00:00<00:00, 70.74it/s, loss=0.794, v_num=43]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | RNNModel | 568   
-----------------------------------
568       Trainable params
0         Non-trainable params
568       Total params
0.002     Total estimated model params size (MB)



                                                                      

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0:  79%|███████▉  | 49/62 [00:07<00:01,  6.98it/s, loss=0.997, v_num=44]

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1:   0%|          | 0/62 [00:00<00:00, 1057.83it/s, loss=0.997, v_num=44] 

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 49: 100%|██████████| 62/62 [00:07<00:00,  8.50it/s, loss=0.997, v_num=44]
