In [1]:
import pandas as pd
import torch
import numpy as np
import random
from random import shuffle
np.random.seed(17)
random.seed(17)
torch.manual_seed(17)
import wget
import os
import aiohttp
import asyncio
from fpl import FPL
from torch.utils.data import TensorDataset, DataLoader
from player import Player
from team import Team
from data_processor import get_fpl, get_players, get_teams, get_training_datasets, get_all_player_features
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
fpl = await get_fpl()
team_feature_names = ["npxGA"]
teams = get_teams(team_feature_names=team_feature_names, visualize=False)
player_feature_names = ["total_points"]
players = await get_players(player_feature_names, team_feature_names, visualize=False, num_players=590)

In [17]:
def get_timeseries_dataset(players, context_window=10):
    '''
        Args
            players - List of players 
        Returns
            Time series dataset containing player points

            context_window = feature_window + 1 + choice    
            X - shape (N, context_window)
    '''
    timeseries_dataset = []
    for player in players:
        player_points = player.player_features[0,:]
        for i in range(len(player_points) - context_window):
            timeseries_dataset.append(player_points[i:i+context_window])
    random.shuffle(timeseries_dataset)
    timeseries_dataset = np.array(timeseries_dataset).astype(float)
    return timeseries_dataset


timeseries_dataset = get_timeseries_dataset(players[0:5])[0:50]
timeseries_dataset

array([[ 0.,  0.,  0.,  6.,  5.,  2.,  1.,  2.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 2.,  2.,  0.,  2.,  2.,  9.,  2.,  6.,  2.,  4.],
       [ 2.,  1.,  1.,  0.,  1.,  1.,  2.,  2.,  1.,  0.],
       [ 2.,  2.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  2.,  0.,  0.,  0.,  0.,  0.,  2.,  2.],
       [ 0., 12.,  2.,  0.,  0.,  0.,  0.,  0.,  5.,  0.],
       [ 9., 12.,  5.,  9.,  2.,  2.,  2.,  8.,  2.,  5.],
       [ 2.,  9.,  2.,  6.,  2.,  4.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 2.,  6.,  1.,  7.,  2.,  0.,  0.,  0.,  0.,  2.],
       [ 2.,  6.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  1.],
       [ 2.,  4.,  0.,  0.,  2.,  8., 12.,  2.,  0.,  2.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  2.,  2.,  2.,  6.,  5.,  2.,  1.],
       [ 0.,  2.,  6.,  1.,  0.,  0.,  0.,  0.,  0.,  1.

# Dynamic data augmentation
- Input window is x [0:7]. 2 elements are set to 0 at random
- Target is sampled form x[7:9]
- Why - I want an nearly infinite dataset

In [38]:
train_indices = int(0.8 * len(timeseries_dataset))
train_dataset, test_dataset = timeseries_dataset[:train_indices], timeseries_dataset[train_indices:]
train_loader = DataLoader(TensorDataset(torch.tensor(train_dataset)), batch_size=1)
test_loader = DataLoader(TensorDataset(torch.tensor(test_dataset)), batch_size=1)
input_window_length = 7
for [x] in train_loader:
    print(x)
    input_vector = x[:,:input_window_length]
    masks = np.random.choice([0, 1, 2, 3, 4, 5, 6], 2)
    print(masks)
    input_vector[:,masks] = 0
    print(input_vector)
    prediction_choice = random.choice([7, 8, 9])
    prediction_target = x[:,prediction_choice]    
    print(prediction_choice)
    print(prediction_target)
    break

tensor([[0., 0., 0., 6., 5., 2., 1., 2., 1., 1.]], dtype=torch.float64)
[0 5]
tensor([[0., 0., 0., 6., 5., 0., 1.]], dtype=torch.float64)
7
tensor([2.], dtype=torch.float64)
