In [None]:
import json

import numpy as np
import pandas as pd
from numpy import array

import torch
import torch.nn as nn
import torch.optim as optim

# Used in LSTMModel Class Instantiation
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
import torch
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    """Dataset that turns each row's ``gold_t`` sequence into sliding windows.

    Every row of ``df`` is one sample. ``__getitem__`` scales that row's
    sequence and cuts it into overlapping (feature, target) windows.

    Args:
        df: DataFrame with ``hero_id``, ``match_id`` and ``gold_t`` columns,
            where each ``gold_t`` cell holds one sequence (e.g. a list).
        lookback: Number of timesteps in each feature window.
        horizon: Number of extra timesteps appended to each target window.
            Defaults to 5, the value that used to be hard-coded in
            ``__getitem__``.
    """

    def __init__(self, df, lookback, horizon=5):
        self.hero_ids = df['hero_id'].values  # positional array of hero ids
        self.time_series = df[['gold_t']]  # single-column frame, one sequence per cell
        # Iterate the column's cells. Iterating the DataFrame itself yields its
        # column labels, which made this the length of the string 'gold_t'.
        self.max_length = max((len(ts) for ts in df['gold_t']), default=0)
        # Kept for later inspection; this is a Series, so use .iloc for
        # positional access when the frame's index is not 0..n-1.
        self.match_ids = df['match_id']
        self.lookback = lookback
        self.horizon = horizon

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.hero_ids)

    def create_windows(self, timeseries, horizon=None):
        """Cut one sequence into overlapping feature/target windows.

        Args:
            timeseries: Array-like sequence; the first axis is treated as time.
            horizon: Steps added after the lookback in each target window.
                ``None`` falls back to the ``horizon`` given to ``__init__``.

        Returns:
            Tuple ``(X, y)`` of tensors. ``X`` has shape
            ``(num_windows, lookback, ...)`` and ``y`` has shape
            ``(num_windows, lookback + horizon, ...)``. The target deliberately
            repeats the lookback steps before the future steps. When the
            sequence is shorter than ``lookback + horizon`` both tensors have
            zero windows but keep their trailing dimensions.
        """
        if horizon is None:
            horizon = self.horizon

        series = torch.as_tensor(timeseries)
        span = self.lookback + horizon
        num_windows = series.shape[0] - span + 1
        trailing = tuple(series.shape[1:])

        if num_windows <= 0:
            # Too short for a single window: return well-shaped empty tensors
            # instead of a shapeless (0,) tensor.
            empty_X = series.new_empty((0, self.lookback) + trailing)
            empty_y = series.new_empty((0, span) + trailing)
            return empty_X, empty_y

        # torch.stack over tensor slices avoids the slow list-of-ndarrays path
        # of torch.tensor(...).
        X = torch.stack([series[i:i + self.lookback] for i in range(num_windows)])
        y = torch.stack([series[i:i + span] for i in range(num_windows)])
        return X, y

    def __getitem__(self, idx):
        """Return ``(hero_id, X, y)`` for the row at position ``idx``.

        The row's ``gold_t`` sequence is converted to a float32 NumPy array,
        passed through ``ConstantMinMaxScaler`` together with the module-level
        ``min_gold`` / ``max_gold`` values (all three are defined outside this
        class), and then windowed with ``create_windows``.
        """
        hero_id = self.hero_ids[idx]

        # .iloc[idx, 0] reads the cell positionally; the column holds one
        # sequence per row, so this is the whole series for that row.
        raw_series = np.asarray(self.time_series.iloc[idx, 0], dtype='float32')

        scaled_time_series = ConstantMinMaxScaler(raw_series, min_gold, max_gold)

        X, y = self.create_windows(scaled_time_series, horizon=self.horizon)
        return hero_id, X, y



In [None]:
class ProcessEmbedding(nn.Module):
    """Learned embedding vector for each distinct ``hero_id``.

    Args:
        df: DataFrame whose ``hero_id`` column lists every id the model may see.
        embedding_dim: Size of each embedding vector.
        lookback: Accepted for interface compatibility; not used by this module.
    """

    def __init__(self, df, embedding_dim, lookback):
        super().__init__()

        # Compute the distinct ids once so the table size and the id -> row
        # mapping are guaranteed to agree.
        unique_hero_ids = df['hero_id'].unique().tolist()

        self.num_processes = len(unique_hero_ids)  # number of distinct series categories
        self.embedding_dim = embedding_dim
        self.process_embeddings = nn.Embedding(self.num_processes, embedding_dim)

        # Raw hero ids are arbitrary values; nn.Embedding needs 0..n-1 rows.
        self.hero_id_to_idx = {hero_id: idx for idx, hero_id in enumerate(unique_hero_ids)}

    def forward(self, hero_id):
        """Look up the embedding(s) for one hero id or a batch of ids.

        Args:
            hero_id: A single id (Python/NumPy scalar or 0-d tensor) or a
                list/tuple/array/tensor of ids.

        Returns:
            Tensor of shape ``(num_ids, embedding_dim)``; a single id yields
            ``(1, embedding_dim)``.

        Raises:
            KeyError: If an id was not present in the ``df`` given at
                construction time.
        """
        # Tensors hash by object identity, so they never match the dict keys;
        # .tolist() turns tensors and NumPy values into plain Python values.
        ids = hero_id.tolist() if hasattr(hero_id, 'tolist') else hero_id
        if not isinstance(ids, (list, tuple)):
            ids = [ids]

        try:
            rows = [self.hero_id_to_idx[single_id] for single_id in ids]
        except KeyError as exc:
            raise KeyError(
                f"Unknown hero_id {exc.args[0]!r}: it was not in the DataFrame used to build the embedding"
            ) from exc

        # Build the index on the same device as the embedding table so the
        # lookup also works after the module has been moved (e.g. to a GPU).
        row_index = torch.tensor(rows, dtype=torch.long, device=self.process_embeddings.weight.device)
        return self.process_embeddings(row_index)


In [None]:
class LSTMModel(nn.Module):
    """LSTM forecaster whose input is augmented with a per-hero embedding.

    Args:
        input_size: Feature size fed to the LSTM per timestep. ``forward``
            concatenates one series value with the hero embedding, so this
            must equal ``1 + embedding_dim`` of ``process_embedding``.
        hidden_size: LSTM hidden state size.
        num_layers: Number of stacked LSTM layers.
        output_size: Size of the prediction produced for each window.
        process_embedding: Module mapping a hero id to an embedding tensor
            (e.g. ``ProcessEmbedding``).
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, process_embedding):
        super().__init__()

        self.hidden_size = hidden_size  # hyper-parameter, kept for inspection
        self.num_layers = num_layers  # hyper-parameter, kept for inspection

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)  # maps hidden state -> prediction
        self.process_embedding = process_embedding

    def forward(self, batch):
        """Predict one output vector per window of the first sample in ``batch``.

        Args:
            batch: Indexable ``(hero_ids, X, ...)``. Only the first sample is
                used: ``batch[0][0]`` is its hero id and ``batch[1][0]`` its
                windows, indexed here as ``(num_windows, lookback)``. Any
                further entries (such as targets) are ignored. This assumes
                one dataset row per batch; confirm against the DataLoader
                settings.

        Returns:
            Tensor of shape ``(num_windows, output_size)``.
        """
        hero_id = batch[0][0]
        # A collated id arrives as a 0-d tensor. Tensors hash by identity, so
        # a dict-based embedding lookup would never find it; hand over the
        # plain Python value instead.
        if torch.is_tensor(hero_id) and hero_id.numel() == 1:
            hero_id = hero_id.item()

        # (num_windows, lookback) -> (num_windows, lookback, 1): the windows
        # act as the LSTM batch dimension and each step carries one feature.
        windows = batch[1][0].unsqueeze(-1)
        num_windows = windows.size(0)
        seq_length = windows.size(1)

        # Broadcast the single hero embedding over every window and timestep
        # so it can be concatenated along the feature axis. expand() creates a
        # view instead of materialising num_windows * seq_length copies.
        hero_embedding = self.process_embedding(hero_id)
        hero_embedding = hero_embedding.reshape(1, 1, -1).expand(num_windows, seq_length, -1)

        combined_input = torch.cat((windows, hero_embedding), dim=-1)

        # nn.LSTM initialises hidden and cell state to zeros on the input's
        # device when none are passed, so no explicit h0/c0 are needed.
        output, _ = self.lstm(combined_input)

        # Use the hidden state of the final timestep, which has seen the whole
        # lookback window. The former `output[:, -5, :]` picked the single
        # step five from the end: it ignored the four most recent inputs and
        # raised IndexError for lookback < 5.
        out = self.fc(output[:, -1, :])

        return out