In [25]:
import json
import pandas as pd
from numpy import array

import torch
import torch.nn
import torch.optim as optim

# Used in LTSMModel Class Instantiation
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [26]:
file_path = "C:\\Users\\dcrai\\source\\repos\\DATA698\\Code\\Data\\data.json"
file_path_hero = "C:\\Users\\dcrai\\source\\repos\\DATA698\\Code\\Data\\hero_id_table.csv"
#file_csv = "C:\\Users\\dcrai\\source\\repos\\DATA698\\Code\\Data\\iter_1.csv"
#data = json.loads(file)

with open(file_path, 'r') as file:
    data = json.load(file)


heroes= pd.read_csv(file_path_hero)
# Now 'data' contains the contents of the JSON file


In [27]:
# Extract 'match_id', 'hero_id', and 'gold_t' from each element in 'data'
match_ids = [element['match_id'] for element in data]
hero_ids = [element['hero_id'] for element in data]
gold_t_values = [element['gold_t'] for element in data]

# Create a DataFrame from the extracted values
df = pd.DataFrame({'match_id': match_ids, 'hero_id': hero_ids, 'gold_t': gold_t_values})

# Displ

#7517376613

In [28]:
df.iloc[2470]
df.iloc[2511]

match_id                                           7545503107
hero_id                                                    48
gold_t      [0, 204, 484, 748, 988, 1362, 1808, 2067, 2523...
Name: 2511, dtype: object

In [29]:
#df_match = df[df['match_id'] == 7517376613]

df_match = df[:300]

#df_match

df_subset = df_match[['hero_id', 'gold_t']].copy()

df_full = df[['hero_id', 'gold_t']].copy()

#df_t = df_subset.T

#df_t.columns
# explode = df_t.explode(26)

# explode
#df_subset
df_full

Unnamed: 0,hero_id,gold_t
0,26,"[0, 170, 260, 350, 440, 827, 1051, 1141, 1278,..."
1,84,"[0, 247, 580, 826, 1235, 1481, 1876, 2279, 265..."
2,25,"[0, 357, 715, 1130, 1578, 1968, 2261, 2861, 33..."
3,8,"[0, 246, 591, 895, 1255, 1499, 1823, 2117, 241..."
4,5,"[0, 170, 281, 395, 559, 699, 821, 937, 1148, 1..."
...,...,...
10165,10,"[0, 245, 553, 935, 1262, 1593, 2021, 2489, 295..."
10166,55,"[0, 209, 445, 744, 1168, 1593, 1953, 2531, 275..."
10167,98,"[0, 332, 654, 1046, 1536, 1828, 2285, 3071, 35..."
10168,45,"[0, 209, 299, 469, 589, 789, 971, 1186, 1392, ..."


### Missing Data Drop

In [30]:
missing_mask = df_full['gold_t'].isnull()
missing_indices = df_full.index[missing_mask]
print(missing_indices)
df_full.loc[missing_indices, ['hero_id','gold_t']]

df_full = df_full.dropna(subset=['gold_t'])
df_full = df_full.reset_index(drop=True) # Reset the indexes so no issues arise with using index locations to drop 0 length tensors
#df_full

Int64Index([2270, 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, 2279, 2471,
            2472, 2473, 2474, 2475, 2476, 2477, 2478, 2479, 2521, 2522, 2523,
            2524, 2525, 2526, 2527, 2528, 2529, 2541, 2542, 2543, 2544, 2545,
            2546, 2547, 2548, 2549, 2621, 2622, 2623, 2624, 2625, 2626, 2627,
            2628, 2629, 5740, 5741, 5742, 5743, 5744, 5745, 5746, 5747, 5748,
            5749, 5750, 5751, 5752, 5753, 5754, 5755, 5756, 5757, 5758, 5759,
            6910, 6911, 6912, 6913, 6914, 6915, 6916, 6917, 6918, 6919],
           dtype='int64')


### Zero Length Tensors Drop

If we don't drop the zero length tensors, when we call `model(hero_ids,time_series)` in the training loop, it will error out when attempting to run pad_packed_sequence


In [31]:
zero_length_indices = []

for index, time_series in enumerate(df_full['gold_t']):
    if len(time_series) == 0:
        zero_length_indices.append(index)

if len(zero_length_indices) > 0:
    print(f"Found {len(zero_length_indices)} tensors with length 0 at indices: {zero_length_indices}")
else:
    print("No tensors with length 0 found.")

Found 4 tensors with length 0 at indices: [2460, 2501, 2512, 2583]


In [32]:
df_full.loc[zero_length_indices, ['hero_id','gold_t']]

#df_full.iloc[2583][['hero_id','gold_t']]
#len(df_full.iloc[2460]['gold_t'])

Unnamed: 0,hero_id,gold_t
2460,112,[]
2501,39,[]
2512,22,[]
2583,129,[]


In [33]:
df_full = df_full.drop(zero_length_indices)

### Normalizing Data

In [34]:
# data = df_subset.gold_t

# from pandas import Series
# from sklearn.preprocessing import MinMaxScaler

# # define contrived series
# series = data.apply(pd.Series)
# print(series)

# # prepare data for normalization
# values = series.values


# scaler = MinMaxScaler(feature_range=(0, 1))
# scaler = scaler.fit(values)
# print('Min: %f, Max: %f' % (scaler.data_min_, scaler.data_max_))

# #values = values.reshape((len(values), 1))


### TimeSeriesDataset Building

Summary of the classes we are about to build.

General Things to Understand:

- the forward method is basically a return call
    - forward method is a part of all nn.Module classes (nn.Module is the base class for any neural net node)

- 

1. TimeSeriesDataset

The TimeSeriesDataset is a class we are making while inheriting the PyTorch Dataset class, which is an abstract free-form class with little restriction to it that mainly provides certain methods like __get_item__ that can be passed on to subsequent layers in the architecture. 

Class Responsibiltiies:
- Establish hero_ids and time_series as separate callable variables
- Establish the length of the Longest tensor to help with padding
- Establish retrieving a hero_id and its tensor with __get_item__

When get_item is called by DataLoader it will be passed an index, where it will retrieve a hero_id and its time series. Then it pads the time series based on the length of the longest tensor and returns the hero_id, padded time series, and the time series' length before padding

2. Process Embedding

The ProcessEmbedding is a class also provided by PyTorch that will act as an embedding handler. The embedding matrix is a matrix of vectors containing numerical values that can be trained over to help capture the unique effects of each hero. Effectively, this is the method used to account for heros that are more eFfective with gold than others.

Class Responsibilities:
- Establish number of hero_ids/vectors needed in the matrix
- Establish number of dimensions for the embedding matrix to have
- Instantiate the nn.Embeddings class with the prior dimensions

In [35]:

#pad_test = [torch.tensor(ts) for ts in df_full['gold_t']]

#pad_test

In [36]:
#tensor_pad_test = pad_test[1]

In [37]:
# pad_max_length = max(len(ts) for ts in pad_test)
# print(pad_max_length)
# padded_time_series = torch.zeros(pad_max_length)
# ts_length = len(tensor_pad_test)
# padded_time_series[:ts_length] = tensor_pad_test

#padded_time_series

#pack_padded_sequence(input, lengths, batch_first=False, enforce_sorted=True)


In [38]:
import torch
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    # Class to create our dataset
    def __init__(self, df):
        self.hero_ids = df['hero_id'].values # Declaring hero_id values
        self.time_series = [torch.tensor(ts) for ts in df['gold_t']] # Converting the time_series into Tensors
        self.max_length = max(len(ts) for ts in self.time_series) # Grabs max length of all the tensors to pad them with 0s later

        """
        # Check the dimensionality of the time series tensors
        for ts in self.time_series:
            print(f"Time series shape: {ts.shape}, Dimensions: {ts.dim()}")
        """


    def __len__(self):
        return len(self.hero_ids) # Convenient length call
    

    def __getitem__(self, idx):
        # For the DataLoader, indexes are called to pull time-series data. Since we have the categorical variable in hero_id, we need to ensure
        #     to pull both the hero_id and the associated index of the time series. To do that the DataLoader passes the __getitem__ method an
        #     index, and __getitem__ returns the hero_id and the time_series at that index. In a time-series without categorical data, it'd normally
        #     just be the time series that is returned.
        hero_id = self.hero_ids[idx]
        time_series = self.time_series[idx]
        length = len(time_series)

        padded_time_series = torch.zeros(self.max_length)
        padded_time_series[:length] = time_series
        #print(f"Padded Time Series: {padded_time_series}")

        return hero_id, padded_time_series, length

Playing with how the __getitem__ method works below

In [39]:
# ts_test = [torch.tensor(ts) for ts in df_subset['gold_t']] # Converting the time_series into Tensors
# length = len(ts_test[0])
# max_length = max(len(ts) for ts in ts_test)
# print(ts_test[0].shape)
# #print(ts_test[0].size(1))

# padded_test = torch.zeros(max_length, ts_test[0].size(0)) 

# padded_test[:length] = ts_test[0]

# padded_test
# #padded_test.size(1)
# # ts_test[1].size(1)
# # max_length = max(len(ts) for ts in ts_test)

# # print(max_length)
# # print(ts_test[1].size(1))

# # len(ts_test)

### Embedding Processing

Embedding will allow the model to develop a representation for each hero_id, with like hero_ids having similar embedding vectors. These vectors will help represent the differences between carries, mids, offlaners, and more. The embedding matrix is treated as a learning parameter, which means backpropagation will update the values in the matrix based on the loss function in training.

Rule of Thumb: A common rule of thumb is to set the embedding_dim to be at least as large as the logarithm of the number of classes or groups you want to represent or expect. In your case, with around 5 groups, the logarithm of 5 is approximately 2. Since I know there are 5 roles, and I'd like to see them expressed atleast by role the log would be an embed vector of size 2.

Rule of Thumb (NLP): A common rule of thumb in natural language processing (NLP) tasks, where embeddings are widely used, is to set the embedding dimension to be approximately the fourth root of the vocabulary size (number of unique tokens/entities). While this is not a hard rule, it provides a reasonable ballpark estimate. In your case, with 124 unique processes, the fourth root of 124 is approximately 5. Multiplying this by a factor (e.g., 16 or 32, which are common embedding sizes) gives you a range of 80-160 as a reasonable starting point for the embedding dimension.

In many deep learning applications, embedding dimensions of 64, 128, or 256 are used.

I will try embed sizes of 20, 32, 64, 128, and 256.

In [40]:
print(list(enumerate(df_subset['hero_id'].unique())))

#(0, 26), (1, 84), (2, 25), (3, 8

matrix_test = {hero_id: idx for idx, hero_id in enumerate(df_subset['hero_id'].unique())}
print(matrix_test)
               

[(0, 26), (1, 84), (2, 25), (3, 8), (4, 5), (5, 68), (6, 86), (7, 23), (8, 77), (9, 54), (10, 70), (11, 80), (12, 100), (13, 63), (14, 2), (15, 17), (16, 110), (17, 45), (18, 10), (19, 104), (20, 19), (21, 64), (22, 53), (23, 102), (24, 71), (25, 7), (26, 67), (27, 11), (28, 30), (29, 33), (30, 1), (31, 57), (32, 21), (33, 44), (34, 69), (35, 76), (36, 83), (37, 28), (38, 87), (39, 129), (40, 12), (41, 43), (42, 120), (43, 14), (44, 46), (45, 97), (46, 42), (47, 50), (48, 22), (49, 29), (50, 51), (51, 48), (52, 61), (53, 6), (54, 114), (55, 39), (56, 101), (57, 85), (58, 136), (59, 20), (60, 112), (61, 96), (62, 121), (63, 79), (64, 88), (65, 65), (66, 47), (67, 113), (68, 66), (69, 109), (70, 93), (71, 94), (72, 59), (73, 106), (74, 98), (75, 58), (76, 92), (77, 4), (78, 31), (79, 123), (80, 78), (81, 41), (82, 13), (83, 52), (84, 18), (85, 128), (86, 89), (87, 72), (88, 135), (89, 75), (90, 81), (91, 9), (92, 40), (93, 90), (94, 138), (95, 91), (96, 126), (97, 73), (98, 60)]
{26: 0, 

In [41]:


matrix_test = {hero_id: idx for idx, hero_id in enumerate(df_subset['hero_id'].unique())}

In [42]:
import torch
import torch.nn as nn

class ProcessEmbedding(nn.Module):
    def __init__(self, df, embedding_dim):
        super(ProcessEmbedding, self).__init__() 
        """
        super() calls the intialization of the parent class of ProcessEmbedding, in this case nn.Module (PyTorch's class for all nnets)
        This is done to initizialize the class correctly. If not called, the nn.Module's functionalities will not work.

        """
        self.num_processes = len(df['hero_id'].unique()) # declaring number of different categories of time-series for dimensionialty reasons
        self.embedding_dim = embedding_dim # passing our embed size to be a class attribute
        self.process_embeddings = nn.Embedding(self.num_processes, embedding_dim)

        """ nn.Embedding creates and stores the embedding vector , it takes two arguments;
                    num_processes - the number of different embeddings it will need to hold (for us it would be the number of hero_ids = 124)
                    embedding_dim - the size of the vector of the embedding
        """


        self.hero_id_to_idx = {hero_id: idx for idx, hero_id in enumerate(df['hero_id'].unique())}

        """  self.hero_id_to_idx: a mapping of hero_ids to embedding matrix vector IDs
        - converts hero_id to an integer index and creates a matrix mapping of the values to pass on
        - is created since nn.Embedding expects an integer for an index value where an embedding is stored
        - this attribute effectively converts hero_ids to an index value that corresponds to the Embedding vectors row of values for that hero_id

        enumerate(df['hero_id'].unique()): iterates over the unique hero_id values and assigns a sequential index (idx) to each value, starting from 0. 
        {} creates a dictionary of key-pairs, the basic form of creating a dictionary: {key_expression: value_expression for item in iterable},
            which allows us to define the key-pairs over an iterable in one line of code


        It returns a list of tuples (idx, hero_id).
        For example, if the unique hero_id values are ['26', '184', '225', '38'], the enumerate function will return:
        [(0,26), (1,184), (2,225), (3,38)

        This is then passed to the dictionary key loop "{hero_id: idx for idx, hero_id in }". The curly brackets denote the creation of a dictionary.
        This will then create the following dictionary:
        {26: 0, 184: 1, 225: 2, 38: 3} - thus creating our mapping to pull the correct embedding vector per the hero id
        
        """
        

    def forward(self, hero_ids):
        """
        Called when: 
            LSTM Module calls process_embedding
        Args:
            hero_ids (Tensor or List): A tensor or list of hero IDs.
        Returns:
            Tensor: A tensor of shape (batch_size, embedding_dim) containing the process embeddings.

        We use the embedding with the forward method below.
            1. We convert the input hero_ids to their corresponding indices (process_ids) in the Embed Vector using the self.hero_id_to_idx mapping.
            2. We create a PyTorch tensor process_ids from the indices.
            3. We pass this process_ids tensor to the self.process_embeddings module, which retrieves and returns the corresponding embedding vectors.
        """

        process_ids = [self.hero_id_to_idx[hero_id.item()] for hero_id in hero_ids]
        process_ids = torch.tensor(process_ids)
        process_embeddings = self.process_embeddings(process_ids)

        print("Process Embeddings shape:", process_embeddings.shape)
        print("Process Embeddings tensor:", process_embeddings)

        return process_embeddings

        # This version only returns the unique hero_ids, I think this was done because I believed the 
        # embedding vector was the item being updated and I wanted to ensure no duplicates were created in teh embedding vector
        # This was a misunderstanding of mine on when the forward method was call as the LSTM calls it when retrieving the embedding vectors
    
    
        # process_ids = []
        # print(f"Hero IDs: {hero_ids}")
        # for hero_id in hero_ids:
        #     process_id = self.hero_id_to_idx[hero_id.item()] # pull the associated index number to the hero_id that will later be used as the index identifier in the embedding matrix
        #     if process_id not in process_ids: # this will ensure that only unique values are added to the list
        #         process_ids.append(process_id)
        
        # # remove or no? - process_ids = [self.hero_id_to_idx[hero_id] for hero_id in hero_ids] # convert hero_ids - remove or no?
        # process_ids = torch.tensor(process_ids) # Create tensor of process_ids

        # print(f"Proc_Embed Shape: {self.process_embeddings.shape}")

        # return self.process_embeddings(process_ids) # Pass the process_ids to self.process_embeddings
    

    # def forward(self, hero_ids):
    #     process_ids = []
    #     for hero_id in hero_ids:
    #         try:
    #             process_id = self.hero_id_to_idx[hero_id.item()]
    #             process_ids.append(process_id)
    #         except KeyError:
    #             print(f"Error: hero_id {hero_id.item()} not found in hero_id_to_idx dictionary.")
    #             raise

    #     process_ids = torch.tensor(process_ids)
    #     return self.process_embeddings(process_ids)

#### Data Loader

## Building LSTM Model

##### LSTM Input Dimension Expectations:

The LSTM model expects the input data to have three dimensions in a specific order: (batch_size, seq_length, input_size).

Batch Size (batch_size):
Represents the number of samples or sequences being processed in a single batch.
It allows the model to process multiple sequences simultaneously, which can improve training efficiency and speed.
Sequence Length (seq_length):
Represents the length of each input sequence or the number of time steps in the sequence.
In your case, it corresponds to the number of time steps in the "gold_t" values for each hero_id.
The LSTM model will process the input sequence step by step along this dimension.
Input Size (input_size):
Represents the size of the input features at each time step.
It is determined by the size of the concatenated vector of process embeddings and time series data.
In your case, input_size is calculated as process_embedding.embedding_dim + 1, where embedding_dim is the size of the process embedding vector, and 1 corresponds to the single value of the time series data at each time step.
So, if we have the following dimensions:

Batch Size: 32
Sequence Length: 10
Process Embedding Dimension: 64
Time Series Dimension: 1
The expected input shape for the LSTM model would be: (32, 10, 65)

The first dimension (32) represents the batch size.
The second dimension (10) represents the sequence length.
The third dimension (65) represents the input size, which is the sum of the process embedding dimension (64) and the time series dimension (1).
To ensure that the input data matches the expected dimensions of the LSTM model, you need to properly shape and concatenate the process embeddings and time series data, as shown in the previous examples.

The LSTM model will take this input tensor of shape (batch_size, seq_length, input_size) and process it sequentially, updating its hidden states at each time step based on the current input and the previous hidden state. The LSTM layers will learn to capture the dependencies and patterns in the input data and produce the desired output.

In [43]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, process_embedding):
        super(LSTMModel, self).__init__() # ensures the correcty PyTorch class is also initialized

        self.hidden_size = hidden_size #hyper param 
        self.num_layers = num_layers #hyper param

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) # Actual LSTM creation
        self.fc = nn.Linear(hidden_size, output_size) # Linear Model creation
        self.process_embedding = process_embedding # Process Embedding


    def forward(self, hero_ids, time_series):
        batch_size = time_series.size(0) # pulling dims from the tensor
        seq_length = time_series.size(1) # pulling dims from the tensor
        
        # Get process embeddings for hero_ids
        process_embeddings = self.process_embedding(hero_ids)

        print("Process Embeddings shape:", process_embeddings.shape)
        print("Time Series shape:", time_series.shape)
        
        # Reshape process embeddings to match the input shape of LSTM
        process_embeddings = process_embeddings.unsqueeze(1).repeat(1, seq_length, 1)

        print("Reshaped Process Embeddings shape:", process_embeddings.shape)

        # Unsqueexing to ensure the time_series shape is 3D like our embedding processing is so that no issues are ran into with torch.cat below
        time_series = time_series.unsqueeze(-1)

        print("Time Series shape with extra dimension:", time_series.shape)
        
        # Concatenate process embeddings with time series data
        # dim = -1, signifies concatenation across the last dimension (the feature dimension)
        input_data = torch.cat((process_embeddings, time_series), dim=-1)
        
        # Pack the padded sequences
        # Packing the padded Sequences is a way of optimizing computation times. We have padded the time series to all be the same length, even though some are only 20 or less
        # The packing indicates which are the real values in the time series so that the computation is only ran on those time steps. Details on how are unknown to me thus far.
        packed_input = pack_padded_sequence(input_data, lengths, batch_first=True, enforce_sorted=False)


        # Initialize hidden state and cell state
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        

        packed_output, _ = self.lstm(packed_input, (h0, c0))

        # Unpack the output
        output, _ = pad_packed_sequence(packed_output, batch_first=True)

        # Take the last output of the LSTM
        out = self.fc(output[:, -1, :])
        
        return out

### Instantiating Classes and Parameters

In [44]:


process_embedding = ProcessEmbedding(df_full, embedding_dim=84) # we create the embedding vector on unsplit data to ensure all unique hero id's are contained

#input_size = process_embedding.embedding_dim + time_series.shape[-1]  # Number of features (embedding_dim + time_series_dim)
input_size = process_embedding.embedding_dim + 1 #84 + 1
hidden_size = 64
num_layers = 2
output_size = 1  # Assuming you want to predict a single value


model = LSTMModel(input_size, hidden_size, num_layers, output_size, process_embedding)

#### Training the Model

### Training Parameters

In [45]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
batch_size = 32

### Train Test Split

In [46]:
test_size = .30

train_df, test_df = train_test_split(df_full, test_size=test_size, shuffle=False)

#print(type(train_df))
#train_df

#### Dataset and Data Load

In [47]:
#print(type(df_no_match))
#print(type(df_subset))

#test_series = [torch.tensor(df_subset) for ts in df_subset['gold_t']]
#time_series = [torch.tensor(train_df) for ts in train_df['gold_t']]

In [48]:
train_dataset = TimeSeriesDataset(train_df)
test_dataset = TimeSeriesDataset(test_df)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [49]:
# Training loop
num_epochs = 1000
batch_size = 32

for epoch in range(num_epochs):
    model.train()
    
    train_loss = 0.0
    print(f"Epoch: {epoch}")
    for hero_ids, time_series, lengths in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(hero_ids, time_series)
        targets = time_series[:, -1]  # Assuming you want to predict the last value of each time series
        loss = criterion(outputs.squeeze(), targets)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * time_series.size(0)

    train_loss /= len(train_dataset)

    model.eval()
    test_loss = 0.0

    with torch.no_grad():
        for hero_ids, time_series, lengths in test_loader:
            # Forward pass
            
            outputs = model(hero_ids, time_series)
            targets = time_series[:, -1]  # Assuming you want to predict the last value of each time series
            loss = criterion(outputs.squeeze(), targets)

            test_loss += loss.item() * time_series.size(0)

    test_loss /= len(test_dataset)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

Epoch: 0
Padded Time Series: tensor([  519.,   697.,   916.,  1163.,  1298.,  1456.,  1587.,  2159.,  2415.,
         2559.,  2777.,  3402.,  3721.,  4047.,  4274.,  4478.,  4616.,  4828.,
         5049.,  5409.,  5691.,  5858.,  6024.,  6518.,  6962.,  7430.,  7526.,
         7622.,  7844.,  7989.,  8089.,  8189.,  8588.,  8732.,  8832.,  8932.,
         9211.,  9311.,  9489., 10091.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.])
Padded Time Series: tensor([    0.,   216.,   342.,   432.,   585.,   675.,   914.,  1076.,  1544.,
      

KeyboardInterrupt: 

In [90]:
train_loader.dataset.time_series[0]



tensor([   0,  170,  260,  350,  440,  827, 1051, 1141, 1278, 1368, 1593, 2076,
        2166, 2261, 2609, 2704, 2834, 3702, 4056, 4214, 4309, 4640, 4806, 5091,
        5451, 5635, 5790, 5943])

In [89]:
process_embeddings[0]
time_series.shape

#torch.cat((process_embeddings, time_series), dim=-1)

tensor([ 0.7608, -0.6901,  0.2837,  0.6550, -0.5765,  0.6499, -1.0929, -0.1403,
         0.6046, -0.4389, -0.1541,  1.4986,  0.4376,  0.6602, -1.3858, -1.5050,
         0.1409, -0.7497, -2.0448, -1.2075,  0.5640, -1.0679, -2.4077,  1.2261,
        -0.3711,  0.1370, -1.0203,  0.2047, -0.2902,  0.0914, -1.5376,  1.0508,
         1.6433,  1.1089,  0.2639,  2.2881, -0.9979, -0.2200,  0.3248,  0.9091,
         0.9663, -0.2991, -0.4270,  1.3129,  0.1452,  0.0179, -0.3316, -0.0757,
         0.7799, -1.6530, -0.5533,  0.8450, -1.5635,  0.0183, -0.0912, -1.5416,
         0.0700,  0.6856, -0.7086, -0.2184, -0.4975,  0.3429, -0.3686, -0.4777,
         0.4892, -1.1268, -1.0243,  0.3240, -0.4425,  0.6808, -1.5136, -0.4021,
         0.5163, -0.5597, -0.8106,  0.8761,  0.7848,  0.6040,  0.0818, -0.6002,
         0.7887, -1.0970,  1.9276, -1.0555], grad_fn=<SelectBackward0>)