In [170]:
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(threshold=np.inf)

"""
The .npz file format is a way to store a bunch of numpy arrays. I ran 100 simulations and 
stored the data in the file 'data.npz'.
"""

all_data = np.load('data.npz')
file_names = all_data.files
print(file_names)

"""
To retrieve the first simulation data, we would use the following:
"""

first_file_name = file_names[0]
first_simulation_data = all_data[first_file_name]
#print(first_simulation_data.shape)
#print(first_simulation_data)
##################################################################################################################

# Importing all samples
data = [first_simulation_data]

for i in range (1, 99):
    file_name = file_names[i]
    simulation_data = all_data[file_name]
    data.append(simulation_data)

data = np.array(data, dtype=object)
data[0].shape[0]

['arr_0', 'arr_1', 'arr_2', 'arr_3', 'arr_4', 'arr_5', 'arr_6', 'arr_7', 'arr_8', 'arr_9', 'arr_10', 'arr_11', 'arr_12', 'arr_13', 'arr_14', 'arr_15', 'arr_16', 'arr_17', 'arr_18', 'arr_19', 'arr_20', 'arr_21', 'arr_22', 'arr_23', 'arr_24', 'arr_25', 'arr_26', 'arr_27', 'arr_28', 'arr_29', 'arr_30', 'arr_31', 'arr_32', 'arr_33', 'arr_34', 'arr_35', 'arr_36', 'arr_37', 'arr_38', 'arr_39', 'arr_40', 'arr_41', 'arr_42', 'arr_43', 'arr_44', 'arr_45', 'arr_46', 'arr_47', 'arr_48', 'arr_49', 'arr_50', 'arr_51', 'arr_52', 'arr_53', 'arr_54', 'arr_55', 'arr_56', 'arr_57', 'arr_58', 'arr_59', 'arr_60', 'arr_61', 'arr_62', 'arr_63', 'arr_64', 'arr_65', 'arr_66', 'arr_67', 'arr_68', 'arr_69', 'arr_70', 'arr_71', 'arr_72', 'arr_73', 'arr_74', 'arr_75', 'arr_76', 'arr_77', 'arr_78', 'arr_79', 'arr_80', 'arr_81', 'arr_82', 'arr_83', 'arr_84', 'arr_85', 'arr_86', 'arr_87', 'arr_88', 'arr_89', 'arr_90', 'arr_91', 'arr_92', 'arr_93', 'arr_94', 'arr_95', 'arr_96', 'arr_97', 'arr_98', 'arr_99']


630

In [171]:
data.shape
# inputs has size (batch_size, seq_len, input_size)

# data = torch.Tensor(np.asarray(first_simulation_data))
    

(99,)

In [172]:
import torch
from torch import nn

torch.manual_seed(1234)

class LSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        # lstm, linear are layers in the network
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, output_size)
        
    def forward(self, data):
        batch_size = data.shape[0]
        
        h_0 = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        c_0 = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        
        out, (hidden, cell) = self.lstm(data, (h_0, c_0))
        
        out = self.linear(out)
        
        return out, (hidden, cell)

In [173]:
# define hyperparameters and instantiate model

input_size = 4
output_size = 4
hidden_dim = 32
n_layers = 1

lstm = LSTM(input_size=input_size, output_size=output_size, hidden_dim=hidden_dim, n_layers=n_layers)

n_epochs = 100
lr = 0.01

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(lstm.parameters(), lr=lr)

In [174]:
# Training
def training(n_epochs, model, optimizer, criterion, train_input, train_target):
    for epoch in range(1, n_epochs + 1):
        optimizer.zero_grad()
        output, (hidden, cell) = model(train_input)
        loss = criterion(output, train_target)
        loss.backward()
        optimizer.step()
    
        if epoch%10 == 0:
            print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
            print("Loss: {:.4f}".format(loss.item()))
    return model
    

In [175]:
# Wrangling data
""""
print(data[0].shape)
train_prop = 0.9
train_samples = round(data.shape[0] * train_prop)
test_samples = data.shape[0] - train_samples

train_input = data[test_samples:]
print(train_input.shape)
train_target = data

"""

# for now we will just use the first simulation
train_set = data[0][90:]
test_set = data[0][:90]

# Use the first 20 4-number sequences to predict the next 20 4-number sequences
def create_target(inputs):
    x_train = [inputs[i:i+20] for i in range(len(inputs) - 40)]
    y_train = [inputs[i+20:i+40] for i in range(len(inputs) - 40)]
    
    x_train = torch.stack(x_train)
    y_train = torch.stack(y_train)
    
    return x_train, y_train

train_set = torch.Tensor(train_set)
train_input, train_target = create_target(train_set)

print(train_input.shape)
print(train_target.shape)

torch.Size([500, 20, 4])
torch.Size([500, 20, 4])


In [176]:
training(n_epochs, lstm, optimizer, criterion, train_input, train_target)

Epoch: 10/100............. Loss: 29083502592.0000
Epoch: 20/100............. Loss: 12572798976.0000
Epoch: 30/100............. Loss: 5525599232.0000
Epoch: 40/100............. Loss: 2516692736.0000
Epoch: 50/100............. Loss: 1231041664.0000
Epoch: 60/100............. Loss: 680767616.0000
Epoch: 70/100............. Loss: 444327520.0000
Epoch: 80/100............. Loss: 341838464.0000
Epoch: 90/100............. Loss: 296539872.0000
Epoch: 100/100............. Loss: 275675968.0000


LSTM(
  (lstm): LSTM(4, 32, batch_first=True)
  (linear): Linear(in_features=32, out_features=4, bias=True)
)