In [1]:
import pandas as pd # load csv dara
import numpy as np
import matplotlib.pyplot as plt
import torch as th
import torch.nn as nn


# Read the raw CSV from disk into a DataFrame.
csv_path = 'path.csv'
data = pd.read_csv(csv_path)


In [5]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if th.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [9]:
def get_seq_data(data, num_stpes):
    """Turn a single series into overlapping windows for next-step prediction.

    Args:
        data: The series as a 1-D array-like, a pandas Series, or a
            single-column DataFrame. The input is flattened before windowing.
        num_stpes: Number of past values used to predict the current value.
            (The misspelled name is kept so existing callers keep working.)

    Returns:
        A float numpy array of shape ``(len(data) - num_stpes, num_stpes + 1)``.
        Row ``i`` holds ``data[i : i + num_stpes + 1]``: the first ``num_stpes``
        columns are the past values and the last column is the value to predict.

    Raises:
        ValueError: If ``num_stpes`` is negative, if ``data`` holds more than
            one series, or if ``data`` has fewer than ``num_stpes`` values.
    """
    values = np.asarray(data, dtype=float)
    # A DataFrame slice is 2-D and cannot be written into a 1-D row, so accept
    # only inputs that collapse to one series and flatten them up front.
    if sum(dim != 1 for dim in values.shape) > 1:
        raise ValueError(
            f"get_seq_data expects a single series, got shape {values.shape}"
        )
    values = values.reshape(-1)

    if num_stpes < 0:
        raise ValueError(f"num_stpes must be non-negative, got {num_stpes}")

    window = num_stpes + 1
    num_rows = values.shape[0] - num_stpes
    if num_rows < 0:
        raise ValueError(
            f"need at least {num_stpes} values to build windows, got {values.shape[0]}"
        )

    # Row i selects positions i, i + 1, ..., i + num_stpes; broadcasting the two
    # index ranges builds every window at once instead of looping in Python.
    positions = np.arange(num_rows)[:, None] + np.arange(window)[None, :]
    return values[positions]

# Build the supervised windows: 7 past values per row plus the value to predict.
sequence_data = get_seq_data(data, 7)

from sklearn.preprocessing import MinMaxScaler

# Rescale the windowed data into the range [-1, 1].
# NOTE(review): the scaler is fitted on all windows, before any train/test
# split is made - confirm this is intended.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_data = scaler.fit(sequence_data).transform(sequence_data)

In [None]:
from torch.utils.data import Dataset, DataLoader, random_split
import tqdm

class SequenceDataset(Dataset):
    """Map-style dataset over rows of windowed series data.

    Each row of ``data`` is split into its leading columns (the model input)
    and its final column (the prediction target).

    Args:
        data: 2-D array indexable as ``data[:, :-1]`` / ``data[:, -1]``.
    """

    def __init__(self, data):
        # Dataset takes no constructor arguments, so nothing is forwarded to it.
        super().__init__()
        self.past_data = data[:, :-1]
        self.target = data[:, -1]

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.target)

    def __getitem__(self, index):
        """Return ``(past, target)`` float tensors for one row.

        ``past`` has shape ``(num_steps, 1)``: a trailing feature axis of size 1
        is added because a single value is observed per time step.
        ``target`` has shape ``(1,)``.
        """
        past_data_tensor = th.tensor(self.past_data[index]).float().unsqueeze(1)
        # The target is a scalar (0-dim tensor), so the only valid axis to add
        # is 0; unsqueeze(1) would raise IndexError here.
        target_tensor = th.tensor(self.target[index]).float().unsqueeze(0)
        return past_data_tensor, target_tensor

my_dataset = SequenceDataset(scaled_data)

# Hold out the last 10% (by count) of the samples for validation.
split_ratio = 0.9
num_train = int(split_ratio * len(my_dataset))
num_test = len(my_dataset) - num_train

# Seed the split so the same samples land in train/test on every run.
split_seed = 42
train_dataset, test_dataset = random_split(
    my_dataset,
    [num_train, num_test],
    generator=th.Generator().manual_seed(split_seed),
)

batch_size = 64
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_stacked_layer: Number of stacked LSTM layers.
    """

    def __init__(self, input_size = 1, hidden_size=32, num_stacked_layer=3):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_stacked_layer = num_stacked_layer

        # With batch_first=True the LSTM expects input shaped
        # [batch, time_steps, input_size].
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_stacked_layer,
                            batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: Tensor shaped ``[batch, time_steps, input_size]``.

        Returns:
            Tensor shaped ``[batch, 1]``.
        """
        batch_size = x.shape[0]
        # Zero initial hidden and cell states, allocated with the input's device
        # and dtype so forward() does not rely on any global device setting.
        h0 = x.new_zeros(self.num_stacked_layer, batch_size, self.hidden_size)
        c0 = x.new_zeros(self.num_stacked_layer, batch_size, self.hidden_size)
        # nn.LSTM returns (output, (h_n, c_n)); `output` holds the last layer's
        # hidden state at every time step.
        out, _ = self.lstm(x, (h0, c0))
        # Only the final time step feeds the regression head.
        return self.fc(out[:, -1, :])

# One input feature, hidden size 4, a single LSTM layer. The model is moved to
# `device` so its weights live where the batches are sent during training.
model = LSTM(1, 4, 1).to(device)

In [None]:
lr = 0.001
num_epoches = 100
criterion = nn.MSELoss()

# Keep the weights on the same device the batches are moved to below.
model.to(device)
# Adam needs the parameter iterable itself, so parameters() must be called.
optimizer = th.optim.Adam(model.parameters(), lr=lr)

for epoch in range(num_epoches):
    # ---- training ----
    model.train(True)
    train_loss = 0.0
    # tqdm is imported as a module in this notebook, so the progress bar class
    # is tqdm.tqdm. Each batch from the loader is a (past_data, target) pair.
    progress = tqdm.tqdm(train_data_loader,
                         desc=f'epoch {epoch + 1}/{num_epoches}',
                         leave=False)
    for past_data, target in progress:
        past_data, target = past_data.to(device), target.to(device)
        out = model(past_data)
        loss = criterion(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # max(..., 1) avoids a ZeroDivisionError when a loader yields no batches.
    avg_train_loss = train_loss / max(len(train_data_loader), 1)

    # ---- validation ----
    model.eval()
    val_loss = 0.0
    # Gradients are not needed for evaluation.
    with th.no_grad():
        for past_data, target in test_data_loader:
            past_data, target = past_data.to(device), target.to(device)
            out = model(past_data)
            cur_loss = criterion(out, target)
            val_loss += cur_loss.item()
    avg_val_loss = val_loss / max(len(test_data_loader), 1)

    print(f'epoch {epoch + 1}/{num_epoches} '
          f'train_loss={avg_train_loss:.6f} val_loss={avg_val_loss:.6f}')


In [37]:
# Scratch cell: quick checks of array and tensor construction.
x = np.arange(6)
y = np.zeros(shape=(3, 5))
b = th.randn((2, 3))
b.shape

torch.Size([2, 3])