# Project 2

## Load libs

In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Model definitions

In [2]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[-1] # Only keep the last output in the sequence
        out = self.fc(lstm_out)
        return out

## Train, val, test loops

In [3]:
from tqdm.notebook import tqdm # status bar

In [4]:
def train(model, data, loss_fn, optimizer, epochs=3):

    for epoch in range(epochs):

        epoch_loss = []

        for batch_num, (samples, labels) in enumerate(tqdm(data)):

            # forward pass
            prediction = model(samples.transpose(0,1))
            loss = loss_fn(prediction, labels.view(1, -1))

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # record loss
            epoch_loss.append(loss.log10().item())
        
        # Print the loss for this epoch
        if (epoch+1) % 1 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')
        
    return epoch_loss

## Load data

In [5]:
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd

In [19]:
class BikeDataset(Dataset):
    def __init__(self, csv_file, seq_length, train=True, train_split_ratio=0.7):
        self.data_frame = pd.read_csv(csv_file)
        self.seq_length = seq_length
        self.train = train
        
        # Perform train/test split
        #dataset_size = len(self.data_frame) - self.seq_length
        #train_size = int(train_split_ratio * dataset_size)
        #test_size = dataset_size - train_size
        #print(dataset_size, train_size, test_size)
        #self.train_dataset, self.test_dataset = random_split(self.data_frame, [train_size, test_size])
        self.train_dataset, self.test_dataset = self.data_frame, self.data_frame
        
    def __len__(self):
        if self.train:
            return len(self.train_dataset) - self.seq_length
        else:
            return len(self.test_dataset) - self.seq_length
    
    def __getitem__(self, index):
        if self.train:
            dataset = self.train_dataset
        else:
            dataset = self.test_dataset
        beg_idx, end_idx = index, index+self.seq_length
        input_features = torch.tensor(dataset.iloc[beg_idx:end_idx,:-1].values,dtype=torch.float32)
        target_label = torch.tensor(dataset.iloc[end_idx,-1],dtype=torch.float32)
        return input_features, target_label

In [20]:
# Define the CSV file path
csv_file = 'data/Bike-Sharing-Dataset/hour.csv'

# Create an instance of the custom dataset
train_dataset = BikeDataset(csv_file, 12, train=True)
test_dataset = BikeDataset(csv_file, 12, train=False)

In [21]:
# Use PyTorch's DataLoader to create a data loader for batching and shuffling
batch_size = 1
shuffle = True
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
test_dl = DataLoader(test_dataset, batch_size=1, shuffle=shuffle)

# Iterate over the data loader to access batches of data
for batch in train_dl:
    input_features, target_label = batch
    #print('Input Features:', input_features)
    #print('Target Label:', target_label)
    print(input_features.transpose(0,1).shape)
    print(target_label.shape)
    break

torch.Size([12, 1, 11])
torch.Size([1])


## Fit models

### LSTM

In [22]:
# Define the input dimensions, hidden dimensions, and output dimensions
input_dim = 11
hidden_dim = 50
output_dim = 1

# Create an instance of the LSTM model
model = LSTMModel(input_dim, hidden_dim, output_dim)

# Define the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [29]:
# Testing
for batch in test_dl:
    samples, labels = batch
    #print('Samples:', samples)
    #print('Label:', labels)
    print('input shape:', samples.transpose(0,1).shape)
    print('label shape:', labels.view(1, -1).shape)
    print('pred shape:', model(samples.transpose(0,1)).shape)
    prediction = model(samples.transpose(0,1))
    loss = loss_fn(prediction, labels.view(1, -1))
    print(samples.transpose(0,1).shape)
    print(prediction)
    print(labels)
    break

input shape: torch.Size([12, 1, 11])
label shape: torch.Size([1, 1])
pred shape: torch.Size([1, 1])
torch.Size([12, 1, 11])
tensor([[6.0962]], grad_fn=<AddmmBackward0>)
tensor([74.])


In [24]:
_ = train(model, train_dl, loss_fn, optimizer, epochs=5)

  0%|          | 0/17367 [00:00<?, ?it/s]

Epoch [1/5], Loss: 24923.5566


  0%|          | 0/17367 [00:00<?, ?it/s]

Epoch [2/5], Loss: 81.0088


  0%|          | 0/17367 [00:00<?, ?it/s]

Epoch [3/5], Loss: 13734.3086


  0%|          | 0/17367 [00:00<?, ?it/s]

Epoch [4/5], Loss: 124.6351


  0%|          | 0/17367 [00:00<?, ?it/s]

Epoch [5/5], Loss: 1314.3922


In [None]:
# Evaluate the LSTM model
with torch.no_grad():
    model.eval()
    test_input = torch.randn(5, input_dim).unsqueeze(1)
    test_output = model(test_input)
    print('Input:', test_input.view(-1).numpy())
    print('Output:', test_output.view(-1).numpy())

## Results