# Project 2

## Load libs

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Model definitions

In [None]:
class LSTMModel(nn.Module):
    """Sequence-to-one model: an LSTM followed by a linear output layer.

    The LSTM is created with PyTorch's defaults, so ``forward`` indexes the
    first axis of the LSTM output to pick the final step of the sequence.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the layers.

        Args:
            input_dim: number of features per time step.
            hidden_dim: size of the LSTM hidden state.
            output_dim: number of values produced by the linear head.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear projection of the LSTM output at the last step."""
        hidden_states, _ = self.lstm(x)
        # Index -1 on the first axis keeps only the final element of the sequence
        return self.fc(hidden_states[-1])

## Train, val, test loops

In [None]:
from tqdm.notebook import tqdm # status bar

In [None]:
def train(model, data, loss_fn, optimizer, epochs=3):
    """Fit ``model`` on ``data`` and print the mean batch loss after every epoch.

    Args:
        model: module invoked as ``model(samples.transpose(0, 1))``; it must
            provide ``train()``.
        data: iterable yielding ``(samples, labels)`` pairs. It is iterated
            once per epoch.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: optimizer holding the parameters of ``model``.
        epochs: number of passes over ``data``.

    Returns:
        None. Progress is reported through ``print`` and the tqdm bar.
    """
    # The caller may have left the model in eval mode (e.g. after a
    # validation pass); make sure train-time behaviour is active.
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0

        for samples, labels in tqdm(data):

            # forward pass (the first two axes of the batch are swapped
            # before the model sees it)
            prediction = model(samples.transpose(0, 1))
            # Give the targets exactly the prediction's shape. A fixed
            # view(1, -1) only matches when the batch size is 1; for larger
            # batches the loss would silently broadcast (batch, 1) against
            # (1, batch).
            loss = loss_fn(prediction, labels.reshape(prediction.shape))

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Report the epoch's mean loss; an empty data source yields nan
        # instead of failing on an unbound ``loss``.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {mean_loss:.4f}')

## Load data

In [None]:
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd

In [None]:
class BikeDataset(Dataset):
    """Sliding-window dataset built from the rows of a CSV file.

    The rows are split in file order into a leading training part and a
    trailing test part. Each sample is a window of ``seq_length`` consecutive
    rows (every column except the last) paired with the last-column value of
    the row that immediately follows the window.
    """

    def __init__(self, csv_file, seq_length, train=True, train_split_ratio=0.7):
        """Load the CSV and split it.

        Args:
            csv_file: path passed to ``pandas.read_csv``.
            seq_length: number of rows per input window.
            train: serve the training split when True, else the test split.
            train_split_ratio: fraction of the available windows assigned to
                the training split.
        """
        self.df = pd.read_csv(csv_file)
        self.seq_length = seq_length
        self.train = train

        # Perform train/test split
        dataset_size = len(self.df) - self.seq_length
        # Clamp at zero: when the file has fewer rows than seq_length the
        # size is negative, and a negative slice bound would cut rows from
        # the END of the frame instead of producing an empty training split.
        train_size = max(0, int(train_split_ratio * dataset_size))
        self.train_df, self.test_df = self.df[:train_size], self.df[train_size:]

    def _active_split(self):
        """Return the frame selected by the ``train`` flag."""
        return self.train_df if self.train else self.test_df

    def __len__(self):
        # A split shorter than one window holds no samples; never report a
        # negative length (``len()`` would raise ValueError).
        return max(0, len(self._active_split()) - self.seq_length)

    def __getitem__(self, index):
        """Return ``(input_features, target_label)`` for window ``index``.

        Negative indices count from the end. Raises IndexError when the
        index is outside ``range(len(self))``.
        """
        num_windows = len(self)
        if index < 0:
            # Without this, a negative start makes iloc return a truncated
            # (often empty) window instead of failing or wrapping around.
            index += num_windows
        if not 0 <= index < num_windows:
            raise IndexError('BikeDataset index out of range')

        dataset = self._active_split()
        beg_idx, end_idx = index, index + self.seq_length
        input_features = torch.tensor(
            dataset.iloc[beg_idx:end_idx, :-1].values, dtype=torch.float32
        )
        target_label = torch.tensor(dataset.iloc[end_idx, -1], dtype=torch.float32)
        return input_features, target_label

In [None]:
# Path of the CSV file backing both splits
csv_file = 'data/Bike-Sharing-Dataset/hour.csv'

# Number of consecutive rows in each input window
seq_length = 30

# One dataset per split, both read from the same file
train_dataset, test_dataset = (
    BikeDataset(csv_file, seq_length, train=use_train_split)
    for use_train_split in (True, False)
)

In [None]:
# Wrap each split in a DataLoader that handles batching and shuffling
batch_size = 1
shuffle = True
train_dl = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=shuffle)
test_dl = DataLoader(dataset=test_dataset, batch_size=1, shuffle=shuffle)

# Sanity check: take a single training batch and show the tensor shapes
# (the input is printed with its first two axes swapped, as the model sees it)
for batch in train_dl:
    input_features, target_label = batch
    print(input_features.transpose(1, 0).shape)
    print(target_label.shape)
    break

## Fit models

### LSTM

In [None]:
# Layer sizes: features per time step, LSTM hidden size, number of outputs
input_dim, hidden_dim, output_dim = 11, 30, 1

# Instantiate the LSTM model
model = LSTMModel(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)

# Mean-squared-error objective, optimised with Adam
loss_fn = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
# Smoke test: run the model on one test batch and print
# the prediction, the target and the resulting loss.
model.eval()
with torch.no_grad():
    for batch in test_dl:
        samples, labels = batch
        # The model is fed the batch with its first two axes swapped
        prediction = model(samples.transpose(1, 0))
        loss = loss_fn(prediction, labels.view(1, -1))
        print(prediction)
        print(labels.view(1, -1))
        print(loss)
        break

In [None]:
# Fit the LSTM on the training loader for five epochs
train(model=model, data=train_dl, loss_fn=loss_fn, optimizer=optimizer, epochs=5)

### Results