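First, we load the train/validation/test splits and min-max scale the two target columns (`registered` and `casual`), fitting the scalers on the training split only.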

In [12]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Read the three pre-split partitions (train / validation / test).
df_train, df_val, df_test = map(
    pd.read_csv,
    ('./dataset/train.csv', './dataset/val.csv', './dataset/test.csv'),
)

# One min-max scaler per target column, fitted on the training split only and
# then reused unchanged for the validation and test splits.
scaler_r = MinMaxScaler().fit(df_train[['registered']])
scaler_c = MinMaxScaler().fit(df_train[['casual']])

# Append the scaled targets as new columns on every split
# ('registered_s' is added first, then 'casual_s').
for raw_col, scaled_col, fitted_scaler in (
    ('registered', 'registered_s', scaler_r),
    ('casual', 'casual_s', scaler_c),
):
    for split_df in (df_train, df_val, df_test):
        split_df[scaled_col] = fitted_scaler.transform(split_df[[raw_col]])

In [13]:
# Preview the first rows of the training split, including the newly added
# 'registered_s' and 'casual_s' columns.
df_train.head()

Unnamed: 0.1,Unnamed: 0,season,workingday,weathersit,temp,hum,casual,registered,day_sin,day_cos,week_sin,week_cos,registered_s,casual_s
0,0,1,0,1,0.24,0.81,3,13,0.0,1.0,-0.781831,0.62349,0.016332,0.008174
1,1,1,0,1,0.22,0.8,8,32,0.258819,0.965926,-0.781831,0.62349,0.040201,0.021798
2,2,1,0,1,0.22,0.8,5,27,0.5,0.866025,-0.781831,0.62349,0.03392,0.013624
3,3,1,0,1,0.24,0.75,3,10,0.707107,0.707107,-0.781831,0.62349,0.012563,0.008174
4,4,1,0,1,0.24,0.75,0,1,0.866025,0.5,-0.781831,0.62349,0.001256,0.0


In [14]:
# Remove the 'Unnamed: 0' column (presumably a saved CSV index -- confirm) and
# the unscaled 'casual' / 'registered' counts from each split, in place.
columns_to_drop = ['Unnamed: 0', 'casual', 'registered']
for split_df in (df_train, df_val, df_test):
    split_df.drop(columns=columns_to_drop, inplace=True)

df_train.head()

Unnamed: 0,season,workingday,weathersit,temp,hum,day_sin,day_cos,week_sin,week_cos,registered_s,casual_s
0,1,0,1,0.24,0.81,0.0,1.0,-0.781831,0.62349,0.016332,0.008174
1,1,0,1,0.22,0.8,0.258819,0.965926,-0.781831,0.62349,0.040201,0.021798
2,1,0,1,0.22,0.8,0.5,0.866025,-0.781831,0.62349,0.03392,0.013624
3,1,0,1,0.24,0.75,0.707107,0.707107,-0.781831,0.62349,0.012563,0.008174
4,1,0,1,0.24,0.75,0.866025,0.5,-0.781831,0.62349,0.001256,0.0


Now let's turn the tabular data into windowed input-target samples that our models can work with.

In [15]:
import numpy as np


def create_dataset(data, feature_columns, target_columns, lookback_window, horizon, shift):
    """
    Transforms a time series DataFrame into input-target pairs for a deep learning model.

    Sample ``k`` starts at row ``i = k * shift``. Its input is rows
    ``[i, i + lookback_window)`` of the feature columns and its target is the
    immediately following rows ``[i + lookback_window, i + lookback_window + horizon)``
    of the target columns. Rows are addressed by position, not by index label.

    Args:
        data (pd.DataFrame): The input DataFrame, ordered in time.
        feature_columns (list): List of column names to use as input features (X).
        target_columns (list): List of column names to use as target variables (Y).
        lookback_window (int): The number of past time steps to use as input (X).
        horizon (int): The number of future time steps to predict (Y).
        shift (int): The number of steps to shift the window for each new sample.

    Returns:
        tuple: Two NumPy arrays ``(X, Y)`` with shapes
        ``(n_samples, lookback_window, n_features)`` and
        ``(n_samples, horizon, n_targets)``. When ``data`` has fewer than
        ``lookback_window + horizon`` rows, ``n_samples`` is 0 and the arrays
        still carry the trailing dimensions.

    Raises:
        ValueError: If ``lookback_window``, ``horizon`` or ``shift`` is smaller than 1.
    """
    if lookback_window < 1 or horizon < 1 or shift < 1:
        raise ValueError(
            "lookback_window, horizon and shift must all be >= 1 "
            f"(got lookback_window={lookback_window}, horizon={horizon}, shift={shift})"
        )

    # Select the columns once up front; slicing the resulting arrays inside the
    # loop is far cheaper than re-indexing the DataFrame for every window.
    feature_values = data[feature_columns].to_numpy()
    target_values = data[target_columns].to_numpy()

    # First row of every window for which both the input and the target fit.
    window_starts = range(0, len(data) - lookback_window - horizon + 1, shift)

    if len(window_starts) == 0:
        # Not enough rows for a single sample: return empty arrays that keep the
        # per-sample shape so downstream shape-dependent code still works.
        empty_x = np.empty((0, lookback_window) + feature_values.shape[1:], dtype=feature_values.dtype)
        empty_y = np.empty((0, horizon) + target_values.shape[1:], dtype=target_values.dtype)
        return empty_x, empty_y

    X = np.stack([feature_values[start:start + lookback_window] for start in window_starts])
    Y = np.stack([
        target_values[start + lookback_window:start + lookback_window + horizon]
        for start in window_starts
    ])
    return X, Y


In [16]:
# Windowing parameters
LOOKBACK_WINDOW = 24   # use the last 24 hours of data as model input
FORECAST_HORIZON = 3   # predict the next 3 hours
SHIFT = 1              # slide the window forward by 1 hour per sample

# ----------------------------------------------------
# Columns the model uses as inputs to learn the patterns.
feature_columns = [
    'season',
    'workingday',
    'weathersit',
    'temp',
    'hum',
    'day_sin',
    'day_cos',
    'week_sin',
    'week_cos',
]

# Columns the model will try to predict.
target_columns = [
    'casual_s',
    'registered_s',
]



Let's first create the training dataset and have a look at it.

In [20]:
X_train, Y_train = create_dataset(
    df_train,
    feature_columns=feature_columns,
    target_columns=target_columns,
    lookback_window=LOOKBACK_WINDOW,
    horizon=FORECAST_HORIZON,
    shift=SHIFT,
)

# Shapes are ordered as:
#   X -> [number of samples, lookback window, number of features]
#   Y -> [number of samples, forecast horizon, number of targets]
print(f"Shape of Input (X): {X_train.shape}")
print(f"Shape of Target (Y): {Y_train.shape}")

# Inspect the very first sample: its input window and the targets that follow it.
print("\n--- Example of First Sample ---")
print("Input X (first window, all features):")
print(X_train[0])
print("\nTarget Y (next 3 hours, only targets):")
print(Y_train[0])

Shape of Input (X): (13877, 24, 9)
Shape of Target (Y): (13877, 3, 2)

--- Example of First Sample ---
Input X (first window, all features):
[[ 1.00000000e+00  0.00000000e+00  1.00000000e+00  2.40000000e-01
   8.10000000e-01  0.00000000e+00  1.00000000e+00 -7.81831482e-01
   6.23489802e-01]
 [ 1.00000000e+00  0.00000000e+00  1.00000000e+00  2.20000000e-01
   8.00000000e-01  2.58819045e-01  9.65925826e-01 -7.81831482e-01
   6.23489802e-01]
 [ 1.00000000e+00  0.00000000e+00  1.00000000e+00  2.20000000e-01
   8.00000000e-01  5.00000000e-01  8.66025404e-01 -7.81831482e-01
   6.23489802e-01]
 [ 1.00000000e+00  0.00000000e+00  1.00000000e+00  2.40000000e-01
   7.50000000e-01  7.07106781e-01  7.07106781e-01 -7.81831482e-01
   6.23489802e-01]
 [ 1.00000000e+00  0.00000000e+00  1.00000000e+00  2.40000000e-01
   7.50000000e-01  8.66025404e-01  5.00000000e-01 -7.81831482e-01
   6.23489802e-01]
 [ 1.00000000e+00  0.00000000e+00  2.00000000e+00  2.40000000e-01
   7.50000000e-01  9.65925826e-01  2.5

In [21]:
# Window the validation and test splits with the same settings as the training split.
X_val, Y_val = create_dataset(
    df_val,
    feature_columns=feature_columns,
    target_columns=target_columns,
    lookback_window=LOOKBACK_WINDOW,
    horizon=FORECAST_HORIZON,
    shift=SHIFT,
)
X_test, Y_test = create_dataset(
    df_test,
    feature_columns=feature_columns,
    target_columns=target_columns,
    lookback_window=LOOKBACK_WINDOW,
    horizon=FORECAST_HORIZON,
    shift=SHIFT,
)


Now let's create the data loaders, which will serve the data created above to our models for training and testing.

In [22]:
import torch
from torch.utils.data import DataLoader, TensorDataset


# Turn each split's NumPy arrays into PyTorch tensors. The inputs keep their
# names (X_*); the targets are stored under new lowercase names (y_*).
X_train, y_train = torch.Tensor(X_train), torch.Tensor(Y_train)
X_val, y_val = torch.Tensor(X_val), torch.Tensor(Y_val)
X_test, y_test = torch.Tensor(X_test), torch.Tensor(Y_test)

# Pair inputs with targets, one TensorDataset per split.
train_dataset, val_dataset, test_dataset = (
    TensorDataset(inputs, targets)
    for inputs, targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Batch every split in its original order, 32 samples at a time.
# NOTE(review): shuffle=False also applies to the training loader -- confirm
# this is intentional, since training batches are usually shuffled.
train_loader, val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=32, shuffle=False)
    for split_dataset in (train_dataset, val_dataset, test_dataset)
)


Now let's design our models.  
We will test two models: one based solely on the LSTM architecture, and another that combines a CNN with an LSTM.

In [None]:
import torch.nn as nn

