In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

sns.set_style("whitegrid")

# Data Loading, Cleaning, EDA

In [2]:
# Locations of the train / test CSV splits, relative to the working directory.
train_path = "dataset/delhi-climate-data/DailyDelhiClimateTrain.csv"
test_path = "dataset/delhi-climate-data/DailyDelhiClimateTest.csv"

# Read both splits unmodified; cleaning happens in a separate step.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

def clean_df(df):
    """Return a gap-free daily ``meantemp`` series as a two-column frame.

    Keeps only the ``date`` and ``meantemp`` columns, parses ``date`` to
    datetime, casts ``meantemp`` to float32, orders the rows chronologically
    and resamples to one row per calendar day, forward-filling days that are
    missing from the input. The input frame is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing at least a ``date`` column (anything
        ``pd.to_datetime`` can parse) and a numeric ``meantemp`` column.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (datetime) and ``meantemp`` (float32), one row per
        day from the earliest to the latest date in ``df``, with a fresh
        0..n-1 index.
    """
    return (df
        .loc[:, ["date", "meantemp"]]
        .assign(
            date=lambda df_: pd.to_datetime(df_["date"]),
            meantemp=lambda df_: df_["meantemp"].astype("float32")
        )
        # Sort *after* parsing: sorting the raw strings is lexicographic and is
        # only chronological when the dates happen to be ISO-formatted.
        .sort_values("date", ascending=True)
        .set_index("date")
        # "1D" is the canonical pandas alias for calendar-day frequency
        # (the lowercase spelling is deprecated in newer pandas releases).
        .resample("1D")
        .ffill()
        .reset_index()
    )

# Swap the raw frames for their cleaned, daily-resampled versions.
df_train, df_test = map(clean_df, (df_train, df_test))

# Sanity check: (rows, columns) of each split, then the first training rows.
print(df_train.shape, df_test.shape)
df_train.head()

(1462, 2) (114, 2)


Unnamed: 0,date,meantemp
0,2013-01-01,10.0
1,2013-01-02,7.4
2,2013-01-03,7.166667
3,2013-01-04,8.666667
4,2013-01-05,6.0


# Create Dataset

In [32]:
# Length of each sliding window. The next cell splits every window into its
# first 12 values (inputs) and its last 12 values (targets shifted by one).
WINDOW_SIZE = 13


def make_windows(series, window=WINDOW_SIZE):
    """Stack every full-length sliding window of a 1-D array into a 2-D array.

    Parameters
    ----------
    series : array-like, shape (n,)
        Values in time order.
    window : int
        Number of consecutive values per window.

    Returns
    -------
    np.ndarray, shape (max(n - window + 1, 0), window)
        Row ``r`` is ``series[r : r + window]``. The result is a fresh,
        writable array with the same dtype as ``series``; it has zero rows
        when the series is shorter than ``window``.
    """
    series = np.asarray(series)
    n_windows = max(series.shape[0] - window + 1, 0)
    # Broadcasting the window start positions against the in-window offsets
    # yields an (n_windows, window) index grid: row r is r, r+1, ..., r+window-1.
    index_grid = np.arange(n_windows)[:, None] + np.arange(window)[None, :]
    return series[index_grid]


x_train = df_train["meantemp"].values
x_test = df_test["meantemp"].values

x_train_looped = make_windows(x_train)
x_test_looped = make_windows(x_test)

print(x_train_looped.shape, x_test_looped.shape)

(1450, 13) (102, 13)


For the training set, this gives 1450 samples, each a window of 13 consecutive daily mean temperatures.

For the test set, this gives 102 samples, each a window of 13 consecutive daily mean temperatures.

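Next, we need to create the targets `y` from these windows: the first 12 values of each window become the input `x`, and the same window shifted forward by one day (its last 12 values) becomes the target `y`.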

In [34]:
# Inputs drop the last value of each window; targets drop the first value,
# so every target row is its input row shifted forward by one step.
x_train_torch, y_train_torch = (
    torch.from_numpy(x_train_looped[:, :-1]),
    torch.from_numpy(x_train_looped[:, 1:]),
)
x_test_torch, y_test_torch = (
    torch.from_numpy(x_test_looped[:, :-1]),
    torch.from_numpy(x_test_looped[:, 1:]),
)

# Inputs and targets of a split must have matching shapes.
print(x_train_torch.shape, y_train_torch.shape)
print(x_test_torch.shape, y_test_torch.shape)

torch.Size([1450, 12]) torch.Size([1450, 12])
torch.Size([102, 12]) torch.Size([102, 12])


In [35]:
# Display the training inputs: each row is a window without its last value.
x_train_torch

tensor([[10.0000,  7.4000,  7.1667,  ..., 11.0000, 15.7143, 14.0000],
        [ 7.4000,  7.1667,  8.6667,  ..., 15.7143, 14.0000, 15.8333],
        [ 7.1667,  8.6667,  6.0000,  ..., 14.0000, 15.8333, 12.8333],
        ...,
        [16.0833, 17.8571, 19.8000,  ..., 16.8500, 17.2174, 15.2381],
        [17.8571, 19.8000, 18.0500,  ..., 17.2174, 15.2381, 14.0952],
        [19.8000, 18.0500, 17.2857,  ..., 15.2381, 14.0952, 15.0526]])

In [36]:
# Display the training targets: each row is a window without its first value.
y_train_torch

tensor([[ 7.4000,  7.1667,  8.6667,  ..., 15.7143, 14.0000, 15.8333],
        [ 7.1667,  8.6667,  6.0000,  ..., 14.0000, 15.8333, 12.8333],
        [ 8.6667,  6.0000,  7.0000,  ..., 15.8333, 12.8333, 14.7143],
        ...,
        [17.8571, 19.8000, 18.0500,  ..., 17.2174, 15.2381, 14.0952],
        [19.8000, 18.0500, 17.2857,  ..., 15.2381, 14.0952, 15.0526],
        [18.0500, 17.2857, 15.5500,  ..., 14.0952, 15.0526, 10.0000]])

In [37]:
# Display the test inputs: each row is a window without its last value.
x_test_torch

tensor([[15.9130, 18.5000, 17.1111,  ..., 12.1111, 11.0000, 11.7895],
        [18.5000, 17.1111, 18.7000,  ..., 11.0000, 11.7895, 13.2353],
        [17.1111, 18.7000, 18.3889,  ..., 11.7895, 13.2353, 13.2000],
        ...,
        [27.8571, 29.2500, 29.2500,  ..., 33.5000, 34.5000, 34.2500],
        [29.2500, 29.2500, 29.6667,  ..., 34.5000, 34.2500, 32.9000],
        [29.2500, 29.6667, 30.5000,  ..., 34.2500, 32.9000, 32.8750]])

In [38]:
# Display the test targets: each row is a window without its first value.
y_test_torch

tensor([[18.5000, 17.1111, 18.7000,  ..., 11.0000, 11.7895, 13.2353],
        [17.1111, 18.7000, 18.3889,  ..., 11.7895, 13.2353, 13.2000],
        [18.7000, 18.3889, 19.3182,  ..., 13.2353, 13.2000, 16.4348],
        ...,
        [29.2500, 29.2500, 29.6667,  ..., 34.5000, 34.2500, 32.9000],
        [29.2500, 29.6667, 30.5000,  ..., 34.2500, 32.9000, 32.8750],
        [29.6667, 30.5000, 31.2222,  ..., 32.9000, 32.8750, 32.0000]])

# Model Architecture