<a href="https://colab.research.google.com/github/alihuss1017/LSTM-Weather-Prediction/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
from google.colab import userdata
gh_token = userdata.get('GITHUB_TOKEN')

In [12]:
!git clone https://{gh_token}@github.com/alihuss1017/LSTM-Weather-Prediction.git

Cloning into 'LSTM-Weather-Prediction'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (26/26), done.[K
remote: Total 28 (delta 11), reused 5 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (28/28), 92.36 KiB | 8.40 MiB/s, done.
Resolving deltas: 100% (11/11), done.


In [13]:
cd LSTM-Weather-Prediction

/content/LSTM-Weather-Prediction


In [14]:
import pandas as pd
df = pd.read_csv('data/seattle-weather.csv')

## Checking for Null Values and Duplicates

In [15]:
print(f'''Number of null values:\n{df.isnull().sum()}\n\nNumber of duplicated rows: {df.duplicated().sum()}''')

Number of null values:
date             0
precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

Number of duplicated rows: 0


## Setting DateTime as Index

In [16]:
df = df.set_index(df["date"])
df = df.drop('date', axis = 1)


## Saving the mean and standard deviation for Inference Purposes

In [17]:
mu, std = df['temp_max'].mean(), df['temp_max'].std()

## One-Hot Encoding Categorical Features:

In [18]:
df_encoded = pd.get_dummies(df, columns = ['weather'])

## Applying Z-Score Normalization on Numerical Features

In [19]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
columns_to_normalize = df_encoded.select_dtypes(include='float').columns.tolist()

df_encoded[columns_to_normalize] = scaler.fit_transform(df_encoded[columns_to_normalize])

## Defining the PyTorch Custom Dataset Class

In [20]:
import torch
from torch.utils.data import Dataset, DataLoader

class WeatherDataset(Dataset):

  def __init__(self, data_df, seq_len):
    self.data = data_df
    self.seq_len = seq_len

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    cols_to_cast = self.data.select_dtypes(include = ['object', 'bool']).columns.tolist()
    for col in cols_to_cast:
      self.data[col] = self.data[col].astype('int')

    x = torch.tensor(self.data.iloc[idx:idx+self.seq_len].values, dtype = torch.float32)
    y = torch.tensor(self.data['temp_max'].iloc[idx+self.seq_len+1], dtype = torch.float32)

    return x, y


## Defining Model

In [21]:
import torch.nn as nn

input_features = 9

class lstmModel(nn.Module):
  def __init__(self, hidden_features, num_layers):
    super().__init__()

    self.lstm = nn.LSTM(input_size = input_features,
                        hidden_size = hidden_features, num_layers = num_layers,
                        batch_first = True)
    self.fc = nn.Linear(hidden_features, 1)

  def forward(self, x):
    out, [h, c] = self.lstm(x)
    return self.fc(h[-1])


## Model Debugging

In [22]:
model = lstmModel(32, 2)
model.eval()
with torch.no_grad():
  print(f'Output: {model(torch.rand((5, 9)))}')

Output: tensor([0.1446])


## Configuring Device

In [23]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

print(f'Device: {device}')

Device: cuda


## Training Model

In [24]:
def train(model, train_loader, optimizer, loss_fn):
  model.train()
  total_loss = 0
  for X, y in train_loader:
    optimizer.zero_grad()

    X, y = X.to(device), y.to(device)
    y_hat = model(X)

    loss = loss_fn(y, y_hat)
    total_loss += loss.item()

    loss.backward()
    optimizer.step()

  return total_loss / len(train_loader)


## Evaluating Model

In [25]:
import matplotlib.pyplot as plt
import numpy as np


def eval(model, val_loader, loss_fn):
  predicted = []
  actual = []
  total_loss = 0
  model.eval()

  with torch.no_grad():
    for X, y in val_loader:
      X, y = X.to(device), y.to(device)
      y_hat = model(X)

      loss = loss_fn(y, y_hat)
      total_loss += loss.item()

  return total_loss / len(val_loader)




In [26]:
!pip install wandb
!pip install optuna



In [27]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33malihuss1017[0m ([33malihuss1017-uc-san-diego[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [28]:
!pip install optuna-integration[wandb]



In [29]:
from optuna.integration.wandb import WeightsAndBiasesCallback

In [30]:
import optuna
import wandb
from torch.utils.data import Subset
import torch.optim as optim


def objective(trial):

  seq_len = trial.suggest_int('seq_len', 5, 20)
  batch_size = trial.suggest_int('batch_size', 16, 64)
  hidden_features = trial.suggest_int('hidden_features', 32, 128)
  num_layers = trial.suggest_int('num_layers', 1, 3)
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  num_epochs = trial.suggest_int('num_epochs', 5, 10)
  loss_fn = nn.MSELoss()

  wandb.init(
      project="lstm-weather",
      config={
          'seq_len': seq_len,
          'batch_size': batch_size,
          'hidden_features': hidden_features,
          'num_layers': num_layers,
          'lr': lr,
          'num_epochs': num_epochs
      }
  )

  dataset = WeatherDataset(data_df = df_encoded, seq_len = seq_len)
  train_len = int(0.7 * len(dataset))

  train_data = Subset(dataset, range(train_len))
  val_data = Subset(dataset, range(train_len, len(dataset)))

  train_loader = DataLoader(train_data, batch_size = 32, num_workers = 2, drop_last = True)
  val_loader = DataLoader(val_data, batch_size = 32, num_workers = 2, drop_last = True )

  model = lstmModel(hidden_features = hidden_features, num_layers = num_layers).to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr = lr)

  for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    val_loss = eval(model, val_loader, loss_fn)

    wandb.log({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss})

  return val_loss



In [None]:
wandb_callback = WeightsAndBiasesCallback(metric_name="val_loss", wandb_kwargs={'project': 'lstm-weather'})

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50, callbacks=[wandb_callback])

best_trial = study.best_trial

  wandb_callback = WeightsAndBiasesCallback(metric_name="val_loss", wandb_kwargs={'project': 'lstm-weather'})
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Currently logged in as: [33malihuss1017[0m ([33malihuss1017-uc-san-diego[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[I 2025-11-02 19:41:56,876] A new study created in memory with name: no-name-26a3c286-60d5-4386-b72b-1ec9e6f0d91e
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:42:13,412] Trial 0 finished with value: 0.29073649186354417 and parameters: {'seq_len': 13, 'batch_size': 42, 'hidden_features': 62, 'num_layers': 3, 'lr': 0.007236687794546415, 'num_epochs': 10}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▂▂▁▁▁▁▁▁
val_loss,█▄▆▃▂▁▂▂▁▁

0,1
epoch,9.0
train_loss,0.30283
val_loss,0.29074


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:42:19,789] Trial 1 finished with value: 0.36227999742214495 and parameters: {'seq_len': 10, 'batch_size': 45, 'hidden_features': 102, 'num_layers': 3, 'lr': 0.00786144404549479, 'num_epochs': 5}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▂▁
val_loss,█▃▁▂▂

0,1
epoch,4.0
train_loss,0.36975
val_loss,0.36228


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:42:27,480] Trial 2 finished with value: 0.3151882073053947 and parameters: {'seq_len': 6, 'batch_size': 45, 'hidden_features': 75, 'num_layers': 2, 'lr': 0.00668046005257047, 'num_epochs': 6}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▂▄▅▇█
train_loss,█▄▂▁▂▁
val_loss,▂█▁▂▄▁

0,1
epoch,5.0
train_loss,0.33642
val_loss,0.31519


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:42:37,401] Trial 3 finished with value: 0.3213668901186723 and parameters: {'seq_len': 12, 'batch_size': 29, 'hidden_features': 102, 'num_layers': 3, 'lr': 0.0002447103145809665, 'num_epochs': 10}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▅▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_loss,0.31515
val_loss,0.32137


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:42:46,712] Trial 4 finished with value: 0.30434974053731334 and parameters: {'seq_len': 13, 'batch_size': 43, 'hidden_features': 52, 'num_layers': 2, 'lr': 0.0017485815306138614, 'num_epochs': 8}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▂▃▄▅▆▇█
train_loss,█▂▁▁▁▁▁▁
val_loss,▅█▅▃▂▁▁▁

0,1
epoch,7.0
train_loss,0.3116
val_loss,0.30435


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:42:55,721] Trial 5 finished with value: 0.31716852004711443 and parameters: {'seq_len': 13, 'batch_size': 16, 'hidden_features': 123, 'num_layers': 3, 'lr': 0.004964729403248296, 'num_epochs': 8}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▂▃▄▅▆▇█
train_loss,█▄▂▂▂▁▁▁
val_loss,█▅▃▄▁▁▁▂

0,1
epoch,7.0
train_loss,0.32826
val_loss,0.31717


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:43:04,231] Trial 6 finished with value: 0.29388851901659596 and parameters: {'seq_len': 18, 'batch_size': 17, 'hidden_features': 111, 'num_layers': 2, 'lr': 0.0003203498293270329, 'num_epochs': 7}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▂▃▅▆▇█
train_loss,█▃▁▁▁▁▁
val_loss,█▁▁▁▁▁▁

0,1
epoch,6.0
train_loss,0.30233
val_loss,0.29389


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:43:11,751] Trial 7 finished with value: 0.6402287953175031 and parameters: {'seq_len': 20, 'batch_size': 26, 'hidden_features': 62, 'num_layers': 2, 'lr': 6.479537634164016e-05, 'num_epochs': 7}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▅▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▇▇▆▅▃▁
val_loss,█▇▇▆▅▃▁▁

0,1
batch_size,26.0
epoch,6.0
hidden_features,62.0
lr,6e-05
num_epochs,7.0
num_layers,2.0
seq_len,20.0
train_loss,0.68785
val_loss,0.64023


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:43:21,545] Trial 8 finished with value: 0.40747972692434603 and parameters: {'seq_len': 15, 'batch_size': 44, 'hidden_features': 62, 'num_layers': 1, 'lr': 9.88571216272138e-05, 'num_epochs': 9}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
epoch,▁▂▃▄▅▅▆▇█
train_loss,█▇▇▆▅▄▃▂▁
val_loss,█▇▇▆▅▄▃▂▁

0,1
epoch,8.0
train_loss,0.46033
val_loss,0.40748


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:43:29,811] Trial 9 finished with value: 0.9835530336086566 and parameters: {'seq_len': 20, 'batch_size': 32, 'hidden_features': 104, 'num_layers': 1, 'lr': 1.3106449090686091e-05, 'num_epochs': 7}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▅▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▇▆▄▃▂▁
val_loss,█▇▆▄▃▂▁▁

0,1
batch_size,32.0
epoch,6.0
hidden_features,104.0
lr,1e-05
num_epochs,7.0
num_layers,1.0
seq_len,20.0
train_loss,0.98585
val_loss,0.98355


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:43:39,063] Trial 10 finished with value: 0.3122853453342731 and parameters: {'seq_len': 7, 'batch_size': 59, 'hidden_features': 32, 'num_layers': 3, 'lr': 0.0013545994197709335, 'num_epochs': 10}. Best is trial 0 with value: 0.29073649186354417.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▃▄▅▆▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▃▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁▁

0,1
batch_size,59.0
epoch,9.0
hidden_features,32.0
lr,0.00135
num_epochs,10.0
num_layers,3.0
seq_len,7.0
train_loss,0.31941
val_loss,0.31229


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:43:48,175] Trial 11 finished with value: 0.2885194667256795 and parameters: {'seq_len': 17, 'batch_size': 55, 'hidden_features': 86, 'num_layers': 2, 'lr': 0.0010149872081758574, 'num_epochs': 9}. Best is trial 11 with value: 0.2885194667256795.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▄▅▅▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▃▁▁▁▁▁▁▁
val_loss,██▄▃▂▂▁▁▁▁

0,1
batch_size,55.0
epoch,8.0
hidden_features,86.0
lr,0.00101
num_epochs,9.0
num_layers,2.0
seq_len,17.0
train_loss,0.30108
val_loss,0.28852


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:43:57,920] Trial 12 finished with value: 0.2934521505465874 and parameters: {'seq_len': 16, 'batch_size': 56, 'hidden_features': 84, 'num_layers': 1, 'lr': 0.0016406224779462817, 'num_epochs': 9}. Best is trial 11 with value: 0.2885194667256795.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▄▅▅▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▂▁▁▁▁▁▁▁
val_loss,▇█▆▄▃▂▁▁▁▁

0,1
batch_size,56.0
epoch,8.0
hidden_features,84.0
lr,0.00164
num_epochs,9.0
num_layers,1.0
seq_len,16.0
train_loss,0.29928
val_loss,0.29345


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:44:06,940] Trial 13 finished with value: 0.2982603552249762 and parameters: {'seq_len': 16, 'batch_size': 53, 'hidden_features': 86, 'num_layers': 2, 'lr': 0.000660770156821709, 'num_epochs': 9}. Best is trial 11 with value: 0.2885194667256795.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▄▅▅▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▂▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
batch_size,53.0
epoch,8.0
hidden_features,86.0
lr,0.00066
num_epochs,9.0
num_layers,2.0
seq_len,16.0
train_loss,0.30181
val_loss,0.29826


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:44:16,325] Trial 14 finished with value: 0.29640834950483763 and parameters: {'seq_len': 10, 'batch_size': 52, 'hidden_features': 42, 'num_layers': 3, 'lr': 0.0029514644953608678, 'num_epochs': 10}. Best is trial 11 with value: 0.2885194667256795.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▃▄▅▆▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▃▂▁▁▁▁▁▁▁
val_loss,█▇▃▂▂▂▂▂▁▁▁

0,1
batch_size,52.0
epoch,9.0
hidden_features,42.0
lr,0.00295
num_epochs,10.0
num_layers,3.0
seq_len,10.0
train_loss,0.31181
val_loss,0.29641


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:44:26,116] Trial 15 finished with value: 0.2925184529561263 and parameters: {'seq_len': 18, 'batch_size': 64, 'hidden_features': 75, 'num_layers': 2, 'lr': 0.0008069618237283487, 'num_epochs': 9}. Best is trial 11 with value: 0.2885194667256795.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▄▅▅▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
batch_size,64.0
epoch,8.0
hidden_features,75.0
lr,0.00081
num_epochs,9.0
num_layers,2.0
seq_len,18.0
train_loss,0.30454
val_loss,0.29252


  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-11-02 19:44:36,621] Trial 16 finished with value: 0.29797426668497234 and parameters: {'seq_len': 10, 'batch_size': 38, 'hidden_features': 62, 'num_layers': 2, 'lr': 0.00339200990906664, 'num_epochs': 10}. Best is trial 11 with value: 0.2885194667256795.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)


0,1
batch_size,▁
epoch,▁▂▃▃▄▅▆▆▇█
hidden_features,▁
lr,▁
num_epochs,▁
num_layers,▁
seq_len,▁
train_loss,█▂▂▂▁▂▁▂▁▁
val_loss,█▆▅▃▂▂▃▂▁▁▁

0,1
batch_size,38.0
epoch,9.0
hidden_features,62.0
lr,0.00339
num_epochs,10.0
num_layers,2.0
seq_len,10.0
train_loss,0.31523
val_loss,0.29797


In [1]:
for key, val in best_trial.params.items():
  print(f"{key}: {val}")

NameError: name 'best_trial' is not defined