# Irish Weather — Simple RNN & LSTM (Dublin t2m)

This lightweight notebook mirrors the lecture slides and keeps code very small.
We use **one CSV**: `era5_ireland3_t2m_wind_2024.csv` and predict the **next hour** of Dublin temperature from the **previous 24 hours**.
Models: (1) `nn.RNN` (vanilla RNN), (2) `nn.LSTM` — no GRUs.

In [None]:
# %pip install pandas numpy scikit-learn torch matplotlib

import warnings, pathlib
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Name of the single input CSV, resolved relative to the working directory.
CSV_PATH = "era5_ireland3_t2m_wind_2024.csv"
# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would let a missing file slip through to read_csv.
if not pathlib.Path(CSV_PATH).exists():
    raise FileNotFoundError("Place era5_ireland3_t2m_wind_2024.csv next to this notebook.")
print("Found CSV ✓")
# Device label; the code in this notebook never moves tensors or models off the CPU.
device = "cpu"


In [None]:
# Read the CSV, then put the rows in chronological order with a clean 0..N-1 index
df = pd.read_csv(CSV_PATH, parse_dates=['time'])
df = df.sort_values('time')
df = df.reset_index(drop=True)

# Preview the first rows and report the covered time span
print(df.head(5))
t_first = df['time'].min()
t_last = df['time'].max()
print("Coverage:", t_first, "→", t_last, f"({len(df)} hourly rows)")

In [None]:
# Time split: 70/15/15 (chronological, by row index)
n = len(df)
i_tr, i_va = int(0.70 * n), int(0.85 * n)
splits = {'train': (0, i_tr), 'val': (i_tr, i_va), 'test': (i_va, n)}
print(splits)

# Build 24->next windows on scaled Dublin temp
T = 24  # number of past steps fed to the model
series = df[['Dublin_t2m_degC']].copy()
tr_lo, tr_hi = splits['train']
# Fit the scaler on the training rows only, so validation/test statistics
# never influence the standardisation.
# Fit and transform both receive plain ndarrays: fitting on a DataFrame and
# then transforming `.values` makes scikit-learn emit a feature-name
# mismatch warning on every call.
sc = StandardScaler().fit(series.values[tr_lo:tr_hi])
x_all = sc.transform(series.values)  # (N,1)

def make_windows(x, T=24):
    """Slice a series into (T-step history, next value) pairs.

    Window ``k`` uses ``x[k:k+T]`` as the input and ``x[k+T]`` as the target,
    so a series with ``N`` steps yields ``N - T`` pairs.

    Args:
        x: Array-like indexed by time along axis 0; trailing axes are kept.
        T: Number of consecutive steps in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` has shape ``(N - T, T, ...)``
        and ``y`` has shape ``(N - T, ...)``.

    Raises:
        ValueError: If ``T`` is negative or the series has no more than ``T``
            steps, i.e. not even one window/target pair can be formed.
    """
    x = np.asarray(x)
    n_windows = len(x) - T
    if T < 0 or n_windows < 1:
        raise ValueError(
            f"need more than T={T} time steps to build a window, got {len(x)}"
        )
    # Row k of `idx` holds the time indices k, k+1, ..., k+T-1; indexing with
    # it gathers every window in one vectorised copy instead of a Python loop.
    idx = np.arange(n_windows)[:, None] + np.arange(T)
    # Copy the targets so the result never aliases the caller's array.
    return x[idx], x[T:].copy()

X_all, y_all = make_windows(x_all, T)
M = len(X_all)
# Window k predicts row k+T, so a target-row boundary r corresponds to window
# index r-T. Deriving the window boundaries from the row-level i_tr / i_va
# keeps every training target inside the rows the scaler was fitted on and
# makes the window split agree with `splits`; recomputing 70%/85% of M instead
# shifts the boundaries by a few rows.
i_tr_w = max(i_tr - T, 0)
i_va_w = max(i_va - T, i_tr_w)
Xtr, ytr = X_all[:i_tr_w], y_all[:i_tr_w]
Xva, yva = X_all[i_tr_w:i_va_w], y_all[i_tr_w:i_va_w]
Xte, yte = X_all[i_va_w:], y_all[i_va_w:]
print("Shapes:", Xtr.shape, ytr.shape, "|", Xva.shape, yva.shape, "|", Xte.shape, yte.shape)

class SeqDataset(Dataset):
    """Dataset pairing each input window with its target value.

    Both arrays are copied into float32 tensors once, at construction time.
    """

    def __init__(self, X, y):
        def to_f32(values):
            return torch.tensor(values, dtype=torch.float32)

        self.X = to_f32(X)
        self.y = to_f32(y)

    def __len__(self):
        # One sample per entry along the first axis of X.
        return len(self.X)

    def __getitem__(self, i):
        window = self.X[i]
        target = self.y[i]
        return window, target

# One DataLoader per split; only the training loader shuffles its samples.
train_set = SeqDataset(Xtr, ytr)
val_set = SeqDataset(Xva, yva)
test_set = SeqDataset(Xte, yte)

tr_dl = DataLoader(train_set, batch_size=128, shuffle=True)
va_dl = DataLoader(val_set, batch_size=256)
te_dl = DataLoader(test_set, batch_size=256)

### Model 1: Vanilla RNN (`nn.RNN`)

Small hidden size, 8 epochs, gradient clipping. We'll compare to LSTM.

In [None]:
class TinyRNN(nn.Module):
    """Single-layer tanh RNN followed by a linear read-out.

    Takes a batch-first sequence with one feature per step, i.e. input of
    shape (batch, steps, 1), and returns one value per sequence, shape (batch, 1).
    """

    def __init__(self, hidden=16):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=1,
            hidden_size=hidden,
            nonlinearity='tanh',
            batch_first=True,
        )
        self.head = nn.Linear(in_features=hidden, out_features=1)

    def forward(self, x):
        # The second return value (final hidden state) is not needed here.
        per_step, _ = self.rnn(x)
        # Only the output at the last time step feeds the prediction head.
        last_step = per_step[:, -1, :]
        return self.head(last_step)

# Seed torch's global RNG before the model is built
torch.manual_seed(0)
model_rnn = TinyRNN()

# Adam over all model parameters; mean-squared error as the training objective
opt = torch.optim.Adam(params=model_rnn.parameters(), lr=3e-3)
loss_fn = nn.MSELoss()

def eval_mse(model, loader, criterion=None):
    """Return the sample-weighted mean loss of ``model`` over ``loader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass; the model is left in eval mode afterwards.

    Args:
        model: Callable module mapping an input batch to predictions.
        loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss returning a per-batch mean. Defaults to the
            notebook-level ``loss_fn`` so existing two-argument calls keep
            working.

    Returns:
        Mean loss over all samples, with each batch weighted by its size.

    Raises:
        ValueError: If the loader yields no samples.
    """
    if criterion is None:
        criterion = loss_fn  # module-level default, looked up at call time
    model.eval()
    total, count = 0.0, 0
    with torch.no_grad():
        for xb, yb in loader:
            batch = len(xb)
            # Undo the per-batch mean so a smaller final batch is not over-weighted.
            total += criterion(model(xb), yb).item() * batch
            count += batch
    if count == 0:
        raise ValueError("eval_mse: loader produced no samples")
    return total / count

# Train the RNN for 8 epochs, reporting train and validation MSE after each one
for epoch in range(8):
    model_rnn.train()
    loss_sum = 0
    n_seen = 0
    for xb, yb in tr_dl:
        opt.zero_grad()
        batch_loss = loss_fn(model_rnn(xb), yb)
        batch_loss.backward()
        # Rescale gradients so their overall norm does not exceed 1.0
        nn.utils.clip_grad_norm_(model_rnn.parameters(), 1.0)
        opt.step()
        # Weight each batch mean by its size to get a per-sample average
        n_batch = len(xb)
        loss_sum += batch_loss.item() * n_batch
        n_seen += n_batch
    train_mse = loss_sum / n_seen
    val = eval_mse(model_rnn, va_dl)
    print(f"Epoch {epoch+1:02d} | train MSE={train_mse:.4f} | val MSE={val:.4f}")

# Test RMSE back in °C
def rmse_c(model, loader, scaler):
    """Compute the RMSE of ``model`` over ``loader`` in the scaler's original units.

    Predictions and targets are gathered batch by batch with gradients
    disabled, mapped back through ``scaler.inverse_transform``, and compared.

    Returns:
        Tuple ``(rmse, predictions, targets)``: the RMSE as a Python float,
        plus the two inverse-transformed arrays.
    """
    model.eval()
    pred_batches = []
    true_batches = []
    with torch.no_grad():
        for xb, yb in loader:
            pred_batches.append(model(xb).numpy())
            true_batches.append(yb.numpy())

    # Stack the batches row-wise, then undo the standardisation
    P_inv = scaler.inverse_transform(np.vstack(pred_batches))
    T_inv = scaler.inverse_transform(np.vstack(true_batches))

    residual = P_inv - T_inv
    rmse = float(np.sqrt((residual ** 2).mean()))
    return rmse, P_inv, T_inv

# Score the trained RNN on the test loader
rmse_rnn, pred_rnn, true_rnn = rmse_c(model_rnn, te_dl, sc)
print(f"Test RMSE — RNN: {rmse_rnn:.3f} °C")

# Overlay truth and prediction for the first 200 test windows
plt.figure(figsize=(10,3))
for curve, curve_label in ((true_rnn, 'Truth'), (pred_rnn, 'RNN pred')):
    plt.plot(curve[:200,0], label=curve_label)
plt.title('RNN — Dublin t2m (test segment)')
plt.legend()
plt.grid(True)
plt.show()

### Model 2: LSTM (`nn.LSTM`)

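Same task and training setup as the RNN (same data, window length, optimiser and epochs), so results are broadly comparable; note the LSTM defaults to a larger hidden size (32 vs. 16).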

In [None]:
class TinyLSTM(nn.Module):
    """Single-layer LSTM followed by a linear read-out.

    Takes a batch-first sequence with one feature per step, i.e. input of
    shape (batch, steps, 1), and returns one value per sequence, shape (batch, 1).
    """

    def __init__(self, hidden=32):
        super().__init__()
        self.rnn = nn.LSTM(input_size=1, hidden_size=hidden, batch_first=True)
        self.head = nn.Linear(in_features=hidden, out_features=1)

    def forward(self, x):
        # The second return value (final hidden/cell states) is not needed here.
        per_step, _ = self.rnn(x)
        # Only the output at the last time step feeds the prediction head.
        last_step = per_step[:, -1, :]
        return self.head(last_step)

# Re-seed torch's global RNG before the LSTM is built
torch.manual_seed(0)
model_lstm = TinyLSTM()

# Fresh Adam optimiser for the LSTM parameters; same MSE objective as before
opt = torch.optim.Adam(params=model_lstm.parameters(), lr=3e-3)
loss_fn = nn.MSELoss()

# Train the LSTM for 8 epochs, reporting train and validation MSE after each one
for epoch in range(8):
    model_lstm.train()
    loss_sum = 0
    n_seen = 0
    for xb, yb in tr_dl:
        opt.zero_grad()
        batch_loss = loss_fn(model_lstm(xb), yb)
        batch_loss.backward()
        # Rescale gradients so their overall norm does not exceed 1.0
        nn.utils.clip_grad_norm_(model_lstm.parameters(), 1.0)
        opt.step()
        # Weight each batch mean by its size to get a per-sample average
        n_batch = len(xb)
        loss_sum += batch_loss.item() * n_batch
        n_seen += n_batch
    train_mse = loss_sum / n_seen
    val = eval_mse(model_lstm, va_dl)
    print(f"Epoch {epoch+1:02d} | train MSE={train_mse:.4f} | val MSE={val:.4f}")

# Score the trained LSTM on the test loader
rmse_lstm, pred_lstm, true_lstm = rmse_c(model_lstm, te_dl, sc)
print(f"Test RMSE — LSTM: {rmse_lstm:.3f} °C")

# Overlay truth and prediction for the first 200 test windows
plt.figure(figsize=(10,3))
for curve, curve_label in ((true_lstm, 'Truth'), (pred_lstm, 'LSTM pred')):
    plt.plot(curve[:200,0], label=curve_label)
plt.title('LSTM — Dublin t2m (test segment)')
plt.legend()
plt.grid(True)
plt.show()

**Takeaways (from the slides):**
- RNNs can struggle with long dependencies (vanishing/exploding gradients).
- LSTMs add gates + cell state to help remember/forget information.
- Always: standardise inputs, use sensible window lengths, clip gradients, watch val loss.

**Try next:** change `T` to 12/48; train for 20 epochs; add Dublin wind as a second feature.