In [2]:
import os

import numpy as np
import pandas as pd
import shap
import torch
import torch.nn as nn
from joblib import load
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Config

In [3]:
# Identifier of the gauge under study; interpolated into the dataset and model paths below.
target_gauge_id = 3465
# File stem of the dataset, scaler and weights files; presumably the forecast horizon in hours (TODO confirm).
predict_next_hours = 24

### Data Loading

In [17]:
# Read the pre-processed samples for the configured gauge / horizon and keep
# only the rows in which every column holds a value.
dataset_path = f"../dataset/processed/simple/{target_gauge_id}/{predict_next_hours}.gz"
data = pd.read_csv(dataset_path).dropna(how="any")

In [5]:
# Features are every column other than the timestamp and the label;
# the label itself is the TARGET column.
is_feature_column = ~data.columns.isin(["DATE", "TARGET"])
x_data = data.loc[:, is_feature_column]
y_data = data["TARGET"]

In [6]:
# Preview the feature columns (every column except DATE and TARGET).
x_data

Unnamed: 0,DISTANCE_0,DISTANCE_1,DISTANCE_2,DISTANCE_3,DISTANCE_4,DISTANCE_5,DISTANCE_6,DISTANCE_7,DISTANCE_8,DISTANCE_9,...,LEVEL_9_62,LEVEL_9_63,LEVEL_9_64,LEVEL_9_65,LEVEL_9_66,LEVEL_9_67,LEVEL_9_68,LEVEL_9_69,LEVEL_9_70,LEVEL_9_71
0,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,78.500000,78.333333,80.166667,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667
1,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,78.333333,80.166667,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333
2,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,80.166667,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333,62.166667
3,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333,62.166667,59.500000
4,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333,62.166667,59.500000,59.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11563,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,89.333333,88.000000,87.000000,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667
11564,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,88.000000,87.000000,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667,86.000000
11565,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,87.000000,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667,86.000000,86.000000
11566,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,19.327307,...,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667,86.000000,86.000000,81.833333


In [7]:
# Preview the TARGET column that serves as the prediction target.
y_data

0        150.000000
1        143.833333
2        137.000000
3        131.666667
4        127.666667
            ...    
11563    138.666667
11564    139.166667
11565    140.166667
11566    141.000000
11567    141.166667
Name: TARGET, Length: 11067, dtype: float64

### Data Processing

In [8]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the
# partition reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.3,
    random_state=42,
)

# Standardize the features: statistics are learned from the training
# partition only and then applied to both partitions.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [9]:
# Wrap the scaled feature matrices as float32 tensors.
x_train_tensor, x_test_tensor = (
    torch.tensor(scaled, dtype=torch.float32)
    for scaled in (x_train_scaled, x_test_scaled)
)

# Targets become float32 column vectors of shape (n_samples, 1).
y_train_tensor, y_test_tensor = (
    torch.tensor(target.to_numpy(), dtype=torch.float32).unsqueeze(1)
    for target in (y_train, y_test)
)

### Model Setup

In [10]:
# Build the fully connected regressor: the width doubles, returns to the
# input size, shrinks to a quarter, and ends in a single Softplus output.
# Module order is kept as Linear/ReLU pairs followed by Linear/Softplus so the
# positional state-dict keys (0, 2, 4, 6) stay the same.
n_features = x_train_tensor.shape[1]
layer_widths = [n_features, n_features * 2, n_features, n_features // 4]

hidden_layers = []
for fan_in, fan_out in zip(layer_widths, layer_widths[1:]):
    hidden_layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]

model = nn.Sequential(
    *hidden_layers,
    nn.Linear(layer_widths[-1], 1),
    nn.Softplus(),
)

### Loading Model

In [11]:
directory = f"../models/simple/{target_gauge_id}"

# Load the persisted scaler.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts produced by a trusted source.
scaler = load(f"{directory}/{predict_next_hours}.bin")

# Re-scale the held-out features with the loaded scaler. Without this step the
# loaded scaler is never applied and the evaluation runs on features scaled by
# the scaler that was re-fitted earlier in this notebook, which is not
# guaranteed to match the persisted preprocessing.
x_test_scaled = scaler.transform(x_test)
x_test_tensor = torch.tensor(x_test_scaled, dtype=torch.float32)

# Load the trained weights into the architecture defined above.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
# machine; the model and all tensors in this notebook live on the CPU.
# NOTE(security): torch.load is pickle-based as well; only load trusted files.
state_dict = torch.load(f"{directory}/{predict_next_hours}.pth", map_location="cpu")
model.load_state_dict(state_dict)

# Switch to inference mode; as the last expression this also displays the model.
model.eval()

Sequential(
  (0): Linear(in_features=730, out_features=1460, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1460, out_features=730, bias=True)
  (3): ReLU()
  (4): Linear(in_features=730, out_features=182, bias=True)
  (5): ReLU()
  (6): Linear(in_features=182, out_features=1, bias=True)
  (7): Softplus(beta=1.0, threshold=20.0)
)

### Evaluating Model

In [14]:
# Run the forward pass on the test features without tracking gradients.
with torch.no_grad():
    predicted_values = model(x_test_tensor).numpy()

# Line the predictions up with the observed targets by position: the target
# index is reset so both sides share the same 0..n-1 index before merging.
y_pred = pd.DataFrame(predicted_values, columns=["PREDICTION"])
results = pd.merge(
    y_test.reset_index(drop=True),
    y_pred,
    left_index=True,
    right_index=True,
)

In [15]:
# Squared error of each prediction against its observed target.
squared_error = results["TARGET"].sub(results["PREDICTION"]).pow(2)
results["DIFF"] = squared_error

In [16]:
# Show TARGET, PREDICTION and the squared error (DIFF) side by side.
results

Unnamed: 0,TARGET,PREDICTION,DIFF
0,120.166667,123.495811,11.083205
1,257.166667,255.843231,1.751481
2,124.500000,127.635887,9.833788
3,112.000000,119.050911,49.715345
4,166.000000,169.753021,14.085168
...,...,...,...
3316,136.166667,137.346558,1.392143
3317,189.666667,188.798309,0.754044
3318,275.166667,279.107391,15.529311
3319,212.000000,213.173065,1.376082
