# __Import__

In [1]:
import torch
from preprocessing import run_preprocessing, load_preprocessed_data
from models import NeuralNet, CustomDataset
from sklearn.model_selection import train_test_split

__Helper Functions__

In [2]:
def split_data(df):
    X = df.drop("prec", axis=1)
    y = df["prec"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

    return X_train, X_test, y_train, y_test

__Load Data__

In [3]:
df = load_preprocessed_data()
X_train, X_test, y_train, y_test = split_data(df)
df.shape

(1424400, 26)

# __Neural Net__

__Train__

In [4]:
# create data loader
train_dataset = CustomDataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=64)
test_dataset = CustomDataset(X_test, y_test)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64)

# create model
model = NeuralNet()

# train model
model.fit(num_epochs=10, trainloader=trainloader, testloader=testloader)

Starting epoch 1


                                                               

Epoch 0, Training loss: 3.4746498604334977
Epoch 0, Validation loss: 3.179063055050021
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_1.pth
Starting epoch 2


                                                               

Epoch 1, Training loss: 3.1406764870124544
Epoch 1, Validation loss: 3.060097099493163
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_2.pth
Starting epoch 3


                                                               

Epoch 2, Training loss: 3.0583393586997962
Epoch 2, Validation loss: 2.9983593753949243
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_3.pth
Starting epoch 4


                                                               

Epoch 3, Training loss: 3.0070596440459707
Epoch 3, Validation loss: 2.9589551318204204
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_4.pth
Starting epoch 5


                                                               

Epoch 4, Training loss: 2.9689472406987814
Epoch 4, Validation loss: 2.9270779341303053
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_5.pth
Starting epoch 6


                                                               

Epoch 5, Training loss: 2.939197319233372
Epoch 5, Validation loss: 2.8995880791397175
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_6.pth
Starting epoch 7


                                                               

Epoch 6, Training loss: 2.9147295261684216
Epoch 6, Validation loss: 2.8773301629154595
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_7.pth
Starting epoch 8


                                                               

Epoch 7, Training loss: 2.8938947009910208
Epoch 7, Validation loss: 2.861330593151837
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_8.pth
Starting epoch 9


                                                               

Epoch 8, Training loss: 2.8748790867980545
Epoch 8, Validation loss: 2.8467484589410037
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_9.pth
Starting epoch 10


                                                                

Epoch 9, Training loss: 2.8581615967951017
Epoch 9, Validation loss: 2.833800912867482
Model checkpoint saved at c:\Users\Alexander Lorenz\Documents\GitHub\LamaH-Precipitation-Forecast\model\checkpoints/model_epoch_10.pth


__Evaluate__

In [5]:
model.evaluate(dataloader=testloader)

Test Set RMSE: 2.8338


__Hyperparemeter Tuning__

In [11]:
from torch import nn
from tqdm import tqdm
from ray import tune, train
from ray.tune.schedulers import ASHAScheduler

class NeuralNet(nn.Module):
  '''
    Multilayer Perceptron for regression.
  '''
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
      nn.Linear(24, 64),
      nn.ReLU(),
      nn.Linear(64, 128),
      nn.ReLU(),
      nn.Linear(128, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
      nn.Linear(32, 1)
    )


  def forward(self, x):
    return self.layers(x)
  
  
  def fit(self, num_epochs, trainloader, testloader, checkpoint_dir="../model/checkpoints"):
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)

    for epoch in range(num_epochs):
        self.train()
        print(f'Starting epoch {epoch+1}')
        current_loss = 0.0

        for i, data in enumerate(tqdm(trainloader, desc=f'Epoch {epoch+1}', leave=False), 0):
            # prepare
            X, y_true = data

            # forward: predict
            y_pred = self.forward(X)

            # compute loss
            loss = torch.sqrt(loss_function(y_pred, y_true))

            # backpropagating gradient of loss
            optimizer.zero_grad()
            loss.backward()

            # Updating parameters (weights and bias)
            optimizer.step()

            current_loss += loss.item()

        print("Epoch {}, Training loss: {}".format(epoch, current_loss / len(trainloader)))

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        for i, data in enumerate(testloader, 0):
            with torch.no_grad():
                X, y_true = data
                y_pred = self.forward(X)
                loss = torch.sqrt(loss_function(y_pred, y_true))  # RMSE
                val_loss += loss.item()
                val_steps += 1
        train.report(loss=(val_loss / val_steps))
        


In [27]:
def train_model():
    model.fit(num_epochs=1, trainloader=trainloader, testloader=testloader)

config = {
    "l1": tune.choice([2 ** i for i in range(7, 10)]),
    "l2": tune.choice([2 ** i for i in range(7, 10)]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([2, 4])
}

scheduler = ASHAScheduler(
    metric="loss",
    mode="min",
    max_t=10,
    grace_period=1,
    reduction_factor=2
)

result = tune.run(
    train_model,
    resources_per_trial={"cpu": 2, "gpu": 1},
    config=config,
    num_samples=10,
    scheduler=scheduler
)

print("Best config: ", result.get_best_config(metric="loss", mode="min"))


2025-01-05 11:40:29,388	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


ValueError: Unknown argument found in the Trainable function. The function args must include a single 'config' positional parameter.
Found: []