In [1]:
from model import *
from data import *

import itertools

In [2]:
# Build the run configuration: instantiate Config, then keep whatever
# load("config.json") returns as `config`.
# NOTE(review): the relative path is presumably resolved against the
# notebook's working directory - confirm in Config.load.
config = (
    Config()
    .load("config.json")
)

In [3]:
# Hyperparameters for tuning
#
# History steps
# Future steps
#
# Discrete projection dims (Basin and River)
# Projection dimension (Basin and River)
#
# Graph attention hidden dimension
# Graph attention layers
#
# LSTM layers
# LSTM hidden dimension
#
# CMAL hidden dimension
# CMAL mixtures
#
# Dropout (Probably not)
# Batch size (Probably not)
#
# Open question: does each of the above need separate encoder and decoder values (x2)?

In [4]:
def _cycleBatches(loader):
    """Yield batches from ``loader`` endlessly, restarting it when exhausted.

    Unlike ``itertools.cycle`` no copy of already-yielded batches is kept, so
    memory use does not grow with the number of batches in the loader. If a
    complete pass yields nothing the generator ends instead of spinning.
    """
    while True:
        yieldedAny = False
        for batch in loader:
            yieldedAny = True
            yield batch
        if not yieldedAny:
            return


def trainModel(config, modelClass, dataClass, objective, epochs, criterion: dict[str, nn.Module]):
    """Train ``modelClass(config)`` on ``dataClass(config)`` and return the model.

    The dataset is split by index without shuffling: the first
    ``int(len(dataset) * config.dataSplit)`` samples are used for training and
    the remainder for evaluation. After every optimisation step one evaluation
    batch is scored so that train and test loss are reported side by side.

    Args:
        config: Passed to ``dataClass`` and ``modelClass``; must expose
            ``dataSplit`` and ``batchSize``.
        modelClass: Callable building the model from ``config``. Calling the
            model on a batch of inputs must return a ``(hindcast, forecast)``
            pair; only the hindcast is scored here.
        dataClass: Callable building the dataset from ``config``. The dataset
            must provide ``info(sample)`` and yield ``(inputs, targets)``
            items whose batched targets expose ``dischargeHistory``.
        objective: Called as ``objective(hindcast, dischargeHistory[:, -1])``
            and must return a scalar tensor that supports ``backward()``.
        epochs: Number of full passes over the training loader.
        criterion: Named metric modules. Currently unused; kept so existing
            callers keep working.

    Returns:
        The trained model. It is left in eval mode after the last step.
    """
    dataset = dataClass(config)

    dataset.info(dataset[0])

    # Ordered split: leading indices train, trailing indices evaluate.
    sampleCount = len(dataset)
    splitIndex = int(sampleCount * config.dataSplit)
    trainSet = torch.utils.data.Subset(dataset, range(splitIndex))
    testSet = torch.utils.data.Subset(dataset, range(splitIndex, sampleCount))

    train = DataLoader(trainSet, batch_size=config.batchSize)
    test = DataLoader(testSet, batch_size=config.batchSize)

    # Describe the first collated batch. It is fetched by iteration rather
    # than train[0] because torch.utils.data.DataLoader cannot be indexed.
    firstBatch = next(iter(train), None)
    if firstBatch is not None:
        dataset.info(firstBatch)

    model = modelClass(config)
    print(f"Model has {sum([p.numel() for p in model.parameters()])} parameters")
    print(f"Dataset has {len(dataset)} samples")

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    testBatches = _cycleBatches(test)
    batchesPerEpoch = len(train)

    for _ in range(epochs):
        for step, (inputs, targets) in enumerate(train, start=1):
            # The evaluation pass below switches to eval mode, so training
            # mode has to be restored at the start of every step.
            model.train()
            optimizer.zero_grad()

            hindcast, _ = model(inputs)
            loss = objective(hindcast, targets.dischargeHistory[:, -1])
            trainLoss = loss.item()

            loss.backward()
            optimizer.step()

            with torch.no_grad():
                model.eval()
                testInputs, testTargets = next(testBatches)

                testHindcast, _ = model(testInputs)
                testLoss = objective(testHindcast, testTargets.dischargeHistory[:, -1]).item()

            # end="" lets the leading "\r" overwrite the previous status line.
            print(
                f"\r{(step / batchesPerEpoch) * 100:.3f}% |  Train Loss: {trainLoss} | Test Loss: {testLoss}",
                end="",
                flush=True,
            )
        # Terminate the status line before the next epoch starts.
        print()

    return model

In [5]:
# Named loss modules handed to trainModel through its `criterion` argument.
metrics = {"Precision": CMALLoss()}

# Train InundationStation on InundationData for 10 epochs with a fresh CMALLoss
# as the objective. The call is the cell's last expression, so its return
# value is what the notebook displays.
trainModel(
    config=config,
    modelClass=InundationStation,
    dataClass=InundationData,
    objective=CMALLoss(),
    epochs=10,
    criterion=metrics,
)

Loading GeoPandas...
GeoPandas Loaded
2544/2544 GRDC files loaded
9640/9640 ERA5 files queued
57622/57646 Basin Structures Appended to Graph
Upstream Basins Compiled | 1.0 | 17.43485477178423
Upstream Structures Compiled
Structure Tensors Complete
Index Mapping Complete
Static Input Scaling Complete
Model has 1101688 parameters
Dataset has 56533563 samples


RuntimeError: The expanded size of the tensor (7) must match the existing size (277) at non-singleton dimension 2.  Target sizes: [1, 1, 7, 1].  Tensor sizes: [128, 277, 1]