Example workbook that walks through training a PyTorch model.

In [1]:
import os
import sys; sys.path.append("../models")
import utils
import train
import CNN1D

import xarray as xr
import numpy as np
import torch as t

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if t.cuda.is_available() else "cpu"

# Seed every RNG this notebook draws from. The data shuffle uses
# np.random.permutation, which torch's seed does not control, so NumPy's
# global generator is seeded with the same value.
SEED = 123456
np.random.seed(SEED)
# kept as the last expression so the cell still displays the torch Generator
t.manual_seed(SEED)

<torch._C.Generator at 0x7f3c817bedb0>

In [2]:
# note: the aim is for all preprocessing / reshaping to be folded into SimDataset

# Data location. Set the OTP_DATA_HOME environment variable to point at a local
# copy of the dataset; without it the original author's path is used.
data_home = os.environ.get("OTP_DATA_HOME", "/mnt/g/My Drive/GTC/solodoch_data_minimal")
lats = ["26N", "30S", "55S", "60S"]

# only the first latitude is used in this example
lat = lats[0]
data = xr.open_dataset(f"{data_home}/{lat}.nc")

# apply whatever preprocessing we want *before* calling reshape_inputs
pp_data = utils.apply_preprocessing(data,
                                    mode="inputs",
                                    remove_season=True,
                                    remove_trend=True,
                                    standardize=True,
                                    lowpass=True)

# reshape as desired and convert to a tensor
# (pp_data is rebound here: from the preprocessed dataset to the reshaped tensor)
pp_data = utils.reshape_inputs(pp_data, keep_coords=["time", "longitude"], return_pt=True)

# dummy strength data: uniform random placeholder targets, one per entry
# along the first axis of pp_data
strength = t.rand(pp_data.shape[0])
X = pp_data
y = strength

# shuffle data: the same permutation is applied to inputs and targets so the
# pairs stay aligned (drawn from NumPy's global RNG)
p = np.random.permutation(len(y))
X, y = X[p], y[p]

axes: ['time', 'longitude', 'feature']
variables: ['SSH', 'SST', 'SSS', 'OBP', 'ZWS']
shape: (288, 164, 5)


In [3]:
# for 1D CNN shape *must* be (n_times, n_features, dim_to_conv_over)
# Rebuild X from pp_data and the shuffle order p instead of permuting X in
# place: `X = X.permute(0, 2, 1)` swaps the axes back every second time this
# cell is run, whereas this form gives the same result on every run.
X = pp_data[p].permute(0, 2, 1)

In [4]:
# run name, and the output location: a "models" directory inside the parent
# of the current working directory
name = "test_cnn1d"
parent_dir = os.path.dirname(os.path.abspath('.'))
save_dir = f"{parent_dir}/models"

# hyperparameter overrides (defaults set in train.py and <model>.py);
# each one is written onto the CNN1D module and also passed to the constructor
overrides = {
    "n_pure_layers": 2,
    "n_mix_layers": 2,
    "n_features": X.shape[1],
    "n_channels": X.shape[1] * 5,  # five channels per input feature
    "kernel_size": 5,
}
for attr, value in overrides.items():
    setattr(CNN1D, attr, value)
train.max_iters = 100
# ---------------

# dropout is not overridden: whatever CNN1D.dropout currently holds is used
model = CNN1D.CNN1D(**overrides, dropout=CNN1D.dropout)
model = model.to(device)

# train the model
train.train_model(model, name, X, y, save_dir=save_dir)

device: cuda
4851 parameters.


100%|██████████| 100/100 [00:01<00:00, 63.40it/s]


final loss: 0.07349322736263275
model saved to /home/maish/OTP/models/saved_models/test_cnn1d.pt
loss curve saved to /home/maish/OTP/models/loss_curves/test_cnn1d.png
