In [12]:
import os
import random
import time

import numpy as np
import torch
from absl import app
# from klearn_tcyclone.training_utils.args import FLAGS, ALL_FLAGS
from klearn_tcyclone.training_utils.training_utils import get_default_flag_values
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils import data

from klearn_tcyclone.climada.tc_tracks import TCTracks
from klearn_tcyclone.data_utils import (
    LinearScaler,
)
from klearn_tcyclone.KNF.modules.eval_metrics import RMSE_TCTracks
from klearn_tcyclone.KNF.modules.models import Koopman
from klearn_tcyclone.KNF.modules.train_utils import (
    eval_epoch_koopman,
    train_epoch_koopman,
)
from klearn_tcyclone.knf_data_utils import TCTrackDataset
from klearn_tcyclone.training_utils.training_utils import set_flags
from absl import app, flags

from klearn_tcyclone.training_utils.training_utils import extend_by_default_flag_values

from klearn_tcyclone.koopkernel_seq2seq import KoopmanKernelSeq2Seq, RBFKernel

In [2]:
torch.cuda.is_available()

True

## Import data

Set some specific parameters and load default values for all other parameters.

In [3]:
# Notebook-specific overrides; extend_by_default_flag_values supplies the
# default value for every flag that is not listed here.
# Further overrides that are currently left at their defaults,
# e.g. "seed": 42 or "batch_size": 16, can be added to this mapping.
flag_overrides = dict(
    year_range=[1980, 1988],
    num_epochs=2,
    train_output_length=1,
    input_length=15,
)
flag_params = extend_by_default_flag_values(flag_overrides)

In [4]:
# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and all CUDA devices) from the single configured seed.
seed = flag_params["seed"]
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Request deterministic cuDNN behaviour and disable its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Track variables used as features. Further candidates that are currently
# not used: "radius_max_wind", "radius_oci", "central_pressure",
# "environmental_pressure".
feature_list = ["lon", "lat", "max_sustained_wind"]

# Encoder/decoder hidden dimensions and layer counts are not available as
# flags; they would have to be derived from flag_params["hidden_dim"] and
# flag_params["num_layers"] if they are needed.

num_feats = len(feature_list)
output_dim = flag_params["input_dim"]
learning_rate = flag_params["learning_rate"]

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device", device)

# Scaler instance that is shared by all dataset splits, and the metric class
# used for evaluation.
scaler = LinearScaler()
eval_metric = RMSE_TCTracks

Device cuda


In [5]:
flag_params["year_range"]

[1980, 1988]

In [6]:
flag_params["batch_size"]

32

In [7]:
# Datasets
# Load the tropical cyclone tracks for the configured year range
# (provider "usa", basin "NA", without pressure correction).
tc_tracks = TCTracks.from_ibtracs_netcdf(
    provider="usa",
    year_range=flag_params["year_range"],
    basin="NA",
    correct_pres=False,
)

# Hold out 10% of the tracks for testing. random_state pins the split to the
# configured seed; without it the split is drawn from NumPy's global RNG and
# therefore changes whenever this cell is re-run or other cells consume
# random numbers first.
tc_tracks_train, tc_tracks_test = train_test_split(
    tc_tracks.data,
    test_size=0.1,
    random_state=flag_params["seed"],
)



  if ibtracs_ds.dims['storm'] == 0:


In [13]:
len(tc_tracks_train), tc_tracks_train[5]

(73,
 <xarray.Dataset> Size: 8kB
 Dimensions:                 (time: 134)
 Coordinates:
   * time                    (time) datetime64[ns] 1kB 1986-08-13T12:00:00 ......
     lat                     (time) float32 536B 30.1 30.46 30.8 ... 56.17 56.2
     lon                     (time) float32 536B -84.0 -84.04 -84.0 ... 6.923 8.0
 Data variables:
     radius_max_wind         (time) float32 536B 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0
     radius_oci              (time) float32 536B 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0
     max_sustained_wind      (time) float32 536B 10.0 10.0 10.0 ... 15.0 15.0
     central_pressure        (time) float32 536B 1.009e+03 1.01e+03 ... 1.006e+03
     environmental_pressure  (time) float64 1kB 1.01e+03 1.01e+03 ... 1.01e+03
     time_step               (time) float64 1kB 3.0 3.0 3.0 3.0 ... 3.0 3.0 3.0
     basin                   (time) <U2 1kB 'NA' 'NA' 'NA' ... 'NA' 'NA' 'NA'
 Attributes:
     max_sustained_wind_unit:  kn
     central_pressure_unit:    mb
     orig_e

In [14]:

# Arguments that are identical for the train, validation and test datasets.
# All three share the same scaler instance.
common_dataset_kwargs = dict(
    input_length=flag_params["input_length"],
    feature_list=feature_list,
    scaler=scaler,
)

# Training split: the only dataset that is created with fit=True.
train_set = TCTrackDataset(
    tc_tracks=tc_tracks_train,
    output_length=flag_params["train_output_length"],
    mode="train",
    jumps=flag_params["jumps"],
    fit=True,
    **common_dataset_kwargs,
)

# Validation split: built from the same training tracks, only the mode differs.
# NOTE(review): presumably TCTrackDataset separates "train" and "valid"
# samples internally based on `mode` - confirm.
valid_set = TCTrackDataset(
    tc_tracks=tc_tracks_train,
    output_length=flag_params["train_output_length"],
    mode="valid",
    jumps=flag_params["jumps"],
    fit=False,
    **common_dataset_kwargs,
)

# Test split: uses the held-out tracks and the test output length.
# `jumps` is not passed because it is not used in test mode.
test_set = TCTrackDataset(
    tc_tracks=tc_tracks_test,
    output_length=flag_params["test_output_length"],
    mode="test",
    fit=False,
    **common_dataset_kwargs,
)
# Batch size and worker count are the same for all three loaders; only the
# shuffling differs (train and validation are shuffled, test is not).
loader_kwargs = dict(batch_size=flag_params["batch_size"], num_workers=1)

train_loader = data.DataLoader(train_set, shuffle=True, **loader_kwargs)
valid_loader = data.DataLoader(valid_set, shuffle=True, **loader_kwargs)
test_loader = data.DataLoader(test_set, shuffle=False, **loader_kwargs)

# Fail early when the training loader contains no batches, because nothing
# could be trained in that case. The message names the training set, which is
# what this condition actually checks.
if len(train_loader) == 0:
    raise ValueError(
        "There are likely too few data points in the training set. "
        "Try to increase year_range."
    )

**TODO:** Check why we have NaN values.

In [15]:
train_loader.dataset[1][0]

tensor([[-0.4151, -0.3649, -0.6774],
        [-0.4141, -0.3516, -0.6774],
        [-0.4141, -0.3420, -0.6774],
        [-0.4141, -0.3349, -0.6774],
        [-0.4133, -0.3293, -0.6774],
        [-0.4125, -0.3225, -0.6774],
        [-0.4124, -0.3103, -0.6774],
        [-0.4125, -0.2975, -0.6774],
        [-0.4125, -0.2883, -0.6774],
        [-0.4125, -0.2809, -0.6774],
        [-0.4126, -0.2743, -0.6774],
        [-0.4125, -0.2684, -0.6774],
        [-0.4118, -0.2625, -0.6774],
        [-0.4109, -0.2560, -0.6774],
        [-0.4100, -0.2479, -0.6774]])

In [16]:
# Total number of time steps over all training tracks, displayed together
# with its square.
# NOTE(review): the square is presumably the number of entries of a full
# n_data x n_data kernel matrix - confirm.
track_lengths = [track["time"].shape[0] for track in tc_tracks_train]
n_data = np.sum(track_lengths)
n_data, n_data**2

(np.int64(4250), np.int64(18062500))

In [17]:
# Preview the first few training batches: shapes, types and the first feature
# column of the first sample of the inputs and of the targets.
num_batches_to_show = 5

# zip() stops as soon as range() is exhausted, so no further batches are
# requested from the loader once the preview is complete (the previous
# version iterated over the entire loader just to print five batches).
for counter, (inps, tgts) in zip(range(num_batches_to_show), train_loader):
    print(counter)
    print(inps.shape, type(inps))
    # Report the type of the targets here (this line used to print type(inps)).
    print(tgts.shape, type(tgts))
    print(inps[0, :, 0])
    print(tgts[0, :, 0])
    print()


0
torch.Size([32, 15, 3]) <class 'torch.Tensor'>
torch.Size([32, 1, 3]) <class 'torch.Tensor'>
tensor([0.2806, 0.2795, 0.2770, 0.2747, 0.2738, 0.2732, 0.2719, 0.2700, 0.2668,
        0.2637, 0.2618, 0.2605, 0.2589, 0.2573, 0.2559])
tensor([0.2542])

1
torch.Size([32, 15, 3]) <class 'torch.Tensor'>
torch.Size([32, 1, 3]) <class 'torch.Tensor'>
tensor([-0.3539, -0.3523, -0.3500, -0.3476, -0.3460, -0.3444, -0.3421, -0.3397,
        -0.3381, -0.3370, -0.3333, -0.3303, -0.3270, -0.3231, -0.3175])
tensor([-0.3098])

2
torch.Size([32, 15, 3]) <class 'torch.Tensor'>
torch.Size([32, 1, 3]) <class 'torch.Tensor'>
tensor([-0.4600, -0.4703, -0.4806, -0.4903, -0.4996, -0.5084, -0.5170, -0.5255,
        -0.5344, -0.5447, -0.5550, -0.5637, -0.5724, -0.5833, -0.5930])
tensor([-0.5986])

3
torch.Size([32, 15, 3]) <class 'torch.Tensor'>
torch.Size([32, 1, 3]) <class 'torch.Tensor'>
tensor([0.3207, 0.3199, 0.3191, 0.3173, 0.3143, 0.3095, 0.3048, 0.3035, 0.3017,
        0.2962, 0.2890, 0.2818, 0.2747, 0.2

In [18]:
rbf = RBFKernel(length_scale=1.0)

In [19]:
# Build the Koopman kernel sequence-to-sequence model with the RBF kernel,
# 50 Nystrom centers and a fixed RNG seed.
# NOTE(review): input_dim, input_length, output_length and output_dim are all
# set to 1 here, while the batches produced by train_loader have shape
# (32, 15, 3) - confirm that these sizes are intended and not placeholders.
koopkernelmodel = KoopmanKernelSeq2Seq(
    kernel=rbf,
    input_dim=1,
    input_length=1,
    output_length=1,
    output_dim=1,
    num_steps=1,
    num_nys_centers=50,
    rng_seed=42,
)

In [20]:
type(koopkernelmodel.global_koopman_operator)

torch.nn.parameter.Parameter

In [21]:
koopkernelmodel.global_koopman_operator

Parameter containing:
tensor([[-0.2122,  0.2179, -0.1368,  ...,  0.0022, -0.0993, -0.1021],
        [ 0.0252, -0.0875, -0.0142,  ...,  0.1951,  0.1739,  0.1662],
        [-0.1600, -0.1150,  0.0066,  ...,  0.1789, -0.1459,  0.0782],
        ...,
        [ 0.2169,  0.2000,  0.1758,  ...,  0.1577, -0.1661, -0.0881],
        [ 0.0287, -0.1941, -0.0957,  ..., -0.0567, -0.1431, -0.0213],
        [ 0.0134,  0.1698,  0.2299,  ..., -0.0866, -0.0634, -0.1238]],
       device='cuda:0', requires_grad=True)

In [22]:
# Initialise the model's Nystrom data from the training loader.
# NOTE(review): this calls an underscore-prefixed (private) method from
# outside the class; presumably it selects the Nystrom centers from the
# training data - confirm, and check whether a public entry point exists.
koopkernelmodel._initialize_nystrom_data(train_loader)

In [23]:
# Materialise all batches of the (shuffled) training loader and move the
# inputs of the first batch to the compute device for a manual forward pass.
train_loader_list = list(train_loader)
first_inputs, _first_targets = train_loader_list[0]
inps = first_inputs.to(device)
inps.shape

torch.Size([32, 15, 3])

In [24]:
# Run a single forward pass on the first training batch.
# NOTE(review): forward() is invoked directly; if KoopmanKernelSeq2Seq is a
# torch.nn.Module, calling koopkernelmodel(inps) is the usual form because it
# also runs registered hooks - confirm.
outs = koopkernelmodel.forward(inps)

In [25]:
outs.shape

torch.Size([32, 50, 15])