## Lightning CNN 

In [31]:
import wandb
import numpy as np
import sys
import torch
import torch.utils.data as Data
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import LearningRateMonitor

In [32]:
#wandb.login()

In [33]:
BASE = '/scratch/ab10313/pleiades/'
PATH_NN= BASE+'NN_data_smooth/'

In [34]:
import systems.regression_system as regression_system
import models.fcnn as fcnn
#import lightning.util.performance as performance
#import util.misc as misc
#import pyqg_explorer.dataset.forcing_dataset as forcing_dataset

In [35]:
# load preprocessed data into input and output channels

# X INPUT
grad_B = np.load(PATH_NN+'grad_B.npy')
FCOR = np.load(PATH_NN+'FCOR.npy')
Nsquared = np.load(PATH_NN+'Nsquared.npy')
HML = np.load(PATH_NN+'HML.npy')
TAU = np.load(PATH_NN+'TAU.npy')
Q = np.load(PATH_NN+'Q.npy')
HBL = np.load(PATH_NN+'HBL.npy')
div = np.load(PATH_NN+'div.npy')
vort = np.load(PATH_NN+'vort.npy')
strain = np.load(PATH_NN+'strain.npy')

X_input = np.stack([FCOR, grad_B, HML, Nsquared, TAU, Q, HBL, div, vort, strain],axis=1)
print('X input shape:')
print( X_input.shape)
print('')


# Y OUTPUT
WB_sg = np.load(PATH_NN+'WB_sg.npy')
WB_sg_mean = np.load(PATH_NN+'WB_sg_mean.npy')
WB_sg_std = np.load(PATH_NN+'WB_sg_std.npy')
              
Y_output = np.tile(WB_sg,(1,1,1,1)).reshape(WB_sg.shape[0],1,WB_sg.shape[1],WB_sg.shape[2]) 
print('Y output shape:')
print(Y_output.shape)
print('')

np.isnan(X_input).any()
np.isnan(Y_output).any()

X input shape:
(8450, 10, 40, 40)

Y output shape:
(8450, 1, 40, 40)



False

In [36]:
# TRAIN AND TEST ONLY
# randomnly generate train, test and validation time indecies 
import random
time_ind = X_input.shape[0]
rand_ind = np.arange(time_ind)
rand_seed = 14
random.Random(rand_seed).shuffle(rand_ind)
train_percent = 0.9
test_percent = 0.1 
print(f"Dataset: train {np.round(train_percent*100)}%, test {np.round(test_percent*100)}%")
train_ind, test_ind =  rand_ind[:round(train_percent*time_ind)], rand_ind[round((train_percent)*time_ind):]                                                                        

# check no overlapping indecies
if np.intersect1d(train_ind, test_ind).any():
    print('overlapping indecies')
else:
    print ('no overlapping indecies')
    

Dataset: train 90.0%, test 10.0%
no overlapping indecies


In [53]:
# Define X,Y pairs (state, subgrid fluxes) for local network.local_torch_dataset = Data.TensorDataset(
BATCH_SIZE = 64  # Number of sample in each batch


###### training dataset #######
torch_dataset_train = Data.TensorDataset(
    torch.from_numpy(X_input[train_ind]).float(),
    torch.from_numpy(Y_output[train_ind]).float(),
)

train_loader = Data.DataLoader(
    dataset=torch_dataset_train, batch_size=BATCH_SIZE, shuffle=True
)
print('TRAIN')
print('X input shape:')
print( X_input[train_ind].shape)
print('Y output shape:')
print( Y_output[train_ind].shape)
print('')

###### test dataset #######
torch_dataset_test = Data.TensorDataset(
    torch.from_numpy(X_input[test_ind]).float(),
    torch.from_numpy(Y_output[test_ind]).float(),    
)

BATCH_SIZE_TEST = len(torch_dataset_test)

test_loader = Data.DataLoader(
    dataset=torch_dataset_test, batch_size=BATCH_SIZE_TEST, shuffle=False
)

print('TEST')
print('X input shape:')
print( X_input[test_ind].shape)
print('Y output shape:')
print( Y_output[test_ind].shape)
print('')



TRAIN
X input shape:
(7605, 10, 40, 40)
Y output shape:
(7605, 1, 40, 40)

TEST
X input shape:
(845, 10, 40, 40)
Y output shape:
(845, 1, 40, 40)



In [54]:

# use GPUs if available
if torch.cuda.is_available():
    print("CUDA Available")
    device = torch.device('cuda')
else:
    print('CUDA Not Available')
    device = torch.device('cpu')

CUDA Available


In [55]:
seed=123
batch_size=64
input_channels=10
output_channels=1
activation="ReLU"
arch="fcnn"
epochs=12
conv_layers=8
save_path=BASE+"models"
save_name="cnn_test.pt"
lr=0.0001
wd=0.01

## Wandb config file
config={"seed":seed,
        "lr":lr,
        "wd":wd,
        "batch_size":batch_size,
        "input_channels":input_channels,
        "output_channels":output_channels,
        "activation":activation,
        "save_name":save_name,
        "save_path":save_path,
        "arch":arch,
        "conv_layers":conv_layers,
        "epochs":epochs}

In [56]:
model=fcnn.FCNN(config)
system=regression_system.RegressionSystem(model)

## Store the number of learanble parameters
config["learnable parameters"]=sum(p.numel() for p in model.parameters())

## Initialise wandb run - pass config dictionary storing the model parameters
wandb.init(project="submeso_ML",config=config)
wandb.watch(model, log_freq=1)

[]

In [57]:
logger = WandbLogger()

  rank_zero_warn(


In [61]:
trainer = pl.Trainer(
    default_root_dir=model.config["save_path"],
    accelerator="auto",
    max_epochs=model.config["epochs"],
    #callbacks=pbar.ProgressBar(),
    logger=WandbLogger()
)

trainer.fit(system, train_loader, test_loader)
wandb.log({"loss": loss, "epoch": epoch}) 

model.save_model()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type    | Params
--------------------------------------
0 | network   | FCNN    | 293 K 
1 | criterion | MSELoss | 0     
--------------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.174     Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=12` reached.


NameError: name 'loss' is not defined

In [None]:
config=reg_sys.config
time_hor=int(sys.argv[1])
config["drop_spin_up"]=True

def train(time_horizon,beta_loss):
    config["beta_loss"]=beta_loss
    config["time_horizon"]=time_horizon
    ## Emulator model used for forward prediction
    config["model_string"]="/scratch/cp3759/pyqg_data/models/emulator/fcnn_residuals/fcnnr_4_%d_step_all_May.p" % config["time_horizon"]
    model_beta=misc.load_model(config["model_string"])
    config["epochs"]=100
    config["subgrid_models"]=["HRC"]
    config["theta_loss"]=1
    config["save_path"]="/scratch/cp3759/pyqg_data/models/joint_May_nospin"
    config["save_name"]="joint_beta%d_time%d.p" % (config["beta_loss"],config["time_horizon"])

    dataset=forcing_dataset.EmulatorForcingDataset('/scratch/cp3759/pyqg_data/sims/%d_step_forcing/' % config["time_horizon"],config["subgrid_models"],
                                channels=4,seed=config["seed"],subsample=config["subsample"],drop_spin_up=config["drop_spin_up"])

    train_loader = DataLoader(
        dataset,
        num_workers=10,
        batch_size=64,
        sampler=SubsetRandomSampler(dataset.train_idx),
    )
    valid_loader = DataLoader(
        dataset,
        num_workers=10,
        batch_size=64,
        sampler=SubsetRandomSampler(dataset.valid_idx),
    )

    config["q_mean_upper"]=dataset.q_mean_upper
    config["q_mean_lower"]=dataset.q_mean_lower
    config["q_std_upper"]=dataset.q_std_upper
    config["q_std_lower"]=dataset.q_std_lower
    config["s_mean_upper"]=dataset.s_mean_upper
    config["s_mean_lower"]=dataset.s_mean_lower
    config["s_std_upper"]=dataset.s_std_upper
    config["s_std_lower"]=dataset.s_std_lower
    config["training_fields"]=len(dataset.train_idx)
    config["validation_fields"]=len(dataset.valid_idx)

    model=fcnn.FCNN(config)

    system=reg_sys.JointRegressionSystem(model,config,model_beta)
    system.network_beta.requires_grad=False

    wandb.init(project="joint_opt_sweep", entity="m2lines",config=config,dir="/scratch/cp3759/pyqg_data/wandb_runs")
    wandb.config["theta learnable parameters"]=sum(p.numel() for p in model.parameters())
    wandb.watch(model, log_freq=1)

    logger = WandbLogger()
    lr_monitor=LearningRateMonitor(logging_interval='epoch')

    trainer = pl.Trainer(
        accelerator="auto",
        max_epochs=config["epochs"],
        logger=logger,
        enable_progress_bar=False,
        callbacks=[lr_monitor]
        )

    trainer.fit(system, train_loader, valid_loader)

    perf=performance.ParameterizationPerformance(model,valid_loader,threshold=5000)

    dist_fig=perf.get_distribution_2d()
    figure_dist=wandb.Image(dist_fig)
    wandb.log({"Distributions": figure_dist})

    power_fig=perf.get_power_spectrum()
    figure_power=wandb.Image(power_fig)
    wandb.log({"Power spectra": figure_power})

    field_fig=perf.get_fields()
    figure_field=wandb.Image(field_fig)
    wandb.log({"Random fields": figure_field})

    online_fig=perf.online_comparison()
    figure_online=wandb.Image(online_fig)
    wandb.log({"Online test": figure_online})

    model.save_model()
    wandb.finish()

loss_betas=[1,10,100,1000]
for beta_loss in loss_betas:
    train(time_hor,beta_loss)

print("Finito")

In [8]:
sweep_config = {
    'method': 'grid'
    }

In [20]:
import matplotlib.pyplot as plt
import wandb
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.data as Data

## Our fancy new modules
import universal_parameterization.systems.regression_system as regression_system
import universal_parameterization.arch.fcnn as fcnn
import universal_parameterization.dataset.pyqg_dataset as dataset
import universal_parameterization.util.plot_helpers as plot_helpers
import universal_parameterization.util.metrics as metrics

In [18]:

BASE = '/scratch/ab10313/pleiades/'
PATH_NN = BASE+'NN_data_smooth/'

In [16]:
# TRAIN AND TEST ONLY
# randomnly generate train, test and validation time indecies 
import random
time_ind = X_input.shape[0]
rand_ind = np.arange(time_ind)
rand_seed = 14
random.Random(rand_seed).shuffle(rand_ind)
train_percent = 0.9
test_percent = 0.1 
print(f"Dataset: train {np.round(train_percent*100)}%, test {np.round(test_percent*100)}%")
train_ind, test_ind =  rand_ind[:round(train_percent*time_ind)], rand_ind[round((train_percent)*time_ind):]                                                                        

# check no overlapping indecies
if np.intersect1d(train_ind, test_ind).any():
    print('overlapping indecies')
else:
    print ('no overlapping indecies')
    

Dataset: train 90.0%, test 10.0%
no overlapping indecies


In [21]:
# Define X,Y pairs (state, subgrid fluxes) for local network.local_torch_dataset = Data.TensorDataset(
BATCH_SIZE = 64  # Number of sample in each batch


###### training dataset #######
torch_dataset_train = Data.TensorDataset(
    torch.from_numpy(X_input[train_ind]).double(),
    torch.from_numpy(Y_output[train_ind]).double(),
)

loader_train = Data.DataLoader(
    dataset=torch_dataset_train, batch_size=BATCH_SIZE, shuffle=True
)
print('TRAIN')
print('X input shape:')
print( X_input[train_ind].shape)
print('Y output shape:')
print( Y_output[train_ind].shape)
print('')

###### test dataset #######
torch_dataset_test = Data.TensorDataset(
    torch.from_numpy(X_input[test_ind]).double(),
    torch.from_numpy(Y_output[test_ind]).double(),    
)

BATCH_SIZE_TEST = len(torch_dataset_test)

loader_test = Data.DataLoader(
    dataset=torch_dataset_test, batch_size=BATCH_SIZE_TEST, shuffle=False
)

print('TEST')
print('X input shape:')
print( X_input[test_ind].shape)
print('Y output shape:')
print( Y_output[test_ind].shape)
print('')



TRAIN
X input shape:
(7605, 10, 40, 40)
Y output shape:
(7605, 1, 40, 40)

TEST
X input shape:
(845, 10, 40, 40)
Y output shape:
(845, 1, 40, 40)



In [22]:
batch_size=64
lr=0.001  ## learning rate
wd=0.01   ## weight decay
epochs=30 ## Setting this to a low number just for testing purposes

In [30]:
model=fcnn.FCNN() ## NN architecture: could be FCNN, resnet, ANN.. etc
system=regression_system.RegressionSystem(model) ## Optimisation framework: define loss, regularisation, configure optimiser etc

TypeError: __init__() missing 1 required positional argument: 'config'

In [25]:
trainer = pl.Trainer(
    accelerator="auto",
    max_epochs=30,
    enable_progress_bar=False,
    )

## This calls the standard pytorch training/validation loop using the RegressionSystem.step() method from base_model.py.
## See https://pytorch-lightning.readthedocs.io/en/stable/extensions/loops.html to inspect the syntax that lightning is
## running behind the scenes.
trainer.fit(system, loader_train, loader_test)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/ab10313/submeso_ML/cnn/lightning/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type    | Params
--------------------------------------
0 | network   | FCNN    | 268 K 
1 | criterion | MSELoss | 0     
--------------------------------------
268 K     Trainable params
0         Non-trainable params
268 K     Total params
1.073     Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.
  rank_zero_warn(


RuntimeError: Given groups=1, weight of size [128, 2, 5, 5], expected input[845, 10, 44, 44] to have 2 channels, but got 10 channels instead