# Create data sets for yearly isotope emulation tasks and train models on them.

In [4]:
from datasets import *
from train import *
from predict import *
from evaluate import *
from util import *
import numpy as np

## 1) Create datasets.

## Tas, pr

In [7]:
# Dataset configuration: tsurf and prec as predictors, d18O as target,
# flat grid, yearly timescale, no precipitation weighting.
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

In [None]:
# Build the yearly dataset for the configuration in `description`.
# NOTE(review): presumably reads raw data from base_folder and writes the
# processed dataset to output_folder -- confirm against datasets.py.
create_yearly_dataset(description, base_folder, output_folder)

### tas only

In [None]:
# Dataset configuration with tsurf as the only predictor and d18O as target
# (flat grid, yearly timescale, no precipitation weighting).
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    # NOTE(review): "prec" is still listed here although it is not in
    # DATASETS_USED; kept exactly as in the original -- confirm it is intended.
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

In [None]:
# Build the yearly dataset for the tsurf-only configuration in `description`.
# NOTE(review): presumably reads from base_folder and writes to output_folder
# -- confirm against datasets.py.
create_yearly_dataset(description, base_folder, output_folder)

### pr only

In [None]:
# Dataset configuration with prec as the only predictor and d18O as target
# (flat grid, yearly timescale, no precipitation weighting).
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "prec"],
    "PREDICTOR_VARIABLES": {"prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    # NOTE(review): "tsurf" is still listed here although it is not in
    # DATASETS_USED; kept exactly as in the original -- confirm it is intended.
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

In [None]:
# Build the yearly dataset for the prec-only configuration in `description`.
# NOTE(review): presumably reads from base_folder and writes to output_folder
# -- confirm against datasets.py.
create_yearly_dataset(description, base_folder, output_folder)

### pr, tas precip weighted

In [None]:
# Dataset configuration: tsurf and prec as predictors, d18O as target,
# flat grid, yearly timescale, with PRECIP_WEIGHTING switched on.
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": True,
    "TIMESCALE": "YEARLY",
}

In [None]:
# Build the precipitation-weighted dataset for the configuration in
# `description` (note: a different builder than the unweighted cells use).
# NOTE(review): presumably reads from base_folder and writes to output_folder
# -- confirm against datasets.py.
create_precip_weighted_dataset(description, base_folder, output_folder)

### pr, tas ico grid.

In [None]:
# Dataset configuration on the icosahedral grid: tsurf and prec as
# predictors, d18O as target, yearly timescale. Compared with the flat-grid
# configuration it adds RESOLUTION, INTERPOLATE_CORNERS and INTERPOLATION.
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Ico",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    # icosahedral-grid specific settings
    "RESOLUTION": 5,
    "INTERPOLATE_CORNERS": True,
    "INTERPOLATION": "cons1",
    "TIMESCALE": "YEARLY",
}

In [None]:
# Build the yearly dataset for the icosahedral-grid configuration in
# `description`.
# NOTE(review): presumably reads from base_folder and writes to output_folder
# -- confirm against datasets.py.
create_yearly_dataset(description, base_folder, output_folder)

## 2) Run experiments yearly dataset

## Testing the effect of modifications to flat UNet

Start by selecting the tas, pr dataset without precipitation weighting.

In [None]:
# Select the flat-grid tsurf + prec dataset without precipitation weighting
# for the UNet modification experiments.
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

In [None]:
# Training configuration for the flat UNet. LOSS, USE_COORD_CONV,
# USE_CYLINDRICAL_PADDING and RUN_NR are deliberately not set in this cell.
# Keys are listed in the order in which the original cell inserted them.
model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # training parameters
    "DEPTH": 3,
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    "LEARNING_RATE": 1e-3,  # use either this or default ADAM learning rate
    # model parameters: channel counts follow the number of variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 32,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (32, 32, 64, 64),
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": torch.nn.BatchNorm2d,
    "OPTIMIZER": "Adam",
    # first CUDA device when available, otherwise CPU
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

In [None]:
# Settings swept by the following loop: every combination of loss,
# coordinate-convolution flag and cylindrical-padding flag is trained
# n_runs times.
loss = ["Masked_MSELoss", "Masked_AreaWeightedMSELoss"]
use_coord_conv = [False, True]
use_cylindrical_padding = [False, True]
n_runs = 10

In [None]:
# Train and evaluate one flat UNet for every combination of loss,
# coordinate-convolution flag and cylindrical-padding flag, repeated
# n_runs times each.
for loss_name in loss:
    for coord_conv in use_coord_conv:
        for cyl_padding in use_cylindrical_padding:
            for run_nr in range(n_runs):
                print(loss_name, coord_conv, cyl_padding, run_nr)
                # Same keys, same insertion order as separate assignments.
                model_training_description.update({
                    "USE_CYLINDRICAL_PADDING": cyl_padding,
                    "USE_COORD_CONV": coord_conv,
                    "LOSS": loss_name,
                    "RUN_NR": run_nr,
                })
                unet = train_unet(description, model_training_description, output_folder)
                predict_save_unet(
                    description, model_training_description, output_folder, unet, output_folder
                )

## Comparing different ML methods:

Results for the modified and unmodified flat UNet were already obtained by the training loop in the previous section.

### Flat grid:

In [None]:
# Flat-grid tsurf + prec dataset (no precipitation weighting) used for the
# baseline comparisons on the flat grid.
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

In [None]:
# Configuration for PCA regression on the flat grid with a lasso regressor
# and fixed numbers of principal components.
model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "PCA_Flat",
    # No validation set is created; SHUFFLE_VALIDATIONSET is left unset.
    "CREATE_VALIDATIONSET": False,
    "N_PC_PREDICTORS": 450,
    "N_PC_TARGETS": 300,
    "REGTYPE": "lasso",
}

In [None]:
# Fit the PCA-regression model configured above, then save its predictions.
# train_pca returns three objects that are all handed on to predict_save_pca.
# NOTE(review): output_folder is passed twice to predict_save_pca (third and
# last argument) -- confirm the meaning of each against predict.py.
pca, pca_targets, model = train_pca(description, model_training_description, output_folder)
print("finished training")
predict_save_pca(description, model_training_description, output_folder, pca, pca_targets, model, output_folder)

### Linear regression baseline:

In [None]:
# Configuration for the pixelwise linear-regression baseline.
model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "LinReg_Pixelwise",
    # No validation set is created; SHUFFLE_VALIDATIONSET is left unset.
    "CREATE_VALIDATIONSET": False,
}

In [None]:
# Fit the pixelwise linear-regression baseline and save its predictions.
# NOTE(review): output_folder is passed twice to the predict function (third
# and last argument) -- confirm the meaning of each against predict.py.
models = train_linreg_pixelwise(description, model_training_description, output_folder)
predict_save_linreg_pixelwise(description, model_training_description, output_folder, models, output_folder)

### PCA-regression baseline:

In [8]:
# Configuration for the PCA baseline with a plain linear regressor. A
# validation set is created without shuffling; the numbers of principal
# components are not fixed in this cell.
output_folder = "Output/Reproduce_new"

model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "PCA_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": False,
    "REGTYPE": "linreg",
}

Tune the hyperparameters: evaluate the numbers of principal components for predictors and targets on a 50x50 logarithmically spaced grid.

In [9]:
# 50 logarithmically spaced candidates between 3 and 700 for the number of
# principal components. n_pc_in / n_pc_out enumerate all 50 * 50 pairings as
# flat integer arrays: n_pc_in cycles through the candidates, n_pc_out holds
# each candidate for 50 consecutive entries (same ordering as a flattened
# default meshgrid).
# NOTE(review): the float candidates are truncated to int, so neighbouring
# small candidates may collapse to the same value and be evaluated twice.
n_pc_range = np.logspace(np.log10(3), np.log10(700), 50)
n_pc_in = np.tile(n_pc_range, n_pc_range.size).astype("int")
n_pc_out = np.repeat(n_pc_range, n_pc_range.size).astype("int")

In [None]:
from train_tune_pca import train_tune_pca

# Tune the PCA baseline over the (n_pc_in, n_pc_out) candidate pairs, then
# save the predictions of the returned model.
pca, pca_targets, model = train_tune_pca(
    description,
    model_training_description,
    output_folder,
    n_pc_in=n_pc_in,
    n_pc_out=n_pc_out,
)
predict_save_pca(
    description, model_training_description, output_folder,
    pca, pca_targets, model, output_folder,
)

### Random-forest baseline:

In [None]:
# Configuration for the pixelwise random-forest baseline.
model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "RandomForest_Pixelwise",
    # No validation set is created; SHUFFLE_VALIDATIONSET is left unset.
    "CREATE_VALIDATIONSET": False,
}

In [None]:
# Fit the pixelwise random-forest baseline and save its predictions.
# NOTE(review): verbose=3 and n_jobs=-1 are presumably forwarded to the
# underlying estimator (n_jobs=-1 conventionally meaning "use all cores") --
# confirm against train.py. output_folder is passed twice to the predict
# function (third and last argument); confirm the meaning of each.
model = train_random_forest_pixelwise(description, model_training_description, output_folder, verbose=3, n_jobs=-1)
predict_save_randomforest_pixelwise(description, model_training_description, output_folder, model, output_folder)

### Icosahedral grid:

In [None]:

# Select the icosahedral-grid tsurf + prec dataset for the experiments that
# follow (adds RESOLUTION, INTERPOLATE_CORNERS and INTERPOLATION to the
# flat-grid configuration).
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Ico",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    # icosahedral-grid specific settings
    "RESOLUTION": 5,
    "INTERPOLATE_CORNERS": True,
    "INTERPOLATION": "cons1",
    "TIMESCALE": "YEARLY",
}

### PCA baseline on icosahedral grid:

In [None]:
# Configuration for the PCA baseline on the icosahedral grid with a plain
# linear regressor. A validation set is created without shuffling.
model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "PCA_Ico",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": False,
    "REGTYPE": "linreg",
}

In [None]:
# 20 logarithmically spaced candidates between 3 and 700 for the number of
# principal components. n_pc_in / n_pc_out enumerate all 20 * 20 pairings as
# flat integer arrays: n_pc_in cycles through the candidates, n_pc_out holds
# each candidate for 20 consecutive entries (same ordering as a flattened
# default meshgrid). Float candidates are truncated to int.
n_pc_range = np.logspace(np.log10(3), np.log10(700), 20)
n_pc_in = np.tile(n_pc_range, n_pc_range.size).astype("int")
n_pc_out = np.repeat(n_pc_range, n_pc_range.size).astype("int")

In [None]:
from train_tune_pca import train_tune_pca

# Tune the icosahedral-grid PCA baseline over the (n_pc_in, n_pc_out)
# candidate pairs, then save the predictions of the returned model.
pca, pca_targets, model = train_tune_pca(
    description,
    model_training_description,
    output_folder,
    n_pc_in=n_pc_in,
    n_pc_out=n_pc_out,
)

predict_save_pca(
    description, model_training_description, output_folder,
    pca, pca_targets, model, output_folder,
)

### Ico UNet:

In [None]:
# Training configuration for the icosahedral UNet. Differs from the flat
# UNet configuration in MODEL_TYPE, the normalization layer and a fixed
# "MSELoss". Keys are listed in the order the original cell inserted them.
model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Ico",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # training parameters
    "DEPTH": 3,
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    "LEARNING_RATE": 1e-3,  # use either this or default ADAM learning rate
    # model parameters: channel counts follow the number of variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 32,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (32, 32, 64, 64),
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": IcoBatchNorm2d,  # instead of torch.nn.BatchNorm2d
    "OPTIMIZER": "Adam",
    "LOSS": "MSELoss",
    # first CUDA device when available, otherwise CPU
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

In [None]:
# Train the icosahedral UNet n_runs times, tagging each run with its index
# and saving the predictions after every run.
n_runs = 10
for run_nr in range(n_runs):
    model_training_description.update({"RUN_NR": run_nr})
    unet = train_unet(description, model_training_description, output_folder)
    predict_save_unet(
        description, model_training_description, output_folder, unet, output_folder
    )

## Comparing different predictor variables

### tas only:

In [None]:
# --- Dataset: tsurf as the only predictor, d18O as target, flat grid. ---
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "tsurf"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    # NOTE(review): "prec" is still listed here although it is not in
    # DATASETS_USED; kept exactly as in the original -- confirm it is intended.
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

# --- Model training: flat UNet with cylindrical padding, coordinate
# convolution and the masked area-weighted MSE loss. Keys are listed in the
# order in which the original cell inserted them. ---
model_training_description = {
    # how to standardize the given variables (single predictor here)
    "S_MODE_PREDICTORS": ["Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # training parameters
    "DEPTH": 3,
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    "LEARNING_RATE": 1e-3,  # use either this or default ADAM learning rate
    # model parameters: channel counts follow the number of variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 32,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (32, 32, 64, 64),
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": torch.nn.BatchNorm2d,
    "OPTIMIZER": "Adam",
    # first CUDA device when available, otherwise CPU
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "USE_CYLINDRICAL_PADDING": True,
    "USE_COORD_CONV": True,
    "LOSS": "Masked_AreaWeightedMSELoss",
}

In [None]:
# Train the tsurf-only flat UNet n_runs times, tagging each run with its
# index and saving the predictions after every run.
n_runs = 10
for run_nr in range(n_runs):
    model_training_description.update({"RUN_NR": run_nr})
    unet = train_unet(description, model_training_description, output_folder)
    predict_save_unet(
        description, model_training_description, output_folder, unet, output_folder
    )

### precip only

In [None]:
# --- Dataset: prec as the only predictor, d18O as target, flat grid. ---
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

description = {
    "DATASETS_USED": ["isotopes", "prec"],
    "PREDICTOR_VARIABLES": {"prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    # NOTE(review): "tsurf" is still listed here although it is not in
    # DATASETS_USED; kept exactly as in the original -- confirm it is intended.
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

# --- Model training: flat UNet with cylindrical padding, coordinate
# convolution and the masked area-weighted MSE loss. Keys are listed in the
# order in which the original cell inserted them. ---
model_training_description = {
    # how to standardize the given variables (single predictor here)
    "S_MODE_PREDICTORS": ["Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # training parameters
    "DEPTH": 3,
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    "LEARNING_RATE": 1e-3,  # use either this or default ADAM learning rate
    # model parameters: channel counts follow the number of variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 32,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (32, 32, 64, 64),
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": torch.nn.BatchNorm2d,
    "OPTIMIZER": "Adam",
    # first CUDA device when available, otherwise CPU
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "USE_CYLINDRICAL_PADDING": True,
    "USE_COORD_CONV": True,
    "LOSS": "Masked_AreaWeightedMSELoss",
}

In [None]:
# Train the prec-only flat UNet n_runs times, tagging each run with its
# index and saving the predictions after every run. Unlike the other run
# loops, training is started with use_tensorboard=True.
n_runs = 10
for run_nr in range(n_runs):
    model_training_description.update({"RUN_NR": run_nr})
    unet = train_unet(
        description, model_training_description, output_folder, use_tensorboard=True
    )
    predict_save_unet(
        description, model_training_description, output_folder, unet, output_folder
    )

## Hyperparameter tuning

### Learning rate tuning:

In [15]:
# Learning rates to try: 20 logarithmically spaced values from 1e-4 to 1e-1,
# each trained runs_per_lr times.
lrs = np.logspace(-4, -1, num=20)
runs_per_lr = 3

In [16]:
# --- Dataset: flat-grid tsurf + prec, d18O as target. Results of the
# learning-rate study go to their own output folder. ---
base_folder = "Datasets"
output_folder = "Output/Reproduce_new_lr"

description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

# --- Model training: flat UNet with cylindrical padding, coordinate
# convolution and the masked area-weighted MSE loss. LEARNING_RATE is
# intentionally not set in this cell. Keys are listed in the order in which
# the original cell inserted them. ---
model_training_description = {
    # how to standardize the given variables
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # training parameters
    "DEPTH": 3,
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    # model parameters: channel counts follow the number of variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 32,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (32, 32, 64, 64),
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": torch.nn.BatchNorm2d,
    "OPTIMIZER": "Adam",
    # first CUDA device when available, otherwise CPU
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "USE_CYLINDRICAL_PADDING": True,
    "USE_COORD_CONV": True,
    "LOSS": "Masked_AreaWeightedMSELoss",
}

In [17]:
# For every candidate learning rate, train runs_per_lr flat UNets and save
# the predictions of each.
for learning_rate in lrs:
    for run_nr in range(runs_per_lr):
        # Same keys, same insertion order as two separate assignments.
        model_training_description.update(
            {"RUN_NR": run_nr, "LEARNING_RATE": learning_rate}
        )
        unet = train_unet(description, model_training_description, output_folder)
        predict_save_unet(
            description, model_training_description, output_folder, unet, output_folder
        )

Starting training
Epoch [1], Iter [91/101] Loss: 0.7119
Epoch [2], Iter [91/101] Loss: 0.6673
Epoch [3], Iter [91/101] Loss: 0.6078
Epoch [4], Iter [91/101] Loss: 0.5898
Epoch [5], Iter [91/101] Loss: 0.5818
Epoch [6], Iter [91/101] Loss: 0.5704
Epoch [7], Iter [91/101] Loss: 0.5853
Epoch [8], Iter [91/101] Loss: 0.5513
Epoch [9], Iter [91/101] Loss: 0.5654
Epoch [10], Iter [91/101] Loss: 0.5454
Epoch [11], Iter [91/101] Loss: 0.5383
Epoch [12], Iter [91/101] Loss: 0.5547
Epoch [13], Iter [91/101] Loss: 0.5546
Epoch [14], Iter [91/101] Loss: 0.5380
Epoch [15], Iter [91/101] Loss: 0.5231
Epoch [16], Iter [91/101] Loss: 0.5415
Epoch [17], Iter [91/101] Loss: 0.4902
Epoch [18], Iter [91/101] Loss: 0.4913
Epoch [19], Iter [91/101] Loss: 0.4780
Epoch [20], Iter [91/101] Loss: 0.4940
Epoch [21], Iter [91/101] Loss: 0.4936
Epoch [22], Iter [91/101] Loss: 0.4906
Test MSE: 0.6538720726966858
writing predictions
writing descriptions
done
Starting training
Epoch [1], Iter [91/101] Loss: 0.6883
Ep

Epoch [3], Iter [91/101] Loss: 0.6828
Epoch [4], Iter [91/101] Loss: 0.5857
Epoch [5], Iter [91/101] Loss: 0.5801
Epoch [6], Iter [91/101] Loss: 0.5745
Epoch [7], Iter [91/101] Loss: 0.5648
Epoch [8], Iter [91/101] Loss: 0.5689
Epoch [9], Iter [91/101] Loss: 0.5580
Epoch [10], Iter [91/101] Loss: 0.5279
Epoch [11], Iter [91/101] Loss: 0.6115
Epoch [12], Iter [91/101] Loss: 0.5091
Epoch [13], Iter [91/101] Loss: 0.4861
Epoch [14], Iter [91/101] Loss: 0.4871
Test MSE: 0.6354402899742126
writing predictions
writing descriptions
done
Starting training
Epoch [1], Iter [91/101] Loss: 0.6620
Epoch [2], Iter [91/101] Loss: 0.6458
Epoch [3], Iter [91/101] Loss: 0.7688
Epoch [4], Iter [91/101] Loss: 0.5739
Epoch [5], Iter [91/101] Loss: 0.5884
Epoch [6], Iter [91/101] Loss: 0.5650
Epoch [7], Iter [91/101] Loss: 0.5396
Epoch [8], Iter [91/101] Loss: 0.5559
Epoch [9], Iter [91/101] Loss: 0.5518
Epoch [10], Iter [91/101] Loss: 0.5230
Epoch [11], Iter [91/101] Loss: 0.5540
Epoch [12], Iter [91/101] 

Epoch [14], Iter [91/101] Loss: 0.5242
Epoch [15], Iter [91/101] Loss: 0.5134
Epoch [16], Iter [91/101] Loss: 0.5124
Epoch [17], Iter [91/101] Loss: 0.5259
Epoch [18], Iter [91/101] Loss: 0.4783
Epoch [19], Iter [91/101] Loss: 0.4809
Epoch [20], Iter [91/101] Loss: 0.4796
Epoch [21], Iter [91/101] Loss: 0.4820
Epoch [22], Iter [91/101] Loss: 0.4879
Test MSE: 0.6237897872924805
writing predictions
writing descriptions
done
Starting training
Epoch [1], Iter [91/101] Loss: 0.6500
Epoch [2], Iter [91/101] Loss: 0.6025
Epoch [3], Iter [91/101] Loss: 0.6050
Epoch [4], Iter [91/101] Loss: 0.5638
Epoch [5], Iter [91/101] Loss: 0.5842
Epoch [6], Iter [91/101] Loss: 0.6896
Epoch [7], Iter [91/101] Loss: 0.5646
Epoch [8], Iter [91/101] Loss: 0.6568
Epoch [9], Iter [91/101] Loss: 0.5580
Epoch [10], Iter [91/101] Loss: 0.5523
Epoch [11], Iter [91/101] Loss: 0.5375
Epoch [12], Iter [91/101] Loss: 0.5364
Epoch [13], Iter [91/101] Loss: 0.5208
Epoch [14], Iter [91/101] Loss: 0.5122
Epoch [15], Iter [9

writing descriptions
done
Starting training
Epoch [1], Iter [91/101] Loss: 0.6778
Epoch [2], Iter [91/101] Loss: 0.6499
Epoch [3], Iter [91/101] Loss: 0.7106
Epoch [4], Iter [91/101] Loss: 0.6477
Epoch [5], Iter [91/101] Loss: 0.7132
Epoch [6], Iter [91/101] Loss: 0.6214
Epoch [7], Iter [91/101] Loss: 0.6028
Epoch [8], Iter [91/101] Loss: 0.6089
Epoch [9], Iter [91/101] Loss: 0.5677
Epoch [10], Iter [91/101] Loss: 0.5788
Epoch [11], Iter [91/101] Loss: 0.5542
Epoch [12], Iter [91/101] Loss: 0.5557
Epoch [13], Iter [91/101] Loss: 0.5755
Epoch [14], Iter [91/101] Loss: 0.5796
Epoch [15], Iter [91/101] Loss: 0.5648
Epoch [16], Iter [91/101] Loss: 0.5471
Epoch [17], Iter [91/101] Loss: 0.5427
Epoch [18], Iter [91/101] Loss: 0.5444
Epoch [19], Iter [91/101] Loss: 0.5266
Epoch [20], Iter [91/101] Loss: 0.5085
Epoch [21], Iter [91/101] Loss: 0.5348
Epoch [22], Iter [91/101] Loss: 0.5211
Epoch [23], Iter [91/101] Loss: 0.5260
Epoch [24], Iter [91/101] Loss: 0.5186
Epoch [25], Iter [91/101] Los

Epoch [10], Iter [91/101] Loss: 0.5856
Epoch [11], Iter [91/101] Loss: 0.5761
Epoch [12], Iter [91/101] Loss: 0.5426
Epoch [13], Iter [91/101] Loss: 0.5681
Epoch [14], Iter [91/101] Loss: 0.5636
Epoch [15], Iter [91/101] Loss: 0.6284
Epoch [16], Iter [91/101] Loss: 0.5334
Epoch [17], Iter [91/101] Loss: 0.5878
Epoch [18], Iter [91/101] Loss: 0.5501
Epoch [19], Iter [91/101] Loss: 0.5624
Epoch [20], Iter [91/101] Loss: 0.5305
Epoch [21], Iter [91/101] Loss: 0.5276
Epoch [22], Iter [91/101] Loss: 0.5400
Epoch [23], Iter [91/101] Loss: 0.5125
Epoch [24], Iter [91/101] Loss: 0.5373
Epoch [25], Iter [91/101] Loss: 0.5209
Test MSE: 0.6340331435203552
writing predictions
writing descriptions
done
Starting training
Epoch [1], Iter [91/101] Loss: 0.6669
Epoch [2], Iter [91/101] Loss: 0.6829
Epoch [3], Iter [91/101] Loss: 0.6533
Epoch [4], Iter [91/101] Loss: 0.6295
Epoch [5], Iter [91/101] Loss: 0.6366
Epoch [6], Iter [91/101] Loss: 0.6149
Epoch [7], Iter [91/101] Loss: 0.6122
Epoch [8], Iter [

Epoch [16], Iter [91/101] Loss: 0.5750
Epoch [17], Iter [91/101] Loss: 0.5626
Epoch [18], Iter [91/101] Loss: 0.5591
Epoch [19], Iter [91/101] Loss: 0.6187
Epoch [20], Iter [91/101] Loss: 0.6329
Epoch [21], Iter [91/101] Loss: 0.5542
Epoch [22], Iter [91/101] Loss: 0.5485
Epoch [23], Iter [91/101] Loss: 0.5367
Epoch [24], Iter [91/101] Loss: 0.5125
Epoch [25], Iter [91/101] Loss: 0.5776
Epoch [26], Iter [91/101] Loss: 0.5978
Epoch [27], Iter [91/101] Loss: 0.5133
Epoch [28], Iter [91/101] Loss: 0.5263
Epoch [29], Iter [91/101] Loss: 0.5237
Epoch [30], Iter [91/101] Loss: 0.5125
Epoch [31], Iter [91/101] Loss: 0.5729
Epoch [32], Iter [91/101] Loss: 0.5204
Epoch [33], Iter [91/101] Loss: 0.4984
Epoch [34], Iter [91/101] Loss: 0.5346
Epoch [35], Iter [91/101] Loss: 0.5092
Test MSE: 0.6162530779838562
writing predictions
writing descriptions
done
Starting training
Epoch [1], Iter [91/101] Loss: 0.7234
Epoch [2], Iter [91/101] Loss: 0.7548
Epoch [3], Iter [91/101] Loss: 0.6755
Epoch [4], It

Epoch [2], Iter [91/101] Loss: 0.6548
Epoch [3], Iter [91/101] Loss: 0.6428
Epoch [4], Iter [91/101] Loss: 0.6573
Epoch [5], Iter [91/101] Loss: 0.6206
Epoch [6], Iter [91/101] Loss: 0.6569
Epoch [7], Iter [91/101] Loss: 0.6220
Epoch [8], Iter [91/101] Loss: 0.5814
Epoch [9], Iter [91/101] Loss: 0.6088
Epoch [10], Iter [91/101] Loss: 0.6313
Epoch [11], Iter [91/101] Loss: 0.5764
Epoch [12], Iter [91/101] Loss: 0.7040
Epoch [13], Iter [91/101] Loss: 0.5829
Epoch [14], Iter [91/101] Loss: 0.5716
Epoch [15], Iter [91/101] Loss: 0.5788
Epoch [16], Iter [91/101] Loss: 0.5605
Epoch [17], Iter [91/101] Loss: 0.5496
Epoch [18], Iter [91/101] Loss: 0.5526
Epoch [19], Iter [91/101] Loss: 0.5893
Epoch [20], Iter [91/101] Loss: 0.5332
Epoch [21], Iter [91/101] Loss: 0.5614
Epoch [22], Iter [91/101] Loss: 0.5701
Epoch [23], Iter [91/101] Loss: 0.5169
Epoch [24], Iter [91/101] Loss: 0.5147
Epoch [25], Iter [91/101] Loss: 0.5505
Epoch [26], Iter [91/101] Loss: 0.5340
Epoch [27], Iter [91/101] Loss: 0

Epoch [11], Iter [91/101] Loss: 0.6198
Epoch [12], Iter [91/101] Loss: 0.5795
Epoch [13], Iter [91/101] Loss: 0.6522
Epoch [14], Iter [91/101] Loss: 0.5963
Epoch [15], Iter [91/101] Loss: 0.5993
Epoch [16], Iter [91/101] Loss: 0.5699
Epoch [17], Iter [91/101] Loss: 0.5729
Epoch [18], Iter [91/101] Loss: 0.5553
Epoch [19], Iter [91/101] Loss: 0.5450
Epoch [20], Iter [91/101] Loss: 0.5519
Epoch [21], Iter [91/101] Loss: 0.5594
Epoch [22], Iter [91/101] Loss: 0.5313
Epoch [23], Iter [91/101] Loss: 0.5676
Epoch [24], Iter [91/101] Loss: 0.5408
Epoch [25], Iter [91/101] Loss: 0.5403
Epoch [26], Iter [91/101] Loss: 0.5721
Epoch [27], Iter [91/101] Loss: 0.5774
Epoch [28], Iter [91/101] Loss: 0.5470
Epoch [29], Iter [91/101] Loss: 0.5909
Epoch [30], Iter [91/101] Loss: 0.5483
Epoch [31], Iter [91/101] Loss: 0.5393
Epoch [32], Iter [91/101] Loss: 0.5740
Epoch [33], Iter [91/101] Loss: 0.5690
Epoch [34], Iter [91/101] Loss: 0.5589
Epoch [35], Iter [91/101] Loss: 0.5323
Test MSE: 0.6214001774787

### UNet wider:

In [None]:
# Configuration for the "wider" UNet experiment.
# The data set description matches the other yearly tsurf+prec experiments in
# this notebook; what differs is the network width: CHANNELS_FIRST_CONV = 64 and
# FMAPS = (64, 64, 128, 128) instead of the 32 / (32, 32, 64, 64) used elsewhere.
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

# Both dicts are written as literals with the keys in the same order as the
# former one-assignment-per-key version, so anything that depends on the dicts'
# iteration order sees exactly the same content.
description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    # NOTE(review): presumably the year separating training and test data — confirm in datasets.py
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

### MODEL_TRAINING ###############################################

model_training_description = {
    # how to standardize the given variables (two entries for the two predictors)
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # Architecture depth. This key used to be assigned twice with the same
    # value; it is set once here, at the position of its first assignment.
    "DEPTH": 3,
    # training parameters
    # original note lists 20 as an alternative value — TODO confirm how train_unet treats integers
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    # use either this or the default Adam learning rate (earlier values tried: 0.002637, 5e-3)
    "LEARNING_RATE": 1e-3,
    # model parameters
    # channel counts are derived from the number of predictor / target variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 64,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (64, 64, 128, 128),
    # layer classes are passed uninstantiated
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": torch.nn.BatchNorm2d,  # original note mentions IcoBatchNorm2d as alternative
    "OPTIMIZER": "Adam",
    # first CUDA device if available, CPU otherwise
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "USE_CYLINDRICAL_PADDING": True,
    "USE_COORD_CONV": True,
    "LOSS": "Masked_AreaWeightedMSELoss",
}

In [None]:
# Train and evaluate the configuration defined in the previous cell n_runs times.
# RUN_NR stores the index of the current repetition in the training description.
n_runs = 10
for i in range(n_runs):
    model_training_description["RUN_NR"] = i
    unet = train_unet(description, model_training_description, output_folder)
    # NOTE(review): output_folder is passed twice (3rd and 5th argument) — presumably
    # once as model/data location and once as results location; confirm in predict.py.
    predict_save_unet(description, model_training_description, output_folder, unet, output_folder)

### UNet deeper:

In [None]:
# Configuration for the "deeper" UNet experiment.
# The data set description matches the other yearly tsurf+prec experiments in
# this notebook; the network uses DEPTH = 4 with FMAPS = (32, 32, 64, 128, 128).
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

# Keys appear in the same order in which they were previously assigned.
description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    # NOTE(review): presumably the year separating training and test data — confirm in datasets.py
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    "PRECIP_WEIGHTING": False,
    "TIMESCALE": "YEARLY",
}

### MODEL_TRAINING ###############################################

model_training_description = {
    # how to standardize the given variables (two entries for the two predictors)
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # this changes compared to standard UNet
    "DEPTH": 4,
    # training parameters
    # original note lists 20 as an alternative value — TODO confirm how train_unet treats integers
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    # use either this or the default Adam learning rate (earlier values tried: 0.002637, 5e-3)
    "LEARNING_RATE": 1e-3,
    # model parameters
    # channel counts are derived from the number of predictor / target variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 32,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (32, 32, 64, 128, 128),
    # layer classes are passed uninstantiated
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": torch.nn.BatchNorm2d,  # original note mentions IcoBatchNorm2d as alternative
    "OPTIMIZER": "Adam",
    # first CUDA device if available, CPU otherwise
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "USE_CYLINDRICAL_PADDING": True,
    "USE_COORD_CONV": True,
    "LOSS": "Masked_AreaWeightedMSELoss",
}

In [None]:
# Train and evaluate the configuration defined in the previous cell n_runs times.
# RUN_NR stores the index of the current repetition in the training description.
n_runs = 10
for i in range(n_runs):
    model_training_description["RUN_NR"] = i
    unet = train_unet(description, model_training_description, output_folder)
    # NOTE(review): output_folder is passed twice (3rd and 5th argument) — presumably
    # once as model/data location and once as results location; confirm in predict.py.
    predict_save_unet(description, model_training_description, output_folder, unet, output_folder)

# 3) Precipitation weighting 

Test how much the results change when the yearly dataset is created with precipitation-amount weighting (`PRECIP_WEIGHTING = True`).

In [None]:
# Configuration for the precipitation-weighting experiment.
# Identical to the other yearly tsurf+prec set-ups in this notebook except that
# PRECIP_WEIGHTING is True; the network is the DEPTH = 3 UNet with
# FMAPS = (32, 32, 64, 64).
base_folder = "Datasets"
output_folder = "Output/Reproduce_new"

# Both dicts are written as literals with the keys in the same order as the
# former one-assignment-per-key version, so anything that depends on the dicts'
# iteration order sees exactly the same content.
description = {
    "DATASETS_USED": ["isotopes", "tsurf", "prec"],
    "PREDICTOR_VARIABLES": {"tsurf": ["tsurf"], "prec": ["prec"]},
    "TARGET_VARIABLES": {"isotopes": ["d18O"]},
    "DATASETS_NO_GAPS": ["isotopes", "tsurf", "prec"],
    "CLIMATE_MODEL": "iHadCM3",
    "GRID_TYPE": "Flat",
    "START_YEAR": 850,
    "END_YEAR": 1850,
    "LATITUDES_SLICE": [1, -1],
    # NOTE(review): presumably the year separating training and test data — confirm in datasets.py
    "SPLIT_YEAR": 1750,
    "DO_SHUFFLE": False,
    # the setting under test in this section
    "PRECIP_WEIGHTING": True,
    "TIMESCALE": "YEARLY",
}

### MODEL_TRAINING ###############################################

model_training_description = {
    # how to standardize the given variables (two entries for the two predictors)
    "S_MODE_PREDICTORS": ["Pixelwise", "Pixelwise"],
    "S_MODE_TARGETS": ["Pixelwise"],
    "DATASET_FOLDER": output_folder,
    "MODEL_TYPE": "UNet_Flat",
    "CREATE_VALIDATIONSET": True,
    "SHUFFLE_VALIDATIONSET": True,
    # Architecture depth. This key used to be assigned twice with the same
    # value; it is set once here, at the position of its first assignment.
    "DEPTH": 3,
    # training parameters
    # original note lists 20 as an alternative value — TODO confirm how train_unet treats integers
    "NUM_EPOCHS": "early_stopping",
    "PATIENCE": 5,
    "BATCH_SIZE": 8,
    # use either this or the default Adam learning rate (earlier values tried: 0.002637, 5e-3)
    "LEARNING_RATE": 1e-3,
    # model parameters
    # channel counts are derived from the number of predictor / target variables
    "IN_CHANNELS": len(util.flatten(description["PREDICTOR_VARIABLES"].values())),
    "CHANNELS_FIRST_CONV": 32,
    "OUT_CHANNELS": len(util.flatten(description["TARGET_VARIABLES"].values())),
    "FMAPS": (32, 32, 64, 64),
    # layer classes are passed uninstantiated
    "ACTIVATION": torch.nn.ReLU,
    "NORMALIZATION": torch.nn.BatchNorm2d,  # original note mentions IcoBatchNorm2d as alternative
    "OPTIMIZER": "Adam",
    # first CUDA device if available, CPU otherwise
    "DEVICE": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "USE_CYLINDRICAL_PADDING": True,
    "USE_COORD_CONV": True,
    "LOSS": "Masked_AreaWeightedMSELoss",
}

In [None]:
# Train and evaluate the configuration defined in the previous cell n_runs times.
# RUN_NR stores the index of the current repetition in the training description.
n_runs = 10
for i in range(n_runs):
    model_training_description["RUN_NR"] = i
    unet = train_unet(description, model_training_description, output_folder)
    # NOTE(review): output_folder is passed twice (3rd and 5th argument) — presumably
    # once as model/data location and once as results location; confirm in predict.py.
    predict_save_unet(description, model_training_description, output_folder, unet, output_folder)