In [None]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
import pickle

import xarray as xr

import synthia as syn
import pyvinecopulib as pv

import sys
sys.path.append('../src')

from util import load_ds_inputs
from util import compute_layer_longwave_downwelling
from util import train_test_split_dataset
from util import to_normalized_dataset, to_unnormalized_dataset

In [None]:
# Run configuration, read from environment variables so the notebook can be
# driven by an external launcher. Each setting falls back to a default when
# the corresponding variable is not set.
copula_type = os.getenv('copula_type', 'gaussian')   # compared against 'gaussian' / 'tll' / 'parametric' when fitting
has_targets = int(os.getenv('has_targets', '0'))     # used as a truthy flag
is_normalised = int(os.getenv('is_normalised', '1')) # used as a truthy flag
data_fraction = float(os.getenv('data_fraction', '1'))

# Thread count handed to the vine-copula fit controls.
num_threads = int(os.getenv('num_threads', '2'))

# Verbosity level passed to the copula data generator.
verbose = 0

# Echo the effective configuration.
print(copula_type, has_targets, is_normalised, data_fraction, num_threads)

In [None]:
# Project path: the parent of the current working directory, so the result
# depends on where the kernel was started
# (presumably a sub-directory of the project — TODO confirm).
PROJ_PATH = Path.cwd().parent
# Load the input dataset through the project helper, passing the project path.
ds_true_in = load_ds_inputs(PROJ_PATH)

In [None]:
# Work on the loaded inputs as-is unless normalisation was requested, in which
# case only the first element returned by the helper is kept.
ds_true = ds_true_in
if is_normalised:
    ds_true, _ = to_normalized_dataset(ds_true_in)

# Optionally append the target variable. It is computed from the original
# inputs (`ds_true_in`), not from the possibly-normalised `ds_true`.
if has_targets:
    column_gas_optical_depth = 1.7
    flux_dn_hl_train = compute_layer_longwave_downwelling(
        ds_true_in,
        column_gas_optical_depth,
    )
    ds_true = xr.merge([ds_true, flux_dn_hl_train])

# Keyword arguments shared by the two shuffled splits along 'column'.
_shuffled_split = dict(dim='column', shuffle=True, seed=42)

# First split: train vs. the rest; second split: the rest into test and
# validation (sizes presumably are fractions — verify against the helper).
ds_train, ds_test = train_test_split_dataset(ds_true, test_size=0.6, **_shuffled_split)
ds_test, ds_validation = train_test_split_dataset(ds_test, test_size=0.33334, **_shuffled_split)

# Finally keep only `data_fraction` of the training set, without shuffling.
ds_train, _ = train_test_split_dataset(
    ds_train,
    train_size=data_fraction,
    dim='column',
    shuffle=False,
)

display(ds_train)

In [None]:
# Build the vine-copula fit controls for the requested copula type.
# 'gaussian' needs none; any value other than the three handled here is rejected.
if copula_type == 'gaussian':
    pyvinecopulib_ctrl = None
elif copula_type in ('tll', 'parametric'):
    # The two vine variants differ only in the family set they may use.
    _family_set = [pv.tll] if copula_type == 'tll' else pv.parametric
    pyvinecopulib_ctrl = pv.FitControlsVinecop(
        family_set=_family_set,
        trunc_lvl=50,
        num_threads=num_threads,
    )
else:
    raise RuntimeError('Copula option not supported')

generator = syn.CopulaDataGenerator(verbose=verbose)
parameterizer = None

# A vine copula is used whenever fit controls were created; otherwise fall
# back to the Gaussian copula.
if pyvinecopulib_ctrl:
    _copula = syn.VineCopula(controls=pyvinecopulib_ctrl)
else:
    _copula = syn.GaussianCopula()

generator.fit(ds_train, copula=_copula, parameterize_by=parameterizer)

In [None]:
# Encode the run configuration in the file name so that different runs write
# to distinct files.
fname = f"copula_type={copula_type}-has_targets={has_targets}-is_normalised={is_normalised}-data_fraction={data_fraction}.pkl"
outdir = PROJ_PATH / 'results' / 'fitting'
# Create the output directory (and any missing parents); no error if it exists.
outdir.mkdir(parents=True, exist_ok=True)

# Write through a context manager so the file handle is flushed and closed
# deterministically, even if pickling raises part-way through.
with open(outdir / fname, 'wb') as pickle_file:
    pickle.dump(generator, pickle_file)