# Mixed Copula/fPCA

In [None]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
from multiprocessing import cpu_count

import numpy as np
import xarray as xr
import pyvinecopulib as pv
import matplotlib.pyplot as plt

import synthia as syn

from examples_util import plot_random_columns, plot_ds_hist

In [None]:
# Constants
# Input file location, resolved relative to the current working directory:
# <two levels above cwd>/data/nwp_saf_profiles_in.nc
this_dir = Path.cwd()
data_dir = this_dir.parents[1].joinpath('data')
path_to_data = data_dir.joinpath('nwp_saf_profiles_in.nc')

# Names of the dataset variables handled as scalars.
scalar_vars = [
    'skin_temperature',
    'sw_albedo',
    'lw_emissivity',
    'cos_solar_zenith_angle',
    'cloud_fraction',
]

# Names of the dataset variables handled as vectors.
vector_vars = [
    'temperature_fl',
    'q',
    'q_liquid',
    'q_ice',
    're_liquid',
    're_ice',
]

In [None]:
# Open the input file and keep only the 'relevant' variables, i.e. the ones
# named in scalar_vars and vector_vars.
ds_true = xr.open_dataset(path_to_data)[scalar_vars + vector_vars]
ds_true

In [None]:
# Variables grouped by the hard bound(s) their names indicate:
# a lower bound at zero, or bounds at both zero and one.
zero_boundary_vars = ['q', 'q_liquid', 'q_ice', 're_liquid', 're_ice']
zero_one_boundary_vars = [
    'sw_albedo',
    'lw_emissivity',
    'cos_solar_zenith_angle',
    'cloud_fraction',
]

# Transform data that has hard bounds: a Box-Cox transformer (second
# argument 0) for the first group and an arctanh transformer for the second,
# chained through a single CombinedTransformer so both can be reverted later.
transformer = syn.CombinedTransformer(
    [
        syn.BoxCoxTransformer(zero_boundary_vars, 0),
        syn.ArcTanhTransformer(zero_one_boundary_vars),
    ]
)
ds_true_transformed = transformer.apply(ds_true)
ds_true_transformed

In [None]:
# MixedFPCA will automatically model vectors with fPCA and scalars with Copula
copula = syn.GaussianCopula()
parameterizer = syn.QuantileParameterizer(n_quantiles=100)

# Fit the generator on the transformed data using the copula and
# parameterizer configured above, with 20 fPCA components.
generator = syn.MixedFPCADataGenerator()
generator.fit(
    ds_true_transformed,
    copula=copula,
    n_fpca_components=20,
    parameterize_by=parameterizer,
)

In [None]:
# Generate the same number of samples as the input has along 'column'.
# The length is read from `.sizes` rather than `.dims`: using `Dataset.dims`
# as a name -> length mapping is deprecated in xarray (it is slated to become
# a set of dimension names), while `Dataset.sizes` is the supported mapping.
n_samples = ds_true_transformed.sizes['column']
ds_synth_transformed = generator.generate(n_samples=n_samples)
ds_synth_transformed

In [None]:
# Recover previously transformed variables: revert the synthetic data with the
# same transformer object that was applied to the true data before fitting.
ds_synth = transformer.revert(ds_synth_transformed)
ds_synth

In [None]:
# Compare true and synthetic data with the plot_random_columns helper from
# examples_util (presumably plots a random selection of columns -- confirm there).
# FIXME: these are perhaps too smooth
plot_random_columns(ds_true, ds_synth)

In [None]:
# Compare true and synthetic data with the plot_ds_hist helper from
# examples_util (presumably per-variable histograms -- confirm there).
plot_ds_hist(ds_true, ds_synth)