In [None]:
%load_ext autoreload
%autoreload 2

import os

# Single seed shared by every pseudo-random generator seeded in this cell.
seed_value = 42

# Seed Python's built-in `random` module.
import random
random.seed(seed_value)

# Seed NumPy's legacy global generator.
import numpy as np
np.random.seed(seed_value)

# Seed TensorFlow through the TF1-compat API.
# NOTE(review): under TF2 the documented spelling is `tf.random.set_seed` — confirm
# which TensorFlow version this notebook targets before changing it.
import tensorflow as tf
tf.compat.v1.set_random_seed(seed_value)

from pathlib import Path
import numpy as np
from scipy.constants import Stefan_Boltzmann
import xarray as xr
import pickle

import matplotlib.pyplot as plt

import sys
sys.path.append('../src')

import synthia as syn

from util import load_ds_inputs
from util import to_stacked_array, to_unstacked_dataset

## Parameters

In [None]:
# Training settings forwarded to `compute_lw_dn_mlp` further down.
epochs = 1000
iterations = 10
verbose = 0

# Read from the `data_fraction` environment variable (falls back to 1 when unset);
# used below as `train_size` when sub-sampling the training split.
data_fraction = float(os.getenv('data_fraction', 1))

print(data_fraction)

In [None]:
# Results are written under <parent of cwd>/results/ml, one pickle per data_fraction.
outdir = Path.cwd().parent.joinpath('results', 'ml')
outdir.mkdir(parents=True, exist_ok=True)

fname = f"ml-control-data_fraction={data_fraction}.pkl"
fpath = outdir.joinpath(fname)

# Stop the run here rather than recompute (and later overwrite) an existing result.
if fpath.exists():
    raise RuntimeError('This case is already present. Skipping...')

## Load inputs

In [None]:
# The project root is taken to be the parent of the current working directory.
PROJ_PATH = Path.cwd().parent
ds_true_in = load_ds_inputs(PROJ_PATH)
# Display the loaded inputs.
ds_true_in

## Normalize data

In [None]:
from util import to_normalized_dataset
# Returns the normalized dataset together with `stats_info`, which is passed to
# `to_unnormalized_dataset` later to undo the normalization.
ds_normalized, stats_info = to_normalized_dataset(ds_true_in)

In [None]:
from util import to_unnormalized_dataset
# Undo the normalization so the result can be compared with the original inputs.
ds_unnormalized = to_unnormalized_dataset(ds_normalized, stats_info)

In [None]:
from util import plot_random_columns
# Plot the original inputs against their normalize -> un-normalize round trip.
# NOTE(review): the third argument (100) is presumably the number of columns to draw — confirm in util.
plot_random_columns(ds_true_in, ds_unnormalized, 100)

## Split data

In [None]:
from util import train_test_split_dataset

In [None]:
# Options shared by the two shuffled splits.
shuffled = dict(dim='column', shuffle=True, seed=42)

# First split: training part vs. held-out part (test_size=0.6).
# NOTE(review): sizes are presumably fractions along `column` — confirm in util.
ds_train, ds_heldout = train_test_split_dataset(ds_normalized, test_size=0.6, **shuffled)

# Second split: the held-out part becomes test (first result) and validation (second result).
ds_test, ds_validation = train_test_split_dataset(ds_heldout, test_size=0.33334, **shuffled)
del ds_heldout

# Sub-sample the training part with train_size=data_fraction, without shuffling;
# the remainder is discarded.
ds_train, _ = train_test_split_dataset(ds_train, train_size=data_fraction, dim='column', shuffle=False)

In [None]:
# Show the three splits for a visual check.
display(ds_train, ds_test, ds_validation)

## Physical model

In [None]:
from util import compute_layer_longwave_downwelling

# Optical depth handed to the physical model below.
# NOTE(review): the previous comment here read "We try with 30, range is 0.5 to many
# thousands", which does not match the value 1.7 — confirm which value is intended.
column_gas_optical_depth = 1.7


def _flux_and_merged(ds_split):
    """Return ``(flux, merged)`` for one normalized split.

    The split is un-normalized with ``stats_info`` before it is passed to
    ``compute_layer_longwave_downwelling``; the returned flux is then merged
    with the (still normalized) split through ``xr.merge``.
    """
    flux = compute_layer_longwave_downwelling(
        to_unnormalized_dataset(ds_split, stats_info),
        column_gas_optical_depth,
    )
    return flux, xr.merge([ds_split, flux])


flux_dn_hl_train, ds_train_X_y = _flux_and_merged(ds_train)
flux_dn_hl_test, ds_test_X_y = _flux_and_merged(ds_test)
flux_dn_hl_validation, ds_validation_X_y = _flux_and_merged(ds_validation)

display(ds_train_X_y, ds_test_X_y, ds_validation_X_y)

In [None]:
# Plot `flux_dn_lw` from the training set; the same dataset is passed as both
# arguments, so the two plotted series coincide.
# NOTE(review): the other plot_random_columns calls in this notebook pass a third
# positional argument (100, 10); it is omitted here — confirm that a default exists.
plot_random_columns(ds_train_X_y[['flux_dn_lw']], ds_train_X_y[['flux_dn_lw']])

## Machine learning

### MLP

In [None]:
from util import compute_lw_dn_mlp

# Predictors: every variable name yielded by iterating the raw input dataset.
X_vars = [var_name for var_name in ds_true_in]
# Target: the `flux_dn_lw` variable carried by the merged *_X_y datasets.
y_var = ['flux_dn_lw']

# Returns the run statistics and the test-set prediction kept as "best".
df_stats, ds_y_pred_test_best = compute_lw_dn_mlp(
    ds_train_X_y,
    ds_validation_X_y,
    ds_test_X_y,
    X_vars,
    y_var,
    epochs,
    iterations,
    verbose=verbose,
)

In [None]:
# Display the statistics returned by compute_lw_dn_mlp.
df_stats

In [None]:
# Compare the test-set `flux_dn_lw` with the prediction returned by compute_lw_dn_mlp.
# NOTE(review): the third argument (10) is presumably the number of columns to draw — confirm in util.
plot_random_columns(ds_test_X_y[['flux_dn_lw']], ds_y_pred_test_best[['flux_dn_lw']], 10)

In [None]:
# Record describing this run; it is pickled to `fpath` in the next cell.
name = 'control'
obj = dict(
    is_control=True,
    name=name,
    data_fraction=data_fraction,
    stats=df_stats,
    y_best=ds_y_pred_test_best,
)

obj

In [None]:
# Persist the result record at the path chosen earlier.
with fpath.open('wb') as sink:
    pickle.dump(obj, sink)

In [None]:
# Round-trip check: read back the pickle that was just written and display it.
# Note that this rebinds `obj` to the reloaded copy.
with fpath.open('rb') as source:
    obj = pickle.load(source)
obj