<a href="https://colab.research.google.com/github/deniseiras/BAM1D-Microphysics-Emulation/blob/main/BAM1D_Physics_Emulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# BAM1D Physics Emulation

<font size="5">**Objetivo:** Emular a Hugh Morrison Microphysics implementada em BAM1D</font>
 


SUBROUTINE RunMicro_HugMorr(  
  
**ncols** - número de colunas - fixo = 1  
nCols       , &!INTEGER      , INTENT(IN   ) :: nCols

**kmax** - número de níveis = k_max  
kMax        , &!INTEGER      , INTENT(IN   ) :: kMax 

**si** - kmax+1 removido, sem valores/usado
si          , &!REAL(KIND=r8), INTENT(IN   ) :: si(kMax+1)

**sl**  
sl          , &!REAL(KIND=r8), INTENT(IN   ) :: sl(kMax)

**tc** - Temperature(K)  
tc          , &!REAL(KIND=r8), INTENT(INOUT) :: Tc (1:nCols, 1:kMax)

<font size="5">Q's - Mixing ratio </font>

**QV** - water vapor  
QV          , &!REAL(KIND=r8), INTENT(INOUT) :: qv (1:nCols, 1:kMax)

**QC** - cloud water  
QC          , &!REAL(KIND=r8), INTENT(INOUT) :: qc (1:nCols, 1:kMax)

**QR**  - rain water  
QR          , &!REAL(KIND=r8), INTENT(INOUT) :: qr (1:nCols, 1:kMax)

**QI** - cloud ice  
QI          , &!REAL(KIND=r8), INTENT(INOUT) :: qi (1:nCols, 1:kMax)

**QS** - Snow  
QS          , &!REAL(KIND=r8), INTENT(INOUT) :: qs (1:nCols, 1:kMax)

**QG** - Graupel  
QG          , &!REAL(KIND=r8), INTENT(INOUT) :: qg (1:nCols, 1:kMax)

<font size="5">*N's* - number concentration
</font>  

**NI** - cloud ice  
NI          , &!REAL(KIND=r8), INTENT(INOUT) :: ni (1:nCols, 1:kMax)

**NS** - Snow  
NS          , &!REAL(KIND=r8), INTENT(INOUT) :: ns (1:nCols, 1:kMax)

**NR** - Rain    
NR          , &!REAL(KIND=r8), INTENT(INOUT) :: nr (1:nCols, 1:kMax)

**NG** - Graupel  
NG          , &!REAL(KIND=r8), INTENT(INOUT) :: NG (1:nCols, 1:kMax)   

**NC** - Cloud droplet  
NC          , &!REAL(KIND=r8), INTENT(INOUT) :: NC (1:nCols, 1:kMax)   

**TKE** - turbulence kinetic energy (m^2 s-2), NEEDED FOR DROPLET ACTIVATION  
TKE         , &!REAL(KIND=r8), INTENT(IN   ) :: TKE (1:nCols, 1:kMax)   

**KZH** -  heat eddy diffusion coefficient from YSU scheme (M^2 S-1), NEEDED FOR DROPLET ACTIVATION  
KZH         , &!REAL(KIND=r8), INTENT(IN   ) :: KZH (1:nCols, 1:kMax)   

**gps** - Pressão - removida da entrada da RNA - valor fixo  
gps         , &!gps- AIR PRESSURE (PA)

**DT_IN** - model time step (sec) - Removido da entrada da RNA
DT_IN       , &!REAL(KIND=r8), INTENT(IN   ) :: dt_in

**omega**  
omega       , &!REAL(KIND=r8), INTENT(IN   ) :: omega  ! omega (Pa/s)

**EFFCS** - CLOUD DROPLET EFFECTIVE RADIUS OUTPUT TO RADIATION CODE (micron)  
EFFCS       , &!REAL(KIND=r8), INTENT(OUT  ) :: EFFCS (1:nCols, 1:kMax)   

**EFFIS** - ICE DROPLET EFFECTIVE RADIUS OUTPUT TO RADIATION CODE (micron)  
EFFIS       , &!REAL(KIND=r8), INTENT(OUT  ) :: EFFIS (1:nCols, 1:kMax)   

**LSRAIN** - Large scale Rain  
LSRAIN      , &!REAL(KIND=r8), INTENT(OUT) :: LSRAIN(1:nCols)

**LSSNOW** - Large scale Snow  
LSSNOW        )!REAL(KIND=r8), INTENT(OUT) :: LSSNOW(1:nCols)


# Setup dos dados

Carregamento dos dados

In [None]:
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
import time
from sklearn import metrics, preprocessing

%tensorflow_version 2.x
import tensorflow as tf
print("Tensorflow version " + tf.__version__)

import keras
from tensorflow.python.data import Dataset

from sklearn.datasets import make_regression
from sklearn.model_selection import RepeatedKFold, train_test_split
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Input, Dropout, Conv1D, Flatten, MaxPooling1D, Conv2D, MaxPooling2D, BatchNormalization, Activation
from keras.utils.vis_utils import plot_model
from keras.callbacks import TensorBoard


import csv    

from datetime import datetime
from shutil import copyfile, copytree, rmtree

k_max = 28  # number of vertical levels used

# % pip install XlsxWriter
# % pip install SkillMetrics
# import skill_metrics as sm


# tf.logging.set_verbosity(tf.logging.ERROR)
# Show at most 10 rows when a DataFrame is displayed and print floats in
# plain fixed-point notation.
pd.options.display.max_rows = 10
# pd.options.display.float_format = '{:.1f}'.format
pd.options.display.float_format = '{:f}'.format


# working local
#
# %load_ext tensorboard works locally ???
# %reload_ext tensorboard ???
# colab_in_drive_root_dir = "/home/denis/_COM_BACKUP/NN_BAM1d/bam1d_data/Colab Notebooks"
# tb_logdir_base = colab_in_drive_root_dir + '/logs'
# data_in_drive_dir = "/media/denis/dados/_COM_BACKUP/NN_BAM1D/bam1d_data/"


# loading from gdrive - COLAB MOUNTS AUTOMATICALLY NOW
#
# Mount Google Drive and define the directories used by the notebook:
# TensorBoard logs, the Colab notebooks folder and the input data folder.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
tb_logdir_base = './logs'
colab_in_drive_root_dir = "/content/drive/My Drive/Colab Notebooks/"
data_in_drive_dir = "/content/drive/My Drive/NN_Microphysics/"

# Alternative data sets (disabled). Exactly one input/output pair of CSV
# files should be loaded.
# original_input_df = pd.read_csv("{}/hug_morr_inputs_CRD_RRTMG.csv".format(data_in_drive_dir), sep=",")
# original_output_df = pd.read_csv("{}/hug_morr_outputs_CRD_RRTMG.csv".format(data_in_drive_dir), sep=",")

# original_input_df = pd.read_csv("{}/hug_morr_inputs_CRD_HSR.csv".format(data_in_drive_dir), sep=",")
# original_output_df = pd.read_csv("{}/hug_morr_outputs_CRD_HSR.csv".format(data_in_drive_dir), sep=",")

# original_input_df = pd.read_csv("{}/hug_morr_inputs_CRD_RRTMG___dt_60.csv".format(data_in_drive_dir), sep=",")
# original_output_df = pd.read_csv("{}/hug_morr_outputs_CRD_RRTMG___dt_60.csv".format(data_in_drive_dir), sep=",")


# IOP data sets with dt below 180 run out of memory

# original_input_df = pd.read_csv("{}/IOP2014__CRD_RRTMG_dt_360/hug_morr_inputs.csv".format(data_in_drive_dir), sep=",")
# original_output_df = pd.read_csv("{}/IOP2014__CRD_RRTMG_dt_360/hug_morr_outputs.csv".format(data_in_drive_dir), sep=",")

# original_input_df = pd.read_csv("{}IOP2014__CRD_RRMTG_dt_60/hug_morr_inputs.csv".format(data_in_drive_dir), sep=",")
# original_output_df = pd.read_csv("{}IOP2014__CRD_RRMTG_dt_60/hug_morr_outputs.csv".format(data_in_drive_dir), sep=",")

# original_input_df = pd.read_csv("{}IOP2014__CRD_RRMTG_dt_120/hug_morr_inputs.csv".format(data_in_drive_dir), sep=",")
# original_output_df = pd.read_csv("{}IOP2014__CRD_RRMTG_dt_120/hug_morr_outputs.csv".format(data_in_drive_dir), sep=",")

# Data set in use: the IOP2014__CRD_RRMTG_dt_180 directory. The two CSV files
# hold the inputs and the outputs of the microphysics routine, respectively.
original_input_df = pd.read_csv("{}IOP2014__CRD_RRMTG_dt_180/hug_morr_inputs.csv".format(data_in_drive_dir), sep=",")
original_output_df = pd.read_csv("{}IOP2014__CRD_RRMTG_dt_180/hug_morr_outputs.csv".format(data_in_drive_dir), sep=",")



Tensorflow version 2.4.1
Mounted at /content/drive


## Análise dos dados

### Análise estatística

- Resumo dos valores das variáveis de entrada e saída. Observe que os valores são truncados no Resumo
- Valores do primeiro, centésimo e último passo de tempo
- Níveis verticais do primeiro, centésimo e último passo de tempo

In [None]:
# Summary statistics of the raw input and output data, followed by three
# sample timesteps of each. Every timestep occupies k_max consecutive rows
# (one row per vertical level), so a timestep is selected as a block of
# k_max rows.
print("\nAll input summary:")
display.display(original_input_df.describe())
print("\nAll output summary:")
display.display(original_output_df.describe())

print("\nInput First timestep:")
display.display(original_input_df.head(k_max))
print("\nInput 100th timestep:")
# .loc slices include both end points, hence the "- 1". The block length is
# derived from k_max instead of a literal so it stays right if k_max changes.
display.display(original_input_df.loc[k_max*100:k_max*100 + k_max - 1])
print("\nInput Last timestep:")
display.display(original_input_df.tail(k_max))

print("\nOutput First timestep:")
display.display(original_output_df.head(k_max))
print("\nOutput 100th timestep:")
display.display(original_output_df.loc[k_max*100:k_max*100 + k_max - 1])
print("\nOutput Last timestep:")
display.display(original_output_df.tail(k_max))

# original_output_df.describe().to_csv('/content/drive/My Drive/Colab Notebooks/temp_describe.csv')




All input summary:


Unnamed: 0,k,si,si_kmax_+_1,sl,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,tke,kzh,gps,omega
count,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0
mean,14.5,0.468551,0.0,0.450519,237.80865,0.005162,1e-06,2e-06,0.0,2.4e-05,0.0,2358.732837,110745.661609,3917.264959,0.046469,97568.358504,0.469912,47.568224,99000.0,-0.011695
std,8.077748,0.363561,0.0,0.361058,61.352569,0.005985,1.4e-05,1.8e-05,2e-06,7.9e-05,2e-06,34806.115287,403911.484787,46250.692154,6.470318,857150.32691,1.438855,96.230471,0.0,0.079032
min,1.0,0.003669,0.0,0.001523,63.503048,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,99000.0,-1.076957
25%,7.75,0.10388,0.0,0.090635,202.558529,2.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,99000.0,-0.023818
50%,14.5,0.43224,0.0,0.399916,257.45809,0.001502,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,99000.0,0.0
75%,21.25,0.83447,0.0,0.811268,288.992577,0.009945,0.0,0.0,0.0,0.0,0.0,0.0,30.092852,0.0,0.0,0.0,0.0,32.316904,99000.0,0.015078
max,28.0,1.0,0.0,0.994964,308.962369,0.016233,0.000408,0.000844,0.000149,0.001629,0.000388,1145094.4354,2942266.54164,1521556.56989,6038.50501,35494575.1176,6.0,300.0,99000.0,0.541837



All output summary:


Unnamed: 0,k,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,EFFCS,EFFIS,LSRAIN,LSSNOW
count,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0,4865308.0
mean,14.5,237.808947,0.005162,1e-06,2e-06,0.0,2.4e-05,0.0,2358.846785,110746.708095,3917.269937,0.046469,97568.358504,24.701405,27.424802,1e-06,0.0
std,8.077748,61.351244,0.005986,1.4e-05,1.8e-05,2e-06,7.9e-05,2e-06,34807.013005,403913.164816,46250.692216,6.470318,857150.32691,2.230185,14.292927,4e-06,0.0
min,1.0,63.503048,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,13.0,-0.0,-0.0
25%,7.75,202.558915,2.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,0.0,0.0
50%,14.5,257.52223,0.001563,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,0.0,0.0
75%,21.25,288.99103,0.009955,0.0,0.0,0.0,0.0,0.0,0.0,30.095493,0.0,0.0,0.0,25.0,25.0,0.0,0.0
max,28.0,308.962369,0.016276,0.000408,0.000844,0.000149,0.001629,0.000388,1145094.4354,2942266.54164,1521556.56989,6038.50501,35494575.1176,27.581993,130.0,0.000163,0.0



Input First timestep:


Unnamed: 0,k,si,si_kmax_+_1,sl,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,tke,kzh,gps,omega
0,1,1.000000,0.000000,0.994964,300.672944,0.016230,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.060765,99000.000000,-0.012614
1,2,0.989934,0.000000,0.981964,300.526379,0.015747,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.143061,99000.000000,-0.001955
2,3,0.974009,0.000000,0.964137,299.960846,0.015145,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,-0.028920
3,4,0.954290,0.000000,0.942167,298.651604,0.014704,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,-0.037237
4,5,0.930081,0.000000,0.915361,296.805719,0.014252,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,-0.033169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,24,0.042912,0.000000,0.035429,201.415438,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000
24,25,0.028306,0.000000,0.022285,207.610504,0.000004,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000
25,26,0.016629,0.000000,0.011763,211.525561,0.000014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000
26,27,0.007338,0.000000,0.005429,213.996873,0.000029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000



Input 100th timestep:


Unnamed: 0,k,si,si_kmax_+_1,sl,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,tke,kzh,gps,omega
2800,1,1.000000,0.000000,0.994964,302.660529,0.016230,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,9.711810,99000.000000,-0.001430
2801,2,0.989934,0.000000,0.981964,301.403797,0.015747,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.200121,15.619333,99000.000000,-0.002425
2802,3,0.974009,0.000000,0.964137,299.771242,0.015145,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.196657,23.227907,99000.000000,-0.000621
2803,4,0.954290,0.000000,0.942167,297.668440,0.014704,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.237860,18.964212,99000.000000,0.004144
2804,5,0.930081,0.000000,0.915361,295.257251,0.014252,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.082481,8.552701,99000.000000,0.010388
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2823,24,0.042912,0.000000,0.035429,199.198687,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000
2824,25,0.028306,0.000000,0.022285,204.941411,0.000004,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000
2825,26,0.016629,0.000000,0.011763,206.179766,0.000014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000
2826,27,0.007338,0.000000,0.005429,208.382146,0.000029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000



Input Last timestep:


Unnamed: 0,k,si,si_kmax_+_1,sl,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,tke,kzh,gps,omega
4865280,1,1.000000,0.000000,0.994964,302.670632,0.016230,0.000000,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,9.841763,0.000000,0.000000,0.000000,11.103186,99000.000000,0.000416
4865281,2,0.989934,0.000000,0.981964,301.725666,0.015747,0.000000,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,11.777762,0.000000,0.000000,0.000000,44.312742,99000.000000,0.004870
4865282,3,0.974009,0.000000,0.964137,300.275980,0.015145,0.000000,0.000004,0.000000,0.000000,0.000000,0.000000,0.000000,15.588282,0.000000,0.000000,0.000000,0.100000,99000.000000,0.011449
4865283,4,0.954290,0.000000,0.942167,298.403754,0.014704,0.000000,0.000006,0.000000,0.000000,0.000000,0.000000,0.000000,22.097018,0.000000,0.000000,0.000000,0.100000,99000.000000,0.017780
4865284,5,0.930081,0.000000,0.915361,296.113026,0.014252,0.000000,0.000010,0.000000,0.000000,0.000000,0.000000,0.000000,33.900801,0.000000,0.000000,0.000000,0.100000,99000.000000,0.022871
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4865303,24,0.042912,0.000000,0.035429,180.170778,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,99000.000000,0.000000
4865304,25,0.028306,0.000000,0.022285,159.627927,0.000004,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,300.000000,99000.000000,0.000000
4865305,26,0.016629,0.000000,0.011763,133.086872,0.000014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,6.000000,300.000000,99000.000000,0.000000
4865306,27,0.007338,0.000000,0.005429,110.417731,0.000029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,300.000000,99000.000000,0.000000



Output First timestep:


Unnamed: 0,k,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,EFFCS,EFFIS,LSRAIN,LSSNOW
0,1,300.672944,0.016230,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
1,2,300.526379,0.015747,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
2,3,299.960846,0.015145,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
3,4,298.651604,0.014704,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
4,5,296.805719,0.014252,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,24,201.415438,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
24,25,207.610504,0.000004,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
25,26,211.525561,0.000014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000
26,27,213.996873,0.000029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,13.000000,0.000000,0.000000



Output 100th timestep:


Unnamed: 0,k,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,EFFCS,EFFIS,LSRAIN,LSSNOW
2800,1,302.660529,0.016230,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
2801,2,301.403797,0.015747,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
2802,3,299.771242,0.015145,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
2803,4,297.668440,0.014704,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
2804,5,295.257251,0.014252,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2823,24,199.198687,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
2824,25,204.941411,0.000004,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
2825,26,206.179766,0.000014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000
2826,27,208.382146,0.000029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000000,0.000000



Output Last timestep:


Unnamed: 0,k,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,EFFCS,EFFIS,LSRAIN,LSSNOW
4865280,1,302.667146,0.016231,0.000000,0.000012,0.000000,0.000000,0.000000,0.000000,0.000000,10.530966,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
4865281,2,301.721692,0.015749,0.000000,0.000014,0.000000,0.000000,0.000000,0.000000,0.000000,13.721369,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
4865282,3,300.271134,0.015147,0.000000,0.000016,0.000000,0.000000,0.000000,0.000000,0.000000,20.247833,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
4865283,4,298.397783,0.014706,0.000000,0.000019,0.000000,0.000000,0.000000,0.000000,0.000000,31.630182,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
4865284,5,296.105611,0.014255,0.000000,0.000020,0.000000,0.000000,0.000000,0.000000,0.000000,51.484827,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4865303,24,180.170778,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
4865304,25,159.627927,0.000004,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
4865305,26,133.086872,0.000014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000
4865306,27,110.417731,0.000029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,25.000000,25.000000,0.000003,0.000000


### Histogramas das variáveis de entrada

In [None]:
# Input Histograms

# _ = original_input_df.hist(bins=50, figsize=(30, 20), xlabelsize=10)

### Histogramas das variáveis de saída

In [None]:
# Output Histograms

# _ = original_output_df.hist(bins=50, figsize=(30, 20), xlabelsize=10)
#_ = validation_examples.hist(bins=20, figsize=(18, 12), xlabelsize=10)

### Boxplot das variáveis de entrada por níveis

In [None]:
# Boxplot of variables through levels

# for key in original_input_df.columns.tolist():
#     if key == "k": continue
#     # plt.subplot(6, 2, idx_plot)
#     original_input_df.boxplot(column=key, by="k", figsize=(15, 6))
# plt.show()


### Boxplot das variáveis de saída, por níveis

In [None]:
# Boxplot of variables through levels

# for key in original_output_df.columns.tolist():
#     if key == "k": continue
#     # plt.subplot(6, 2, idx_plot)
#     original_output_df.boxplot(column=key, by="k", figsize=(15, 6))
# plt.show()

### Gráfico de perfil vertical das variáveis de entrada em 3 timesteps

In [None]:
# Plot the vertical levels of first, 100th and last timestep

# first_timestep_input_df = original_input_df.head(k_max)
# med_timestep_input_df = original_input_df.loc[k_max*100:k_max*100+27]
# last_timestep_input_df = original_input_df.tail(k_max)

# idx_plot = 1
# plt.figure(figsize=(10, 10))
# for key in last_timestep_input_df.columns.tolist():
#     plt.subplot(7, 3, idx_plot)
#     plt.ylabel("k")
#     plt.xlabel(key)
#     plt.plot(first_timestep_input_df[key], first_timestep_input_df[["k"]], label="first timestep")
#     plt.plot(med_timestep_input_df[key], med_timestep_input_df[["k"]], label="100th timestep")
#     plt.plot(last_timestep_input_df[key], last_timestep_input_df[["k"]], label="last timestep")
#     idx_plot += 1
# plt.legend(loc="upper left")
# plt.subplots_adjust(hspace=1, wspace=1)
# plt.show()


# first_timestep_output_df = original_output_df.head(k_max)
# med_timestep_output_df = original_output_df.loc[k_max*100:k_max*100+27]
# last_timestep_output_df = original_output_df.tail(k_max)

# idx_plot = 1
# for key in last_timestep_output_df.columns.tolist():
#     plt.subplot(4, 6, idx_plot)
#     plt.ylabel("k")
#     plt.xlabel(key)
#     plt.title("Output x Level")
#     plt.plot(first_timestep_output_df[key], first_timestep_output_df[["k"]], label="first timestep")
#     plt.plot(med_timestep_output_df[key], med_timestep_output_df[["k"]], label="100th timestep")
#     plt.plot(last_timestep_output_df[key], last_timestep_output_df[["k"]], label="last timestep")
#     plt.legend()
# plt.show()

## Pré-processando a entrada (exemplos) e saídas (metas)
- Selecionamos as primeiras variáveis de entrada e saída que serão usadas no treinamento e na validação;
- também podemos criar novas variáveis como um composto de variáveis

In [None]:
def preprocess_features(input_df):
  """Select the model input columns from the raw input data.

  Args:
    input_df: pandas DataFrame holding the raw input data set. It must
      contain every column named in the selection below.
  Returns:
    A new DataFrame, independent of input_df, holding only the selected
    columns in the order listed. No synthetic features are derived here.
  """
  # Columns available in the input data set:
  # "k", "si", "si_kmax_+_1", "sl", "Tc", "qv", "qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "tke", "kzh", "gps", "omega"
  # "si_kmax_+_1", "sl" and "gps" are left out of the selection.
  feature_names = [
    "k", "si", "Tc", "qv", "qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "tke", "kzh", "omega"
  ]
  return input_df[feature_names].copy()


def preprocess_targets(output_df):
  """Select the target (label) columns from the raw output data.

  Args:
    output_df: pandas DataFrame holding the raw output data set. It must
      contain every column named in the selection below.
  Returns:
    A DataFrame holding only the selected target columns, in the order
    listed.
  """
  # Columns available in the output data set (all of them are selected):
  # "k", "Tc", "qv", "qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "EFFCS", "EFFIS", "LSRAIN", "LSSNOW"
  target_names = [
    "k", "Tc", "qv", "qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "EFFCS", "EFFIS", "LSRAIN", "LSSNOW"
  ]
  return output_df[target_names]


# execute the pre processing
all_examples = preprocess_features(original_input_df)
all_targets = preprocess_targets(original_output_df)

del original_input_df
del original_output_df


### Funções de normalização

In [None]:
def linear_scale(serie_or_np_arr, min_val, max_val):
  """Min-max scale values linearly so that [min_val, max_val] maps to [0, 1].

  Same formula as sklearn.preprocessing.MinMaxScaler with feature_range=(0, 1):
  https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
    X_std = (X - min_val) / (max_val - min_val)
    X_scaled = X_std * (hi - lo) + lo

  Args:
    serie_or_np_arr: pandas Series, numpy array or scalar to scale.
    min_val: value that is mapped to 0.
    max_val: value that is mapped to 1.
  Returns:
    The scaled values. A constant variable (max_val == min_val) is mapped
    to 0 instead of NaN.
  """
  lo, hi = 0, 1  # target range; named so the builtins min/max are not shadowed
  span = max_val - min_val
  # With max_val == min_val the plain formula evaluates 0/0. Use a unit span
  # instead, as MinMaxScaler does, so a constant variable lands on `lo`.
  if np.ndim(span) == 0 and span == 0:
    span = 1
  return ((serie_or_np_arr - min_val) / span) * (hi - lo) + lo

def delinear_scale(serie_or_np_arr, min_val, max_val):
  """Map values from the [0, 1] range back to [min_val, max_val].

  Inverts the min-max formula documented for
  sklearn.preprocessing.MinMaxScaler with feature_range=(0, 1):
  https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html

  Args:
    serie_or_np_arr: pandas Series, numpy array or scalar in scaled units.
    min_val: original value that corresponds to 0.
    max_val: original value that corresponds to 1.
  Returns:
    The values expressed in the original units.
  """
  lo, hi = 0, 1  # range the scaled values live in
  span = max_val - min_val
  return min_val + span * (serie_or_np_arr - lo) / (hi - lo)


def linear_scale_htang(serie_or_np_arr, min_val, max_val):
  """Scale values linearly so that [min_val, max_val] maps to [-1, 1].

  Args:
    serie_or_np_arr: pandas Series, numpy array or scalar to scale.
    min_val: value that is mapped to -1.
    max_val: value that is mapped to +1.
  Returns:
    The scaled values.
  """
  half_range = (max_val - min_val) / 2.0
  return (serie_or_np_arr - min_val) / half_range - 1.0

# old bak
# def linear_scale(serie_or_np_arr):
#   min_val = serie_or_np_arr.min()
#   max_val = serie_or_np_arr.max()
#   scale = (max_val - min_val) / 2.0
#   y = lambda x:((x - min_val) / scale) - 1.0
#   return y(serie_or_np_arr), min_val, max_val


# Linear denormalization (serie or numpy 1D array)
#
# enter with min and max val of values before normalization
def delinear_scale_htang(serie_or_np_arr, min_val, max_val):
  """Map values from the [-1, 1] range back to [min_val, max_val].

  Args:
    serie_or_np_arr: pandas Series, numpy array or scalar in scaled units.
    min_val: original value that corresponds to -1.
    max_val: original value that corresponds to +1.
  Returns:
    The values expressed in the original units.
  """
  half_range = (max_val - min_val) / 2.0
  return (serie_or_np_arr + 1.0) * half_range + min_val


# Different normalization functions

def log_normalize(series):
  """Return log(1 + x) for every element of a pandas Series.

  Uses log1p, which stays accurate for very small x, where computing
  log(x + 1.0) loses most of the significant digits. It is also evaluated
  on the whole Series at once instead of one Python call per row.

  Args:
    series: pandas Series of numeric values greater than -1.
  Returns:
    A Series with the same index holding log(1 + x).
  Raises:
    ValueError: if any element is <= -1 (outside the domain of the
      logarithm), as the element-wise math.log version did.
  """
  if (series <= -1.0).any():
    raise ValueError("math domain error")
  return np.log1p(series)

def clip(series, clip_to_min, clip_to_max):
  """Limit every element of a Series to the range [clip_to_min, clip_to_max].

  Args:
    series: pandas Series to limit.
    clip_to_min: lower bound; smaller elements are raised to it.
    clip_to_max: upper bound; larger elements are lowered to it.
  Returns:
    A Series with the bounded values.
  """
  def _bounded(value):
    # Apply the lower bound first, then the upper bound.
    raised = max(value, clip_to_min)
    return min(raised, clip_to_max)

  return series.apply(_bounded)

def z_score_normalize(series):
  """Standardize a Series: subtract its mean and divide by its standard deviation.

  Args:
    series: pandas Series of numeric values.
  Returns:
    A Series with the same index holding (x - mean) / std, where mean and
    std are series.mean() and series.std().
  """
  center = series.mean()
  spread = series.std()
  return (series - center) / spread

def binary_threshold(series, threshold):
  """Turn a Series into 0/1 flags: 1 where the element is above threshold.

  Args:
    series: pandas Series of comparable values.
    threshold: elements strictly greater than this become 1, all others 0.
  Returns:
    A Series of 0s and 1s.
  """
  def _flag(value):
    if value > threshold:
      return 1
    return 0

  return series.apply(_flag)


# cols = ['A', 'B']
# data =  pd.DataFrame(np.array([[2,3],[1.02,1.2],[0.5,0.3]]),columns=cols)
# serie = [2, 1.02, 0.5]
# display.display(data)

# scaler = preprocessing.MinMaxScaler(feature_range = (0,1))
# scaled_data = scaler.fit_transform(data[cols])
# display.display(scaled_data)

# scaled_data = linear_scale(data, 0.5, 2.0)
# display.display(scaled_data)

# de_scaled_data = delinear_scale(scaled_data, 0.5, 2.0)
# display.display(de_scaled_data)

# inv = scaler.inverse_transform(scaled_data)
# display.display(inv)


### Normalização da base usando escala linear

Normalização das entradas para a escala [0, 1] (padrão) ou [-1, 1] (`scaler` diferente de "normalize"), para os primeiros testes.

Como regra geral, o NN treina melhor quando os recursos de entrada estão aproximadamente na mesma escala.

Isso ajuda o NN a não ficar preso em etapas que são muito grandes em uma dimensão ou muito pequenas em outra.


In [None]:
def normalize_linear_scale(examples_dataframe, minmax_dict=None, scaler="normalize"):
  """Return a copy of the DataFrame with every column except 'k' scaled linearly.

  Args:
    examples_dataframe: pandas DataFrame to normalize.
    minmax_dict: optional mapping from column name to a (min, max) pair used
      as the scaling limits. When None, each column's own minimum and
      maximum are used.
    scaler: "normalize" scales with linear_scale; any other value scales
      with linear_scale_htang.
  Returns:
    A new DataFrame with the same columns; 'k' is left untouched.
  """
  scaled_frame = examples_dataframe.copy()
  rescale = linear_scale if scaler == "normalize" else linear_scale_htang

  for name, column in dict(examples_dataframe).items():
    if name == 'k':
      # The level index is an identifier, not a feature to be scaled.
      continue
    if minmax_dict is None:
      low, high = column.min(), column.max()
    else:
      low, high = minmax_dict[name][0], minmax_dict[name][1]
    scaled_frame[name] = rescale(column, low, high)
  return scaled_frame


#### Boxplot das variáveis de entrada linearmente normalizadas

In [None]:
#
# code below just to show
#

# normalized_all_examples = normalize_linear_scale(all_examples)
# # normalized_all_examples_htang = normalize_linear_scale(all_examples, scaler="htang")

# print("All normalized examples summary:")
# display.display(normalized_all_examples.describe())

# # Boxplot of variables through levels
# for key in normalized_all_examples.columns.tolist():
#     if key == "k": continue
#     # plt.subplot(6, 2, idx_plot)
#     normalized_all_examples.boxplot(column=key, by="k", figsize=(15, 6))
# plt.show()

#### Boxplot das variáveis de saída linearmente normalizadas

In [None]:
#
# code below just to show
#

# normalized_all_targets = normalize_linear_scale(all_targets)

# print("All normalized targets summary:")
# display.display(normalized_all_targets.describe())

# # Boxplot of variables through levels
# for key in normalized_all_targets.columns.tolist():
#     if key == "k": continue
#     # plt.subplot(6, 2, idx_plot)
#     normalized_all_targets.boxplot(column=key, by="k", figsize=(15, 6))
# plt.show()

### Normalização da base usando diferentes escalas para cada variável

- Diferentes normalizações por variável, como linear, logarítmica e z-score


In [None]:
def normalize_examples(examples_dataframe):
  """Normalize the input variables, each group with its own scaling.

  - "k" is copied unchanged (only when the column is not empty).
  - "si", "Tc", "qv" are scaled with linear_scale using each column's own
    minimum and maximum.
  - "qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "tke", "kzh"
    go through log_normalize.
  - "omega" goes through z_score_normalize.

  Args:
    examples_dataframe: pandas DataFrame containing all the columns above.
  Returns:
    A new DataFrame holding the normalized columns, in the order above.
  """
  linear_vars = ("si", "Tc", "qv")
  log_vars = ("qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "tke", "kzh")

  normalized = pd.DataFrame()

  level_column = examples_dataframe["k"]
  if len(level_column) > 0:
    normalized["k"] = level_column

  for name in linear_vars:
    source = examples_dataframe[name]
    normalized[name] = linear_scale(source, source.min(), source.max())

  for name in log_vars:
    normalized[name] = log_normalize(examples_dataframe[name])

  normalized["omega"] = z_score_normalize(examples_dataframe["omega"])

  return normalized


#### Boxplot das variáveis de entrada com diferentes normalizações

In [None]:
# normalized_all_examples = normalize_examples(all_examples)
# normalized_all_targets = normalize_linear_scale(all_targets)

# print("All normalized examples summary:")
# display.display(normalized_all_examples.describe())

# print("All normalized targets summary:")
# display.display(normalized_all_targets.describe())


# # Boxplot of variables through levels
# for key in normalized_all_examples.columns.tolist():
#     if key == "k": continue
#     # plt.subplot(6, 2, idx_plot)
#     normalized_all_examples.boxplot(column=key, by="k", figsize=(15, 6))
# plt.show()

## Amostras dos Conjuntos de treinamento e validação

Exibe  o resumo de entrada e saída de conjuntos de treinamento e validação, para verificar a homogeneidade desses conjuntos


In [None]:
def get_percent_normalized_trainining_and_validation(normalized_all_examples, normalized_all_targets, train_percent_size):
  """Split examples and targets, in row order, into training and validation sets.

  The first part of the rows goes to training and the remainder to
  validation; rows are not shuffled.

  Args:
    normalized_all_examples: DataFrame with all examples.
    normalized_all_targets: DataFrame with all targets, row-aligned with the
      examples.
    train_percent_size: fraction of the rows used for training (e.g. 0.7).
  Returns:
    A tuple (training_examples, training_targets, validation_examples,
    validation_targets).
  """
  total_rows = len(normalized_all_examples)
  # Round down to a multiple of k_max so that the split never falls in the
  # middle of a block of k_max rows.
  whole_blocks = int(total_rows * train_percent_size / k_max)
  train_size = whole_blocks * k_max
  print("train size = {}".format(train_size))

  # First train_size rows: training.
  training_examples = normalized_all_examples.iloc[:train_size]
  training_targets = normalized_all_targets.iloc[:train_size]

  # Everything after them: validation.
  validation_examples = normalized_all_examples.iloc[train_size:]
  validation_targets = normalized_all_targets.iloc[train_size:]

  return training_examples, training_targets, validation_examples, validation_targets


# versão atual que grava primeiro por variavel depois por nivel, e remove niveis de LSRAIN e LSSNOW
def get_df_col_k(df_orig, k_inicial, k_final, dic_var_levs_exclude=None):
  """Pivot a long-format frame (one row per level) into one column per (variable, level).

  For every column of ``df_orig`` other than ``'k'`` and every level ``k`` in
  ``k_inicial..k_final`` (inclusive), the rows with ``df_orig['k'] == k`` become
  a column named ``'<col>_k<k>'``. Columns are ordered by variable first and by
  level second.

  Args:
    df_orig: DataFrame with a ``'k'`` column holding the level of each row.
    k_inicial: First level to extract (inclusive).
    k_final: Last level to extract (inclusive).
    dic_var_levs_exclude: Optional dict ``{column name: container of levels}``
      with (variable, level) pairs to leave out. It is merged over the built-in
      exclusions, which drop levels 2..28 of ``LSRAIN`` and ``LSSNOW``.
  Returns:
    A new DataFrame with one column per kept (variable, level) pair.
  Raises:
    ValueError: If the selected levels do not all have the same number of rows.
  """
  print(df_orig.columns)
  levels_to_skip = {
    'LSRAIN': range(2, 29),
    'LSSNOW': range(2, 29),
  }
  if dic_var_levs_exclude is not None:
    levels_to_skip.update(dic_var_levs_exclude)

  levels = range(k_inicial, k_final + 1)
  # Filter the rows of each level once, instead of deep-copying and
  # re-filtering the whole frame for every (column, level) pair.
  rows_by_level = {k: df_orig[df_orig['k'] == k] for k in levels}

  # Collect the columns in a dict and build the frame in one go; inserting
  # hundreds of columns one at a time fragments the DataFrame.
  columns = {}
  for col in df_orig.columns.tolist():
    if col == 'k':
      continue
    skipped = levels_to_skip.get(col, ())
    for k in levels:
      if k in skipped:
        continue
      columns['{}_k{}'.format(col, k)] = rows_by_level[k][col].to_numpy()
  return pd.DataFrame(columns)


# versão anterior que grava primeiro por nivel depois por variavel
# def get_df_col_k(df_orig, k_inicial, k_final, dic_var_levs_exclude=None):
#   df_col_k = pd.DataFrame()
#   for k in range(k_inicial, k_final + 1):
#     df = df_orig.copy()
#     df_k = df[df['k'] == k]
#     for col in df.columns.tolist():
#       if col == 'k':
#         continue
#       if dic_var_levs_exclude is not None and col in dic_var_levs_exclude.keys():
#         if k in dic_var_levs_exclude[col]:
#           continue
#       df_col_k['{}_k{}'.format(col, k)] = df_k[col].to_numpy()
#   return df_col_k


def get_arr_2D_col_k(df_orig, colname, k_inicial, k_final):
  """Reshape one column of a long-format frame into a 2-D (row, level) array.

  The result has ``len(df_orig) / (k_final - k_inicial + 1)`` rows and ``k_max``
  columns (``k_max`` is a module-level global). Column ``k - 1`` receives the
  values of ``colname`` for the rows where ``df_orig['k'] == k``; columns of
  levels outside ``k_inicial..k_final`` stay zero.

  Args:
    df_orig: DataFrame with a ``'k'`` level column and the column ``colname``.
    colname: Name of the column to extract.
    k_inicial: First level to extract (inclusive, 1-based).
    k_final: Last level to extract (inclusive, 1-based).
  Returns:
    numpy array of shape (rows per level, k_max).
  """
  levels = k_final - k_inicial + 1
  # Assumes df_orig holds only the requested levels, each with the same number
  # of rows - TODO confirm against callers.
  rows_one_level = int(len(df_orig.index)/levels)
  arr_col_2d = np.zeros((rows_one_level, k_max))

  for k in range(k_inicial, k_final + 1):
    # Boolean selection already yields a new object, so the full-frame copy
    # the loop used to make on every level was unnecessary.
    arr_col_2d[:, k-1] = df_orig.loc[df_orig['k'] == k, colname].to_numpy()

  return arr_col_2d

In [None]:
# Split the full data set into training and validation subsets and print the
# summary statistics of each part.
# The first 70% of the rows go to training and the remaining 30% to validation.
# NOTE(review): the original note also mentions eliminating variables that do
# not vary; no such filtering is done in this cell.

training_examples_tmp, training_targets_tmp, validation_examples_tmp, validation_targets_tmp = get_percent_normalized_trainining_and_validation(all_examples, all_targets, 0.7)

print("Training examples summary:")
display.display(training_examples_tmp.describe())

print("Validation examples summary:")
display.display(validation_examples_tmp.describe())

print("Training targets summary:")
display.display(training_targets_tmp.describe())

print("Validation targets summary:")
display.display(validation_targets_tmp.describe())


train size = 3405696
Training examples summary:


Unnamed: 0,k,si,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,tke,kzh,omega
count,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0
mean,14.5,0.468551,237.594765,0.005161,1e-06,3e-06,0.0,2.4e-05,0.0,2255.438053,101151.914857,4068.123044,0.05692,101417.234712,0.454196,45.851484,-0.01242
std,8.077748,0.363561,61.436343,0.005985,1.4e-05,1.8e-05,2e-06,7.9e-05,2e-06,33985.727158,385104.416531,47210.18008,7.694274,877361.886257,1.40905,94.082704,0.078518
min,1.0,0.003669,63.503048,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,-0.985995
25%,7.75,0.10388,202.663399,2.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,-0.024188
50%,14.5,0.43224,257.231255,0.001502,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
75%,21.25,0.83447,288.878719,0.009945,0.0,0.0,0.0,0.0,0.0,0.0,17.125936,0.0,0.0,0.0,0.0,31.698106,0.014214
max,28.0,1.0,308.583864,0.016233,0.000408,0.000844,0.000149,0.001325,0.000388,1145094.4354,2942266.54164,1521556.56989,6038.50501,35494575.1176,6.0,300.0,0.541837


Validation examples summary:


Unnamed: 0,k,si,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,tke,kzh,omega
count,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0
mean,14.5,0.468551,238.307706,0.005162,1e-06,2e-06,0.0,2.5e-05,0.0,2599.749378,133130.641273,3565.269513,0.022085,88587.819629,0.506581,51.573874,-0.010004
std,8.07775,0.363561,61.153765,0.005986,1.3e-05,1.8e-05,2e-06,8.1e-05,1e-06,36647.852023,443906.088633,43928.512795,1.188425,807956.300147,1.505469,100.951058,0.080191
min,1.0,0.003669,73.449916,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,-1.076957
25%,7.75,0.10388,202.356155,2.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,-0.022978
50%,14.5,0.43224,257.704029,0.001508,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
75%,21.25,0.83447,289.448154,0.009945,0.0,0.0,0.0,0.0,0.0,0.0,97.888145,0.0,0.0,0.0,0.0,34.307073,0.017509
max,28.0,1.0,308.962369,0.01623,0.000374,0.000747,0.000129,0.001629,0.000387,1145094.416,2938224.98426,1462846.46511,221.915976,18389107.8624,6.0,300.0,0.396736


Training targets summary:


Unnamed: 0,k,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,EFFCS,EFFIS,LSRAIN,LSSNOW
count,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0,3405696.0
mean,14.5,237.595058,0.005161,1e-06,3e-06,0.0,2.4e-05,0.0,2255.438053,101151.940145,4068.401709,0.05692,101422.048628,24.683494,27.500308,1e-06,0.0
std,8.077748,61.435035,0.005986,1.4e-05,1.8e-05,2e-06,7.9e-05,2e-06,33985.727158,385104.411211,47211.505371,7.694274,877381.472657,2.304218,14.627737,4e-06,0.0
min,1.0,63.503048,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,13.0,-0.0,-0.0
25%,7.75,202.664223,2.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,0.0,0.0
50%,14.5,257.319978,0.001563,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,0.0,0.0
75%,21.25,288.877419,0.009955,0.0,0.0,0.0,0.0,0.0,0.0,17.12804,0.0,0.0,0.0,25.0,25.0,0.0,0.0
max,28.0,308.583864,0.016276,0.000408,0.000844,0.000149,0.001325,0.000388,1145094.4354,2942266.54164,1521556.56989,6038.50501,35494575.1176,27.581993,130.0,0.000133,0.0


Validation targets summary:


Unnamed: 0,k,Tc,qv,qc,qr,qi,qs,qg,ni,ns,nr,NG,NC,EFFCS,EFFIS,LSRAIN,LSSNOW
count,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0,1459612.0
mean,14.5,238.308012,0.005162,1e-06,2e-06,0.0,2.5e-05,0.0,2600.129198,133134.070509,3564.6359,0.022084,88576.587373,24.743197,27.248625,1e-06,0.0
std,8.07775,61.1524,0.005986,1.3e-05,1.8e-05,2e-06,8.1e-05,1e-06,36650.691428,443911.02318,43925.182218,1.188425,807906.49299,2.046452,13.477777,3e-06,0.0
min,1.0,73.449916,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,13.0,-0.0,-0.0
25%,7.75,202.35631,2.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,0.0,0.0
50%,14.5,257.744712,0.001563,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,0.0,0.0
75%,21.25,289.447436,0.009953,0.0,0.0,0.0,0.0,0.0,0.0,97.909875,0.0,0.0,0.0,25.0,25.0,0.0,0.0
max,28.0,308.962369,0.016269,0.000374,0.000747,0.000129,0.001629,0.000387,1145094.416,2938224.98426,1462846.46511,221.915976,18389107.8624,26.671755,130.0,0.000163,0.0


In [None]:
# Set using Variables per K
# training_examples_tmp, training_targets_tmp, validation_examples_tmp, validation_targets_tmp = get_percent_normalized_trainining_and_validation(all_examples, all_targets, 0.7)

# k_inicial = 1
# k_final = k_max

# training_examples_tmp = get_df_col_k(training_examples_tmp, k_inicial, k_final)
# training_targets_tmp = get_df_col_k(training_targets_tmp, k_inicial, k_final)
# validation_examples_tmp = get_df_col_k(validation_examples_tmp, k_inicial, k_final)
# validation_targets_tmp = get_df_col_k(validation_targets_tmp, k_inicial, k_final)



# display.display(training_examples_tmp.describe())
# display.display(validation_examples_tmp.describe())
# display.display(training_targets_tmp.describe())
# display.display(validation_targets_tmp.describe())


# Treinamento da rede neural: Rotinas utilizadas


In [None]:
def construct_feature_columns(input_features):
  """Build one TensorFlow numeric feature column per input feature name.

  Args:
    input_features: Iterable with the names of the numerical input features.
  Returns:
    A set with one ``tf.feature_column.numeric_column`` per name.
  """
  feature_columns = set()
  for feature_name in input_features:
    feature_columns.add(tf.feature_column.numeric_column(feature_name))
  return feature_columns

In [None]:
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build a dataset pipeline and return the tensors of its next batch.

    Note that this function only feeds data; it does not train anything.

    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data. The shuffle is
        applied after batching/repeating, with a buffer of 10000 elements.
      num_epochs: Number of epochs for which data should be repeated.
        None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """
    # Turn every feature column into a plain numpy array, keyed by its name.
    feature_arrays = {}
    for name, column in dict(features).items():
        feature_arrays[name] = np.array(column)

    # Warning kept from the original author: from_tensor_slices has a 2GB limit.
    dataset = Dataset.from_tensor_slices((feature_arrays, targets))
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(num_epochs)

    if shuffle:
        dataset = dataset.shuffle(10000)

    # Pull the next batch out of a one-shot iterator and show it.
    next_features, next_labels = dataset.make_one_shot_iterator().get_next()
    print("Features:", next_features)
    print("Labels:", next_labels)
    return next_features, next_labels

In [None]:
def use_tpu():
  """Connect to the TPU runtime, initialise it and return a TPUStrategy.

  See https://www.tensorflow.org/guide/tpu

  Returns:
    A ``tf.distribute.TPUStrategy`` built on the resolved TPU cluster.
  Raises:
    RuntimeError: If no TPU runtime is available (the resolver raised
      ValueError). The original ValueError is chained as the cause.
  """
  # Eager execution is handy for inspecting tensors without building graphs.
  # It can be disabled here to avoid the error
  # "Tensor.graph is meaningless when eager execution is enabled".
  # tf.compat.v1.disable_eager_execution()

  try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
    # For TF2:
    # resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])  # https://www.tensorflow.org/guide/tpu
    print('Running on TPU ', resolver.cluster_spec().as_dict()['worker'])
  except ValueError as err:
    # RuntimeError instead of a bare BaseException: it is still caught by
    # "except BaseException", but no longer slips past "except Exception".
    raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!') from err

  # Requires eager execution:
  # tf.config.experimental_connect_to_cluster(resolver)

  # This is the TPU initialization code that has to be at the beginning.
  tf.tpu.experimental.initialize_tpu_system(resolver)
  print("All devices: ", tf.config.list_logical_devices('TPU'))
  tpu_strategy = tf.distribute.TPUStrategy(resolver)
  return tpu_strategy

# tpu_strategy = use_tpu()
# with tpu_strategy.scope():
#  code here

In [None]:
def use_gpu():
  """Check that TensorFlow sees the first GPU and print its device name.

  Author's note: the GPU was slower than the TPU and about as fast as (or only
  slightly faster than) the CPU, e.g.:
    INFO:tensorflow:global_step/sec: 299.778
    INFO:tensorflow:loss = 3.5140946, step = 200 (0.332 sec)

  Raises:
    SystemError: If the reported GPU device name is not '/device:GPU:0'.
  """
  expected_device = '/device:GPU:0'
  found_device = tf.test.gpu_device_name()
  if found_device == expected_device:
    print('Found GPU at: {}'.format(found_device))
    return
  raise SystemError('GPU device not found')

# use_gpu()
# with tf.device('/device:GPU:0') as gpu:
#     print("Gpu name={}".format(gpu))


In [None]:
def get_no_spinup_examples_and_targets(levels, delta_t, spin_hours, pre_process=False):
  """Return the examples and targets that remain after dropping the spin-up rows.

  The leading ``int(levels * spin_hours * 3600 / delta_t)`` rows of the global
  ``original_input_df`` / ``original_output_df`` frames are discarded
  (presumably one row per level per time step of ``delta_t`` seconds - TODO
  confirm). The first three remaining rows of each frame are displayed so the
  start of the kept period can be checked.

  Args:
    levels: Number of vertical levels.
    delta_t: Time step used in the spin-up size formula.
    spin_hours: Length of the spin-up period in hours.
    pre_process: If True, pass the results through ``preprocess_features`` and
      ``preprocess_targets`` before returning them.
  Returns:
    Tuple (examples, targets, number of rows kept).
  """
  # TODO (original author): original_*_df was cleared to free memory after its
  # last use in all_examples, all_targets. Try using all_examples, all_targets.
  rows_in_spinup = int(levels * spin_hours * 3600/delta_t)
  no_spinup_size = len(original_input_df) - rows_in_spinup

  examples = original_input_df.tail(no_spinup_size)
  targets = original_output_df.tail(no_spinup_size)

  print("First 3 examples and targets for checking spinup start")
  display.display(examples.head(3))
  display.display(targets.head(3))

  if not pre_process:
    return examples, targets, no_spinup_size

  # Select the initial filtered variables.
  return preprocess_features(examples), preprocess_targets(targets), no_spinup_size

## Construção das NN customizáveis

In [None]:
# get sample dataset
def get_dataset():
    """Return a synthetic regression problem as the pair (X, y).

    The data comes from ``make_regression`` with 1000 samples, 10 features
    (5 informative), 3 targets and a fixed random state of 2.
    """
    return make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3, random_state=2)


def get_custom_model(n_inputs, n_outputs, neurons_per_layer, optimizer='adam', loss='mse'):
    """Build and compile a fully connected network with a linear output layer.

    No dropout is applied by this model.

    Args:
      n_inputs: Shape passed to ``Input(shape=...)``.
      n_outputs: Number of units in the final ``Dense`` layer.
      neurons_per_layer: Iterable with the unit count of each hidden
        ``Dense`` layer (all hidden layers use ReLU).
      optimizer: Optimizer passed to ``model.compile``.
      loss: Loss passed to ``model.compile``.
    Returns:
      The compiled model; RootMeanSquaredError is tracked as metric.
    """
    inputs = Input(shape=n_inputs)
    x = inputs
    for neurons in neurons_per_layer:
        x = Dense(neurons, activation='relu')(x)
    # TODO (original author): level_all = Dense(self.train_y.shape[1])(x)
    level_all = Dense(n_outputs)(x)
    model = Model(inputs=inputs, outputs=level_all)
    model.compile(loss=loss, optimizer=optimizer, metrics=[tf.keras.metrics.RootMeanSquaredError()])

    return model


def get_cnn_model(n_inputs, n_outputs, arr_convs_filters_kernels, neurons_per_layer, batch_norm=None, dropout=None, max_pooling=None, optimizer='adam', loss='mse', timesteps_window=None):
    """Build and compile a 1-D convolutional network followed by dense layers.

    Layer order: Conv1D blocks -> optional MaxPooling1D -> Flatten -> hidden
    Dense layers -> linear Dense output.

    Args:
      n_inputs: Number of input variables.
      n_outputs: Number of units of the output layer.
      arr_convs_filters_kernels: Sequence of (filters, kernel_size) entries,
        one per Conv1D layer.
      neurons_per_layer: Unit counts of the hidden Dense layers; ``None``
        entries are skipped.
      batch_norm: When not None, every Conv1D is followed by
        BatchNormalization and an extra ReLU activation.
      dropout: When not None, fraction of units dropped after each hidden
        Dense layer.
      max_pooling: When not None, pool size of the MaxPooling1D layer.
      optimizer: Optimizer passed to ``model.compile``.
      loss: Loss passed to ``model.compile``.
      timesteps_window: When None the conv input shape is (n_inputs, 1),
        otherwise (timesteps_window, n_inputs).
    Returns:
      The compiled Sequential model; RootMeanSquaredError is tracked as metric.
    """
    # Author's sizing note: kernel = 3 -> 448 - (kernel - 1) = 446.
    if timesteps_window is None:
        conv_input_shape = (n_inputs, 1)
    else:
        conv_input_shape = (timesteps_window, n_inputs)
    use_batch_norm = batch_norm is not None

    model = Sequential()
    for conv_spec in arr_convs_filters_kernels:
        model.add(Conv1D(filters=conv_spec[0], kernel_size=conv_spec[1], activation='relu', input_shape=conv_input_shape))
        if use_batch_norm:
            # https://www.kdnuggets.com/2018/09/dropout-convolutional-networks.html
            model.add(BatchNormalization())
            model.add(Activation("relu"))

    if max_pooling is not None:
        model.add(MaxPooling1D(pool_size=max_pooling))
    model.add(Flatten())

    for neurons in neurons_per_layer:
        if neurons is None:
            continue
        model.add(Dense(neurons, activation='relu'))
        if dropout is not None:
            # https://www.kdnuggets.com/2018/09/dropout-convolutional-networks.html
            model.add(Dropout(dropout))

    # Author's note: a reshape is needed when this is used with timesteps_window.
    model.add(Dense(n_outputs))

    model.compile(loss=loss, optimizer=optimizer, metrics=[tf.keras.metrics.RootMeanSquaredError()])
    return model


# idem mas com Sequential - erro ?
# def get_custom_model(n_inputs, n_outputs, neurons_per_layer, optimizer='adam', loss='mse'):

#     model = keras.Sequential()
#     first = True
#     for neurons in neurons_per_layer:
#       if first:
#         model.add(Dense(neurons, input_dim=n_inputs, activation="relu"))
#         first = False
#       else:
#         model.add(Dense(neurons, activation="relu"))

#     model.add(Dense(n_outputs))
#     model.compile(loss=loss, optimizer=optimizer, metrics=[tf.keras.metrics.RootMeanSquaredError()])
#     return model
    

# get the model
def get_sample_model(n_inputs, n_outputs, loss='mae'):
    """Build and compile a small sample network (one hidden layer of 20 units).

    Args:
      n_inputs: Input dimension of the hidden layer.
      n_outputs: Number of units of the linear output layer.
      loss: Keras loss name, e.g. mean_squared_error or mae
        (see https://keras.io/api/losses/).
    Returns:
      The Sequential model compiled with the Adam optimizer.
    """
    hidden_layer = Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu')
    output_layer = Dense(n_outputs)

    model = Sequential()
    for layer in (hidden_layer, output_layer):
        model.add(layer)
    model.compile(loss=loss, optimizer='adam')
    return model


def get_curr_time():
    """Return the current local time formatted as 'MM-DD.HH.MM'."""
    now = datetime.now()
    return "{:%m-%d.%H.%M}".format(now)


def get_tb_logdir_var(test_name, layers, epochs, target_var="ALL_VARS"):
  """Return a time-stamped TensorBoard log directory path for one training run.

  The path is '<test log dir>/<target_var>__<layers>-layers__<epochs>-epochs___<time>'.
  """
  parent_dir = get_tb_logdir_test(test_name)
  run_label = "{}__{}-layers__{}-epochs___{}".format(target_var, layers, epochs, get_curr_time())
  return "{}/{}".format(parent_dir, run_label)


def create_tb_logdir_test(test_name):
  """Recreate the TensorBoard log directory of a test, empty, and return its path.

  Any existing directory at that path is removed first (removal errors are
  ignored).
  """
  target_dir = get_tb_logdir_test(test_name)
  # Start from a clean directory.
  rmtree(target_dir, ignore_errors=True)
  os.makedirs(target_dir)
  return target_dir


def get_tb_logdir_test(test_name):
  """Return the TensorBoard log directory of a test: '<tb_logdir_base>/<test_name>'."""
  return f"{tb_logdir_base}/{test_name}"


def get_test_dir_name(test_name, n_inputs, hidden_layers, n_outputs, epochs, var_name):
  test_dir_name = '{}_{}-inputs_{}-layers_{}-outputs_{}-epochs__{}'.format(test_name, n_inputs, hidden_layers, n_outputs, epochs, var_name)
  return test_dir_name


def get_mode_var_name_dir(test_name, n_inputs, hidden_layers, n_outputs, epochs, var_name):
  """Return the directory where the model of one test configuration is saved.

  The path is '<colab_in_drive_root_dir>/saved_models/<test dir name>', where the
  last component comes from ``get_test_dir_name``.
  """
  saved_models_root = f'{colab_in_drive_root_dir}/saved_models'
  run_dir_name = get_test_dir_name(test_name, n_inputs, hidden_layers, n_outputs, epochs, var_name)
  return f'{saved_models_root}/{run_dir_name}'


def get_drive_test_log_dir(test_name):
  """Return the log directory of a test: '<colab_in_drive_root_dir>/logs/<test_name>'."""
  return f'{colab_in_drive_root_dir}/logs/{test_name}'


def create_drive_test_log_dir(test_name):
  """Recreate the log directory of a test, empty, and return its path.

  Any existing directory at that path is removed first (removal errors are
  ignored).
  """
  target_dir = get_drive_test_log_dir(test_name)
  # Start from a clean directory.
  rmtree(target_dir, ignore_errors=True)
  os.makedirs(target_dir)
  return target_dir


def save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir, var_name='ALL_VARS'):
  """Save a trained model, a plot of it and a copy of its training logs.

  The model directory (see ``get_mode_var_name_dir``) is wiped and recreated,
  then the model is saved there twice: as a directory and as 'tmp_model.h5'.
  A 'model.png' plot is written next to it. Finally ``log_dir_var`` is copied
  to '<drive_logs_test_dir>/<test dir name>'.

  Args:
    model: The model to save.
    test_name, n_inputs, n_outputs, hidden_layers, epochs, var_name: Values
      used to build the directory names.
    log_dir_var: Directory with the logs of this run (copy source).
    drive_logs_test_dir: Parent directory for the copied logs.
  """
  naming_args = (test_name, n_inputs, hidden_layers, n_outputs, epochs, var_name)

  model_dir = get_mode_var_name_dir(*naming_args)
  rmtree(model_dir, ignore_errors=True)
  os.makedirs(model_dir)
  log_msg('Saving model in {} ...'.format(model_dir))

  model.save(model_dir)
  model.save(model_dir + '/tmp_model.h5')

  plot_model(model, show_shapes=True, to_file='{}/model.png'.format(model_dir))

  logs_destination = '{}/{}'.format(drive_logs_test_dir, get_test_dir_name(*naming_args))
  copytree(log_dir_var, logs_destination)
  # The message is logged only after the copy has finished.
  log_msg('Saving logs in {} ...'.format(logs_destination))


# Train a model and evaluate it after every round, using either repeated random
# train/test splits (mode='split', the default) or repeated k-fold
# cross-validation (mode='k-folds').
def train_model(model, X, y, tb_log_dir, epochs=200, mode='split', n_splits=10, batch_size=64):
    """Fit ``model`` over several rounds and evaluate it after each one.

    Args:
      model: Compiled Keras model; it is trained in place.
      X: Input array, indexable by arrays of row indices.
      y: Target array, indexable by arrays of row indices.
      tb_log_dir: Directory for the TensorBoard callback.
      epochs: Epochs per round in 'split' mode. In 'k-folds' mode each fold
        trains for ``int(epochs / n_splits)`` epochs.
      mode: 'split' draws ``n_splits`` random train/test splits (test
        fraction 0.2/0.9) and uses early stopping on the training loss;
        'k-folds' runs RepeatedKFold with ``n_splits`` folds and 2 repeats.
      n_splits: Number of rounds ('split') or folds per repeat ('k-folds').
      batch_size: Batch size for fit and evaluate.
    Returns:
      List with the ``model.evaluate`` result of every round.
    Raises:
      ValueError: If ``mode`` is neither 'k-folds' nor 'split'.
    """
    if mode not in ('k-folds', 'split'):
        # Previously an unknown mode trained nothing and then crashed with a
        # NameError in the final log message.
        raise ValueError("Unknown training mode: {!r} (expected 'k-folds' or 'split')".format(mode))

    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_log_dir)
    log_msg("Start training")
    start = time.time()
    results = list()

    if mode == 'k-folds':
        # Author's note: k-folds works badly here - it needs many splits and
        # is too slow.
        n_repeats = 2
        epochs_k = int(epochs / n_splits)
        cv = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=1)
        total_folds = n_splits * n_repeats
        for fold, (train_ix, test_ix) in enumerate(cv.split(X), start=1):
            X_train, X_test = X[train_ix], X[test_ix]
            y_train, y_test = y[train_ix], y[test_ix]
            # The TensorBoard callback is attached to the first fold only.
            fold_callbacks = [tensorboard_callback] if fold == 1 else None
            model.fit(X_train, y_train, verbose=1, epochs=epochs_k, callbacks=fold_callbacks, batch_size=batch_size, use_multiprocessing=True)
            # Evaluate on the held-out part of this fold.
            loss_metric = model.evaluate(X_test, y_test, verbose=1, batch_size=batch_size, use_multiprocessing=True)
            # The fold counter replaces the undefined name `spl` that made
            # this branch fail on its first fold.
            log_msg('Test on TRAINING set. split {} of {} : [{} loss, metric(rmse)]={}'.format(fold, total_folds, model.loss, loss_metric))
            results.append(loss_metric)

    else:  # mode == 'split'
        early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=20)

        for spl in range(n_splits):
            # Train / test split division (20% of total = 0.2/0.9 = 0.22222)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=(0.2/0.9), random_state=None)
            display.display(X_train.shape)
            display.display(X_test.shape)
            display.display(y_train.shape)
            display.display(y_test.shape)

            model.fit(X_train, y_train, verbose=1, epochs=epochs, callbacks=[tensorboard_callback, early_stop_callback], batch_size=batch_size, use_multiprocessing=True)

            # Evaluate on the held-out part of this split.
            loss_metric = model.evaluate(X_test, y_test, verbose=1, batch_size=batch_size, use_multiprocessing=True)
            log_msg('Test on TRAINING set. split {} of {} : [{} loss, metric(rmse)]={}'.format(spl+1, n_splits, model.loss, loss_metric))
            results.append(loss_metric)

    end = time.time()
    log_msg("Training finished in {} seconds".format(end-start))
    log_msg( 'All training mse losses/rmse metric all spits: \n {}'.format(results))
    # Report the last round; None when no round ran (e.g. n_splits == 0).
    last_loss_metric = results[-1] if results else None
    log_msg( '======> Test on TRAINING set: [{} loss, metric(rmse)]={}'.format(model.loss, last_loss_metric))
    return results


def validate_model(model, normalized_validation_examples, normalized_validation_targets, batch_size=64, is_CNN=False):
  """Evaluate ``model`` on the validation set and log the result.

  Args:
    model: Model exposing ``evaluate``.
    normalized_validation_examples: pandas object with the validation inputs.
    normalized_validation_targets: pandas object with the validation targets.
    batch_size: Batch size used by ``model.evaluate``.
    is_CNN: If True, a trailing axis is added to the input array
      (``np.expand_dims(..., 2)``) before evaluation.
  """
  def _count_vars(arr):
    # A 1-D array holds a single variable; otherwise one variable per column.
    return arr.shape[1] if len(arr.shape) > 1 else 1

  log_msg("Validating model in test set ...")

  inputs = normalized_validation_examples.to_numpy()
  log_msg("Input Validation variables/size = {}/{}".format(_count_vars(inputs), inputs.shape[0]))
  if is_CNN:
    inputs = np.expand_dims(inputs, 2)

  outputs = normalized_validation_targets.to_numpy()
  log_msg("Output Validation variables/size = {}/{}".format(_count_vars(outputs), outputs.shape[0]))

  loss_metric_eval = model.evaluate(inputs, outputs, verbose=1, batch_size=batch_size, use_multiprocessing=True)
  log_msg('=====> Final Test on VALIDATION set: [loss/rmse] = {}'.format(loss_metric_eval))


def validate_model_cnn_timesteps(model, arr_input_val, arr_output_val, batch_size=64):
  """Evaluate ``model`` on already prepared validation arrays and log the result.

  Unlike ``validate_model``, the inputs are passed to ``model.evaluate`` as
  given, with no conversion or reshaping.
  """
  log_msg("Validating model in test set ...")
  evaluation = model.evaluate(
    arr_input_val,
    arr_output_val,
    verbose=1,
    batch_size=batch_size,
    use_multiprocessing=True,
  )
  log_msg('=====> Final Test on VALIDATION set: [loss/rmse] = {}'.format(evaluation))


def log_msg(message):
  """Append ``message`` to the current test's log file and print it.

  The file is 'log_messages.txt' inside the log directory of the global
  ``test_name``; each message is written on a new line.

  Args:
    message: Text to log (string).
  """
  log_file = '{}/log_messages.txt'.format(get_drive_test_log_dir(test_name))
  # Close the handle on every call: the previous version leaked one open file
  # per message and relied on garbage collection to flush the text to disk.
  with open(log_file, 'a') as f:
    f.write('\n' + message)
  print(message)
 

# sample execution
# #
# # load dataset
# X, y = get_dataset()
# n_inputs, n_outputs = X.shape[1], y.shape[1]
# print(n_inputs, n_outputs)
# n_outputs
# model = get_custom_model(n_inputs, n_outputs, [n_inputs, n_outputs]) #, 'mean_squared_error'
# # model = get_sample_model(n_inputs, n_outputs, 'loss_metric')
# test_name = 'kerastest'
# drive_logs_test_dir = create_drive_test_log_dir(test_name)

# # model.build()
# model.summary(print_fn=log_msg)
# plot_model(model, to_file='./model.png')
# train_model(model, X, y, drive_logs_test_dir, epochs=100, n_splits=3)



# Testes e validação

## Rotinas para cálculo de métricas sobre os resultados

In [None]:
#
# rotinas para cálculo de métricas sobre os resultados
#

def _error(actual: np.ndarray, predicted: np.ndarray):
    """Simple error: the element-wise difference ``actual - predicted``.

    Positive values mean the prediction is below the actual value.
    """
    return actual - predicted


def bias(actual: np.ndarray, predicted: np.ndarray):
    """BIAS: the mean of the signed errors returned by ``_error``."""
    residuals = _error(actual, predicted)
    return np.mean(residuals)


def mse(actual: np.ndarray, predicted: np.ndarray):
    """Mean Squared Error: the mean of the squared errors."""
    residuals = _error(actual, predicted)
    squared = np.square(residuals)
    return np.mean(squared)


def rmse(actual: np.ndarray, predicted: np.ndarray):
    """Root Mean Squared Error: the square root of ``mse``."""
    mean_squared = mse(actual, predicted)
    return np.sqrt(mean_squared)


def mae(actual: np.ndarray, predicted: np.ndarray):
    """Mean Absolute Error: the mean of the absolute errors."""
    residuals = _error(actual, predicted)
    magnitudes = np.abs(residuals)
    return np.mean(magnitudes)


def _percentage_error(actual: np.ndarray, predicted: np.ndarray):
    """
    Relative error: ``_error(actual, predicted) / (actual + EPSILON)``.
    EPSILON is a module-level constant added to the denominator.
    Note: result is NOT multiplied by 100
    """
    numerator = _error(actual, predicted)
    denominator = actual + EPSILON
    return numerator / denominator

    
def mpe(actual: np.ndarray, predicted: np.ndarray):
    """Mean Percentage Error: the mean of the signed relative errors (not times 100)."""
    relative = _percentage_error(actual, predicted)
    return np.mean(relative)


def mape(actual: np.ndarray, predicted: np.ndarray):
    """
    Mean Absolute Percentage Error: the mean of the absolute relative errors.
    Properties (as noted by the original author):
        + Easy to interpret
        + Scale independent
        - Biased, not symmetric
        - Undefined when actual[t] == 0
    Note: result is NOT multiplied by 100
    """
    relative = _percentage_error(actual, predicted)
    magnitudes = np.abs(relative)
    return np.mean(magnitudes)


def rmspe(actual: np.ndarray, predicted: np.ndarray):
    """
    Root Mean Squared Percentage Error: the square root of the mean of the
    squared relative errors.
    Note: result is NOT multiplied by 100
    """
    relative = _percentage_error(actual, predicted)
    mean_squared = np.mean(np.square(relative))
    return np.sqrt(mean_squared)


def write_header_B(normalized_all_targets, writer):
  """Write the target column names, without 'k', as one CSV row.

  Args:
    normalized_all_targets: DataFrame whose column labels form the header.
      It is not modified.
    writer: Object with a ``writerow`` method (e.g. a ``csv.writer``).
  """
  # Only the labels are needed, so read them directly instead of copying the
  # whole (potentially multi-million-row) frame just to drop one column.
  header = [name for name in normalized_all_targets.columns if name != 'k']
  writer.writerow(header)


def get_rmse_csv_writer():
  """Return what ``get_csv_writer`` returns for the temporary RMSE file 'log_rmse_tmp.csv'."""
  return get_csv_writer('log_rmse_tmp.csv')


def get_csv_writer(fname):
  """Open a CSV file in the current test's log directory for appending.

  Args:
    fname: File name inside the log directory of the global ``test_name``.
  Returns:
    Tuple (csv writer, open file object). The caller must close the file.
  """
  log_file_name = '{}/{}'.format(get_drive_test_log_dir(test_name), fname)
  # The csv module asks for files to be opened with newline='' so that its own
  # line terminator is written untranslated (otherwise platforms that
  # translate '\n' end up with blank lines between rows).
  file = open(log_file_name, 'a+', newline='')
  writer = csv.writer(file)
  return writer, file


def get_mean_csv_writer():
  """Return the (csv writer, open file) pair for 'log_mean_tmp.csv'.

  The file lives in the log directory of the global ``test_name`` and is
  opened for appending; the caller must close it.
  """
  return get_csv_writer('log_mean_tmp.csv')


def calc_rmse_B(normalized_validation_examples, normalized_validation_targets, writer):
  """Predict the whole validation set and write one RMSE per target column.

  Uses the module-level ``model``. After writing the RMSE row, the last
  ``k_max`` validation rows are predicted once more, only to log how long a
  single pass takes.

  Args:
    normalized_validation_examples: DataFrame with the validation inputs.
    normalized_validation_targets: DataFrame with the validation targets.
    writer: CSV writer that receives the row of per-column RMSE values.
  """
  inputs = normalized_validation_examples.to_numpy()
  expected = normalized_validation_targets.to_numpy()

  log_msg("predicting {} registers ".format(inputs.shape[0]))
  started = time.time()
  predicted = model.predict(inputs, use_multiprocessing=True)
  finished = time.time()
  log_msg("Prediction time MULTIPROCESSING in {} seconds".format(finished-started))

  # One RMSE value per output column.
  line_rmse = [
    rmse(predicted[:, col_idx], expected[:, col_idx])
    for col_idx in range(expected.shape[1])
  ]
  writer.writerow(line_rmse)

  # Timing only: prediction of a single pass into the routine.
  one_pass_inputs = normalized_validation_examples.tail(k_max).to_numpy()
  log_msg("predicting {} registers ".format(one_pass_inputs.shape[0]))
  started = time.time()
  model.predict(one_pass_inputs, use_multiprocessing=True)
  finished = time.time()
  log_msg("Prediction time of 1 pass into routine MULTIPROCESSING in {} seconds".format(finished-started))


def calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_targets_min_values, all_targets_max_values, dic_var_default_values=None, is_CNN=False):
  """Predict the validation set and write per-column skill metrics to CSV files.

  Uses the module-level ``model``. For every target column the MSE, RMSE,
  bias, MAE, mean prediction and mean observation are computed and appended
  (header row + value row) to temporary CSV files in the test log directory.
  ``create_files_csv`` then turns each temporary file into its final and
  de-normalised versions. Finally the last ``k_max`` validation rows are
  predicted once more, only to log how long a single pass takes.

  Args:
    normalized_validation_examples: DataFrame with the validation inputs.
    normalized_validation_targets: DataFrame with the validation targets.
    all_targets_min_values: Per-variable minimum values, passed on to
      ``create_files_csv``.
    all_targets_max_values: Per-variable maximum values, passed on to
      ``create_files_csv``.
    dic_var_default_values: Optional per-variable default values, passed on
      to ``create_files_csv``.
    is_CNN: If True, a trailing axis is added to the input arrays
      (``np.expand_dims(..., 2)``) before prediction.
  """
  frmse_name = 'log_rmse_tmp.csv'
  fbias_name = 'log_bias_tmp.csv'
  fmse_name = 'log_mse_tmp.csv'
  fmae_name = 'log_mae_tmp.csv'
  fmean_pred_name = 'log_mean_pred_tmp.csv'
  fmean_obs_name = 'log_mean_obs_tmp.csv'
  tmp_names = [frmse_name, fbias_name, fmse_name, fmae_name, fmean_pred_name, fmean_obs_name]

  writers = {}
  open_files = []
  try:
    for tmp_name in tmp_names:
      writer, handle = get_csv_writer(tmp_name)
      writers[tmp_name] = writer
      open_files.append(handle)

    for tmp_name in tmp_names:
      write_header_B(normalized_validation_targets, writers[tmp_name])

    arr_input_val = normalized_validation_examples.to_numpy()
    if is_CNN:
      arr_input_val = np.expand_dims(arr_input_val, 2)
    arr_output_val = normalized_validation_targets.to_numpy()

    log_msg("predicting {} registers ".format(arr_input_val.shape[0]))
    start = time.time()
    y = model.predict(arr_input_val, use_multiprocessing=True)
    end = time.time()
    log_msg("Prediction time MULTIPROCESSING in {} seconds".format(end-start))

    # One value per output column for every metric.
    rows = {tmp_name: [] for tmp_name in tmp_names}
    for col_idx in range(arr_output_val.shape[1]):
      observed = arr_output_val[:, col_idx]
      predicted = y[:, col_idx]
      rows[fmse_name].append(mse(observed, predicted))
      rows[frmse_name].append(rmse(observed, predicted))
      rows[fbias_name].append(bias(observed, predicted))
      rows[fmae_name].append(mae(observed, predicted))
      rows[fmean_pred_name].append(np.average(predicted))
      rows[fmean_obs_name].append(np.average(observed))

    for tmp_name in tmp_names:
      writers[tmp_name].writerow(rows[tmp_name])
  finally:
    # Always release the handles, also when prediction or a metric fails;
    # the files must be closed before create_files_csv reads them back.
    for handle in open_files:
      handle.close()

  # convert cols to rows, putting zeros in rows without values
  #
  create_files_csv(all_targets_min_values, all_targets_max_values, frmse_name, 'log_rmse.csv', 'log_rmse_denorm.csv', dic_var_default_values=dic_var_default_values)
  create_files_csv(all_targets_min_values, all_targets_max_values, fbias_name, 'log_bias.csv', 'log_bias_denorm.csv', dic_var_default_values=dic_var_default_values)
  create_files_csv(all_targets_min_values, all_targets_max_values, fmae_name, 'log_mae.csv', 'log_mae_denorm.csv', dic_var_default_values=dic_var_default_values)
  create_files_csv(all_targets_min_values, all_targets_max_values, fmse_name, 'log_mse.csv', 'log_mse_denorm.csv', dic_var_default_values=dic_var_default_values)
  create_files_csv(all_targets_min_values, all_targets_max_values, fmean_pred_name, 'log_mean_pred.csv', 'log_mean_pred_denorm.csv', scale_or_delinear='delinear', dic_var_default_values=dic_var_default_values)
  create_files_csv(all_targets_min_values, all_targets_max_values, fmean_obs_name, 'log_mean_obs.csv', 'log_mean_obs_denorm.csv', scale_or_delinear='delinear', dic_var_default_values=dic_var_default_values)

  # just to calculate prediction of 1 pass into routine ...
  arr_input_val = normalized_validation_examples.tail(k_max).to_numpy()
  if is_CNN:
    arr_input_val = np.expand_dims(arr_input_val, 2)
  log_msg("predicting {} registers ".format(arr_input_val.shape[0]))
  start = time.time()
  y = model.predict(arr_input_val, use_multiprocessing=True)
  end = time.time()
  log_msg("Prediction time of 1 pass into routine MULTIPROCESSING in {} seconds".format(end-start))


def create_files_csv(min_values, max_values, log_tmp_file, new_log_filename, new_log_denorm_filename, scale_or_delinear='scale', dic_var_default_values=None):
  """Turn a one-row '<var>_k<level>' CSV into per-level tables (normalised and de-normalised).

  The temporary file is read from the log directory of the global
  ``test_name``. Two tables with ``k_max`` rows and one column per output
  variable are built: one with the values as read, one de-normalised. Cells
  with no value in the temporary file keep 0.0 or, in 'delinear' mode, the
  variable's default value when one is given. Both tables are written back to
  the same directory.

  Args:
    min_values: Mapping variable name -> minimum value used in normalisation.
    max_values: Mapping variable name -> maximum value used in normalisation.
    log_tmp_file: Name of the temporary CSV to read.
    new_log_filename: Name of the CSV with the values as read.
    new_log_denorm_filename: Name of the CSV with the de-normalised values.
    scale_or_delinear: 'scale' multiplies each value by (max - min); any
      other value de-normalises it with ``delinear_scale``.
    dic_var_default_values: Optional mapping variable name -> fill value,
      only used when ``scale_or_delinear == 'delinear'``.
  """
  log_dir = get_drive_test_log_dir(test_name)
  df = pd.read_csv('{}/{}'.format(log_dir, log_tmp_file))
  all_out_columns = ["Tc", "qv", "qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "EFFCS", "EFFIS", "LSRAIN", "LSSNOW"]
  df_t = pd.DataFrame(columns=all_out_columns)
  for col_name in all_out_columns:
      if scale_or_delinear == 'delinear' and dic_var_default_values is not None and col_name in dic_var_default_values.keys():
        df_t[col_name] = [dic_var_default_values[col_name]] * k_max
      else:
        df_t[col_name] = [0.0] * k_max
  df_t_denorm = df_t.copy()
  for col in df.columns:
      col_name_k = col.split("_k")
      col_name = col_name_k[0]
      k = int(col_name_k[1])
      value = df[col].values[0]  # first data row of the temporary file
      # Write through a single positional indexer on the frame itself. The
      # chained form df[col].iloc[i] = v may assign to a temporary copy and is
      # a silent no-op when pandas copy-on-write is enabled.
      row_pos = k - 1
      col_pos = df_t.columns.get_loc(col_name)
      df_t.iloc[row_pos, col_pos] = value
      if scale_or_delinear == 'scale':
        df_t_denorm.iloc[row_pos, col_pos] = value * (max_values[col_name] - min_values[col_name])
      else:  # delinear scale
        df_t_denorm.iloc[row_pos, col_pos] = delinear_scale(value, min_values[col_name], max_values[col_name])

  df_t.to_csv('{}/{}'.format(log_dir, new_log_filename))
  df_t_denorm.to_csv('{}/{}'.format(log_dir, new_log_denorm_filename))


def calc_rmse_A(normalized_validation_examples, normalized_validation_targets, writer, df_rmse, target_key):
  """Compute the validation RMSE of the global ``model`` separately per level.

  For each level ``k`` in 1..28 the rows whose ``'k'`` column equals ``k`` are
  selected from both frames, the ``'k'`` column is removed from the targets
  (it is kept in the inputs), the model predicts on the selected inputs and
  the RMSE against the selected targets is collected.

  Args:
    normalized_validation_examples: input DataFrame containing a ``'k'`` column.
    normalized_validation_targets: target DataFrame containing a ``'k'`` column.
    writer: unused; kept for interface compatibility.
    df_rmse: DataFrame that receives the 28 per-level RMSE values.
    target_key: name of the column of ``df_rmse`` to write.

  Returns:
    ``df_rmse`` with column ``target_key`` set to the per-level RMSE list.
  """
  # validation on every line of output file
  # NOTE(review): the level range is hard-coded to 28 levels; other code in
  # this file uses k_max -- confirm whether they should agree.
  line_rmse = []
  for k in range(1,29):
    # Boolean-mask selection already yields new frames, so the full-frame
    # copies made on every iteration were unnecessary; drop() without
    # inplace avoids mutating a filtered slice.
    df_examples_k = normalized_validation_examples[normalized_validation_examples['k'] == k]
    df_targets_k = normalized_validation_targets[normalized_validation_targets['k'] == k].drop(columns=['k'])

    arr_input_val = df_examples_k.to_numpy()
    arr_output_val = df_targets_k.to_numpy()

    log_msg("predicting {} registers ".format(arr_input_val.shape[0]))
    start = time.time()
    y = model.predict(arr_input_val, use_multiprocessing=True)
    end = time.time()
    log_msg("Prediction time MULTIPROCESSING in {} seconds".format(end-start))

    level_rmse = rmse(y, arr_output_val)  # computed once, printed and stored
    print('rmse = ', level_rmse)
    line_rmse.append(level_rmse)

  df_rmse[target_key] = line_rmse

  return df_rmse

In [None]:
# all_out_columns = ["Tc", "qv", "qc", "qr", "qi", "qs", "qg", "ni", "ns", "nr", "NG", "NC", "EFFCS", "EFFIS", "LSRAIN", "LSSNOW"]
# df_t = pd.DataFrame(columns=all_out_columns)

# ???????
# df_t["Tc"]=[1,2]
# df_t["Tc"]=[1,2]


## Preparação para os testes Arquitetura C

In [None]:
def get_data_for_test(all_examples, all_targets, train_percent_size=0.9):
    """Normalize examples/targets with shared min-max bounds and split them.

    The bounds of every column are taken over both frames: a column present
    in ``all_examples`` and ``all_targets`` gets the widest [min, max] seen in
    either.  Both frames are then passed to ``normalize_linear_scale`` with
    those bounds and split by
    ``get_percent_normalized_trainining_and_validation``.

    Returns:
        (training examples, training targets, validation examples,
        validation targets, dict of minima, dict of maxima).
    """
    # Start from the bounds of the example columns ...
    all_minmax_values = {
        name: [all_examples[name].min(), all_examples[name].max()]
        for name in all_examples.columns
    }

    # ... then add or widen them with the target columns.
    for name in all_targets.columns:
        lowest = all_targets[name].min()
        highest = all_targets[name].max()
        bounds = all_minmax_values.get(name)
        if bounds is None:
            all_minmax_values[name] = [lowest, highest]
            continue
        if lowest < bounds[0]:
            bounds[0] = lowest
        if highest > bounds[1]:
            bounds[1] = highest

    all_min_values = {}
    all_max_values = {}
    for name in all_minmax_values.keys():
        all_min_values[name] = all_minmax_values[name][0]
        all_max_values[name] = all_minmax_values[name][1]

    print('\n  all minmax values: ')
    print(all_minmax_values)

    normalized_all_examples = normalize_linear_scale(all_examples, all_minmax_values)
    normalized_all_targets = normalize_linear_scale(all_targets, all_minmax_values)

    train_x, train_y, val_x, val_y = get_percent_normalized_trainining_and_validation(
        normalized_all_examples, normalized_all_targets, train_percent_size)

    return train_x, train_y, val_x, val_y, all_min_values, all_max_values
    

Após a execução dos testes, os logs ficam armazenados no google drive, para que não se percam na sessão do colab. Outro notebook foi criado para visualizar os resultados no Tensorboard.  
=>  https://colab.research.google.com/drive/1RiIXCWdcpGTdSLimPI3mJDWl7nj5fm1j?usp=sharing

## C) Testes com (k_max níveis x num_var) inputs e (k_max níveis x num_var outputs) (Uma predição por passagem na rotina)

- Utiliza 70% da base para treinamento, 20% para validação interna e 10% para validação independente
- Normalização linear em todas as var de entrada ou outras normalizações
- Faz a regressão para todas as variáveis de saída, para cada nível 

## Testes IOP 2014 (D)

- Idem C mas com IOP 2014
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- camada de saída com 16 variáveis x k_max níveis, menos 27 níveis de LSRAIN e 27 níveis de LSSNOW = 394 outputs
- Executar a rede neural com normalização linear
- Utilizando esquema de 10 splits e Early stop

In [None]:
# Shared settings for the experiment cells below.
# Number of vertical levels; same value as k_final below.
levels = k_max
# NOTE(review): presumably the model time step in seconds -- confirm.
delta_t = 180
# NOTE(review): presumably the number of spin-up hours to discard -- confirm.
spin_hours = 24
# First and last level passed to get_df_col_k in the cells below.
k_inicial = 1
k_final = k_max

### Testes de números de neurônios
- Utilizando 10% ou 90% da base
- 500 passos
- Variando Neurônios, camadas


In [None]:
# Experiment "Evaluate_D_90_<hidden_layers>": trains one dense model per
# hidden-layer configuration on data split with train_percent_size=0.9,
# then validates it and computes the skill files.
normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values = \
get_data_for_test(all_examples, all_targets, 0.9)

# Apply get_df_col_k with levels k_inicial..k_final to all four frames.
normalized_training_examples = get_df_col_k(normalized_training_examples, k_inicial, k_final)
normalized_training_targets = get_df_col_k(normalized_training_targets, k_inicial, k_final)
normalized_validation_examples = get_df_col_k(normalized_validation_examples, k_inicial, k_final)
normalized_validation_targets = get_df_col_k(normalized_validation_targets, k_inicial, k_final)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()
# The training frames are no longer needed once converted to arrays.
del normalized_training_examples, normalized_training_targets
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

# Train model parameters
#
loss_type = 'mse'
epochs=200


# Full grid of hidden-layer configurations (kept for reference); the active
# loop below runs only a subset of it.
# for hidden_layers in [[18], [36], [54], [72], [90], [108], [216], [432], 
#                       [18, 18], [36, 36], [54, 54], [72, 72], [90, 90], [108, 108], [216, 216], [432, 432], [448, 394], 
#                       [18, 18, 18], [36, 36, 36], [54, 54, 54], [72, 72, 72], [90, 90, 90], [108, 108, 108],[216, 216, 216], [432, 432, 432] ]:
for hidden_layers in [ [216], [432], [108, 108], [216, 216], [432, 432], [108, 108, 108],[216, 216, 216], [432, 432, 432]]:
  # test_name is a global also read by helper functions such as create_files_csv.
  test_name = "Evaluate_D_90_{}".format(hidden_layers)
  drive_logs_test_dir = create_drive_test_log_dir(test_name)
  # drive_logs_test_dir = get_drive_test_log_dir(test_name)
  log_msg(" =========== Executando teste {} ==================".format(test_name))


  model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type) 
  model.summary(print_fn=log_msg)

  log_msg( "Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
  log_msg( "Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

  # model_dir is only consumed by the commented-out load_model call below.
  model_dir = get_mode_var_name_dir(test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS')

  # Train model
  log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
  train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs, n_splits=1)
  save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)

  # use trained model
  # model = load_model((model_dir + '/tmp_model.h5'))

  validate_model(model, normalized_validation_examples, normalized_validation_targets)
  # Per-variable fill values handed to calc_skill_C.
  # NOTE(review): the dict is identical on every iteration and could be
  # defined once before the loop.
  dic_var_default_values = { \
    'qc': 0.0,
    'qr': 0.0,
    'qi': 0.0,
    'qs': 0.0,
    'ni': 0.0,
    'ns': 0.0,
    'nr': 0.0,
    'NG': 0.0,
    'NC': 0.0,
    'EFFCS': 0.891634,
    'EFFIS': 0.123289	
  }
  calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values, dic_var_default_values)



Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 448)]             0         
_________________________________________________________________
dense_11 (Dense)             (None, 216)               96984     
_________________________________________________________________
dense_12 (Dense)             (None, 394)               85498     
Total params: 182,482
Trainable params: 182,482
Non-trainable params: 0
_________________________________________________________________
Input  train variables/size = 448/156384
Output train variables/size = 394/156384 
Start training
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23

Testes utilizando CNNs

### Teste CNN.6
- Idem C.6 mas com CNN
- 1 feature com 448 timesteps ?
- Conv1D sobre os níveis ?

In [None]:
# Experiment "CNN_<use_levs>_...": trains one Conv1D model per filter/kernel
# configuration.
# NOTE(review): the data-preparation code in this cell is commented out, so the
# cell relies on n_inputs, n_outputs, arr_input_train, arr_output_train, the
# normalized_validation_* frames and all_min_values/all_max_values already
# existing from a previously executed cell.
# TODO
# all_spin_examples, all_spin_targets, no_spinup_size = get_no_spinup_examples_and_targets(levels, delta_t, spin_hours, pre_process=True)    
# normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values = \
# get_data_for_test(all_spin_examples, all_spin_targets)

# Tag embedded in the test name; 'Levs' would enable the per-variable level
# exclusion in the commented-out block below.
use_levs = 'NoLevs'

# ********* speeding things up ...

# normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values = \
# get_data_for_test(all_examples, all_targets, 0.9)

# if use_levs == 'Levs':
#   dic_var_levels_exclude = { \
#     # 'sl': range(1,29),                          
#     'qv': range(20,29),
#     'qc': list(range(1,10)) + list(range(14,29)),
#     'qr': range(14,29),
#     'qi': list(range(1,12)) + list(range(20,29)),
#     'ni': list(range(1,11)) + list(range(20,29)),
#     'ns': list(range(1,4)) + [22, 29],
#     'nr': range(14,29),
#     'NC': list(range(1,9)) + list(range(14,29)),
#     'omega': range(23,29)
#   }
# else:
#   dic_var_levels_exclude = None

# normalized_training_examples = get_df_col_k(normalized_training_examples, k_inicial, k_final, dic_var_levels_exclude)
# normalized_training_targets = get_df_col_k(normalized_training_targets, k_inicial, k_final, dic_var_levels_exclude)
# normalized_validation_examples = get_df_col_k(normalized_validation_examples, k_inicial, k_final, dic_var_levels_exclude)
# normalized_validation_targets = get_df_col_k(normalized_validation_targets, k_inicial, k_final, dic_var_levels_exclude)

# arr_input_train = normalized_training_examples.to_numpy()
# arr_output_train = normalized_training_targets.to_numpy()
# # https://stackoverflow.com/questions/48140989/keras-lstm-input-dimension-setting
# # Conv1D needs at least 3 dimensions, so:
# #  n_inputs, 448 =>  n_inputs, 448 , 1
# arr_input_train = np.expand_dims(arr_input_train, 2)
# arr_output_train = np.expand_dims(arr_output_train, 2)
# del normalized_training_examples, normalized_training_targets
# n_inputs = arr_input_train.shape[1]
# n_outputs = arr_output_train.shape[1]


# Train model parameters
#
loss_type = 'mse'

epochs=100
n_splits=5


dropout = None
# batch_norm_arr, n_filters and n_kernels are only referenced by the
# commented-out grid loops below; the active loop hard-codes its configurations.
batch_norm_arr = [None]
n_filters = [8, 16, 32]
n_kernels = [5]
# hidden_layers_arr = [[108]]
hidden_layers_arr = [[None]]
max_pooling = None
batch_norm = None

for hidden_layers in hidden_layers_arr:
# #   for nf in n_filters:
# #     for nk in n_kernels:
# #       # for arr_convs_filters_kernels in [ [[nf, nk]], [[nf, nk], [nf, nk]], [[nf, nk], [nf, nk], [nf, nk]] ]:
# #       for arr_convs_filters_kernels in [ [[nf, nk]], [[nf, nk], [nf, nk]] ]:
# #         for batch_norm in batch_norm_arr:        
# #           for max_pooling in max_pooling_arr:
#             # Try more than one layer to improve results, but with fewer filters to reduce the parameter count

#   for arr_convs_filters_kernels in [ [[8, 5]] ]:
  # for nf in n_filters:
  #   for nk in n_kernels:
  # Each configuration is a list of [filters, kernel_size] pairs, one per conv layer.
  for arr_convs_filters_kernels in [ [[8, 5], [16, 5]], [[8, 5], [32, 5]], [[16, 5], [8, 5]], [[16, 5], [32, 5]], [[32, 5], [8, 5]], [[32, 5], [16, 5]] ]:
    test_name = "CNN_{}_filtKern_{}__pool_{}__hidd_{}__batch_{}__drop_{}".format(use_levs, arr_convs_filters_kernels, max_pooling, hidden_layers, batch_norm, dropout) 
    drive_logs_test_dir = get_drive_test_log_dir(test_name)
    # Skip configurations whose log directory already exists (already run).
    if os.path.exists(drive_logs_test_dir): continue

    drive_logs_test_dir = create_drive_test_log_dir(test_name)
    log_msg(" =========== Executando teste {} ==================".format(test_name))

    model = get_cnn_model(n_inputs, n_outputs, arr_convs_filters_kernels, hidden_layers, batch_norm, dropout, max_pooling, loss=loss_type) 
    model.summary(print_fn=log_msg)

    log_msg( "Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
    log_msg( "Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

    # model_dir is only consumed by the commented-out load_model call below.
    model_dir = get_mode_var_name_dir(test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS')

    # Train model
    log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)

    # tpu_strategy = use_tpu()
    # with tpu_strategy.scope():

    # ********* speeding things up ...
    use_gpu()
    # The name bound by "as gpu" is not used.
    with tf.device('/device:GPU:0') as gpu:
      train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs, n_splits=n_splits)

    save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)

    # ********* speeding things up ...
    # use trained model
    # model = load_model((model_dir + '/tmp_model.h5'))

    validate_model(model, normalized_validation_examples, normalized_validation_targets, is_CNN=True)
    # Per-variable fill values handed to calc_skill_C.
    dic_var_default_values = { \
      'qc': 0.0,
      'qr': 0.0,
      'qi': 0.0,
      'qs': 0.0,
      'ni': 0.0,
      'ns': 0.0,
      'nr': 0.0,
      'NG': 0.0,
      'NC': 0.0,
      'EFFCS': 0.891634,
      'EFFIS': 0.123289	
    }
    calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values, dic_var_default_values, is_CNN=True)


Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_10 (Conv1D)           (None, 444, 8)            48        
_________________________________________________________________
conv1d_11 (Conv1D)           (None, 440, 16)           656       
_________________________________________________________________
flatten_4 (Flatten)          (None, 7040)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 394)               2774154   
Total params: 2,774,858
Trainable params: 2,774,858
Non-trainable params: 0
_________________________________________________________________
Input  train variables/size = 448/156384
Output train variables/size = 394/156384 
Found GPU at: /device:GPU:0
Start training


(121632, 448, 1)

(34752, 448, 1)

(121632, 394, 1)

(34752, 394, 1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

(121632, 448, 1)

(34752, 448, 1)

(121632, 394, 1)

(34752, 394, 1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

(121632, 448, 1)

(34752, 448, 1)

(121632, 394, 1)

(34752, 394, 1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100

### Teste CNN com timesteps
- 448 features de entrada
- n timesteps onde ocorre a conv1D


In [None]:
# a = np.arange(30)
# a1 = a.reshape(10,3)

# new_samples=5
# timesteps_window=2
# a2=a1.reshape(new_samples, timesteps_window, a1.shape[1])
# a3=a1.reshape(new_samples, timesteps_window * a1.shape[1])
# display.display(a1)
# display.display(a2)
# display.display(a3)




In [None]:
# Experiment "CNN_TIMESTEPS__...": groups consecutive samples into windows of
# timesteps_window rows and trains Conv1D models over that window axis.
# TODO
# all_spin_examples, all_spin_targets, no_spinup_size = get_no_spinup_examples_and_targets(levels, delta_t, spin_hours, pre_process=True)    
# normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values = \
# get_data_for_test(all_spin_examples, all_spin_targets)

# Tag embedded in the test name; 'Levs' enables the per-variable level
# exclusion dictionary built below.
use_levs = 'NoLevs'

# ********* speeding things up ...

normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values = \
get_data_for_test(all_examples, all_targets, 0.9)
if use_levs == 'Levs':
  # Levels passed to get_df_col_k as its exclusion argument, keyed by variable.
  dic_var_levels_exclude = { \
    # 'sl': range(1,29),                          
    'qv': range(20,29),
    'qc': list(range(1,10)) + list(range(14,29)),
    'qr': range(14,29),
    'qi': list(range(1,12)) + list(range(20,29)),
    'ni': list(range(1,11)) + list(range(20,29)),
    'ns': list(range(1,4)) + [22, 29],  # NOTE(review): every other entry ends with range(..., 29); [22, 29] may be a typo for list(range(22,29)) -- confirm
    'nr': range(14,29),
    'NC': list(range(1,9)) + list(range(14,29)),
    'omega': range(23,29)
  }
else:
  dic_var_levels_exclude = None
normalized_training_examples = get_df_col_k(normalized_training_examples, k_inicial, k_final, dic_var_levels_exclude)
normalized_training_targets = get_df_col_k(normalized_training_targets, k_inicial, k_final, dic_var_levels_exclude)
normalized_validation_examples = get_df_col_k(normalized_validation_examples, k_inicial, k_final, dic_var_levels_exclude)
normalized_validation_targets = get_df_col_k(normalized_validation_targets, k_inicial, k_final, dic_var_levels_exclude)

# ********* end of speeding things up ...

# del normalized_training_examples, normalized_training_targets


arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()
arr_input_val = normalized_validation_examples.to_numpy()
arr_output_val = normalized_validation_targets.to_numpy()


# Number of consecutive rows grouped into one sample.
timesteps_window=2

# TRAIN
# Trailing rows that do not fill a whole window are dropped before reshaping.
# Inputs become (new_samples, timesteps_window, n_features); targets are
# flattened to (new_samples, timesteps_window * n_targets).
new_samples=int(arr_input_train.shape[0]/timesteps_window)
arr_input_train = arr_input_train[0:new_samples*timesteps_window,:]
arr_input_train = arr_input_train.reshape(new_samples, timesteps_window, arr_input_train.shape[1])
arr_output_train = arr_output_train[0:new_samples*timesteps_window,:]
arr_output_train = arr_output_train.reshape(new_samples, timesteps_window * arr_output_train.shape[1])

# VALIDATION
# Same windowing as for the training arrays.
new_samples = int(arr_input_val.shape[0]/timesteps_window)
arr_input_val = arr_input_val[0:new_samples*timesteps_window,:]
arr_input_val = arr_input_val.reshape(new_samples, timesteps_window, arr_input_val.shape[1])
arr_output_val = arr_output_val[0:new_samples*timesteps_window,:]
arr_output_val = arr_output_val.reshape(new_samples, timesteps_window * arr_output_val.shape[1])

# Features per timestep (last axis of the 3-D input) and flattened target width.
n_inputs = arr_input_train.shape[2]
n_outputs = arr_output_train.shape[1]


# Train model parameters
#
loss_type = 'mse'

epochs=100
n_splits=5


dropout = None
# for hidden_layers in [ [216], [432], [216, 216], [432, 432] ]:  
# hidden_layers_arr = [ [None], [216], [432], [216, 216], [432, 432] ]
hidden_layers_arr = [ [None], [108] ]
# batch_norm_arr is only referenced by the commented-out loop below.
batch_norm_arr = [None]
max_pooling_arr = [None, 2]
n_filters = [32, 64]

# best was 5
n_kernels = [2]
batch_norm = None
  # arr_convs_filters_kernels = [ [32, 3] ]

# Grid over hidden layers x filters x kernel sizes x max-pooling settings.
for hidden_layers in hidden_layers_arr:
  for nf in n_filters:
    for nk in n_kernels:
      for max_pooling in max_pooling_arr:
        # for arr_convs_filters_kernels in [ [[nf, nk]], [[nf, nk], [nf, nk]], [[nf, nk], [nf, nk], [nf, nk]] ]:
        for arr_convs_filters_kernels in [ [[nf, nk]] ]:
#         for batch_norm in batch_norm_arr:        


          test_name = "CNN_TIMESTEPS__{}_filtKern_{}__pool_{}__hidd_{}__batch_{}__drop_{}__timesteps_{}".format(use_levs, arr_convs_filters_kernels, max_pooling, hidden_layers, batch_norm, dropout, timesteps_window) 
          drive_logs_test_dir = get_drive_test_log_dir(test_name)
          # Skip configurations whose log directory already exists (already run).
          if os.path.exists(drive_logs_test_dir): 
            log_msg(" Teste existente, pulando: {}".format(test_name))
            continue

          drive_logs_test_dir = create_drive_test_log_dir(test_name)
          log_msg(" =========== Executando teste {} ==================".format(test_name))

          model = get_cnn_model(n_inputs, n_outputs, arr_convs_filters_kernels, hidden_layers, batch_norm, dropout, max_pooling, loss=loss_type, timesteps_window=timesteps_window) 
          model.summary(print_fn=log_msg)

          log_msg( "Input  train size/window/variables = {}/{}/{}".format(arr_input_train.shape[0], arr_input_train.shape[1], arr_input_train.shape[2]))
          log_msg( "Output train size/window*variables = {}/{}".format(arr_output_train.shape[0], arr_output_train.shape[1]))

          # model_dir is only consumed by the commented-out load_model call below.
          model_dir = get_mode_var_name_dir(test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS')

          # Train model
          log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
          # ********* speeding things up ...
          use_gpu()
          # The name bound by "as gpu" is not used.
          with tf.device('/device:GPU:0') as gpu:
            train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs, n_splits=n_splits)

          save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)

          # ********* speeding things up ...
          # use trained model
          # model = load_model((model_dir + '/tmp_model.h5'))
          validate_model_cnn_timesteps(model, arr_input_val, arr_output_val)
          # Only consumed by the commented-out calc_skill_C call below.
          dic_var_default_values = { \
            'qc': 0.0,
            'qr': 0.0,
            'qi': 0.0,
            'qs': 0.0,
            'ni': 0.0,
            'ns': 0.0,
            'nr': 0.0,
            'NG': 0.0,
            'NC': 0.0,
            'EFFCS': 0.891634,
            'EFFIS': 0.123289	
          }
          # calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values, dic_var_default_values, is_CNN=True)



  all minmax values: 
{'k': [1, 28], 'si': [0.0036691060265900003, 1.0], 'Tc': [63.503047882, 308.96236853799996], 'qv': [2.92705568e-06, 0.0162761402701], 'qc': [0.0, 0.000407646693245], 'qr': [0.0, 0.0008443108657339999], 'qi': [0.0, 0.000148722695016], 'qs': [0.0, 0.0016289144075799999], 'qg': [0.0, 0.00038817307111400006], 'ni': [0.0, 1145094.4354], 'ns': [0.0, 2942266.54164], 'nr': [0.0, 1521556.56989], 'NG': [0.0, 6038.50500981], 'NC': [0.0, 35494575.1176], 'tke': [0.0, 6.0], 'kzh': [0.03, 300.0], 'omega': [-1.0769567015000001, 0.5418370767099999], 'EFFCS': [1.0, 27.5819925509], 'EFFIS': [13.0, 130.0], 'LSRAIN': [-1.0830010430299997e-24, 0.00016314062727399998], 'LSSNOW': [-1.0475281590399999e-28, 9.048727267059999e-11]}
train size = 4378752
Index(['k', 'si', 'Tc', 'qv', 'qc', 'qr', 'qi', 'qs', 'qg', 'ni', 'ns', 'nr',
       'NG', 'NC', 'tke', 'kzh', 'omega'],
      dtype='object')


# Resultados

In [None]:
def generate_skill_graphic(skill, skill_fname, key, arr_test, arr_markers, arr_colors, plot_sci_numbers=True):
  for test, mark, color in zip(arr_test, arr_markers, arr_colors):
    path = '{}/{}/{}'.format(exp1_dir, test, skill_fname)  
    df_skill = pd.read_csv(path)
    skill_values = df_skill[key].values
    plt.plot(skill_values, range(1, len(skill_values)+1), label=test + ' ' + skill, marker=mark, color=color)
  if plot_sci_numbers:
    plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
  plt.legend()


def generate_subplot(ncols, nskills, idx_plot, key):
  """Select subplot ``idx_plot`` of the grid and label its axes.

  The arguments are forwarded positionally to ``plt.subplot``, so ``ncols``
  is used as the number of grid rows and ``nskills`` as the number of grid
  columns.  The y axis is labelled "k" and the x axis with ``key``.
  """
  axes = plt.subplot(ncols, nskills, idx_plot)
  axes.set_ylabel("k")
  axes.set_xlabel(key)


def generate_all_graphics(arr_test, cols, obs_file_name, pred_file_name, arr_skills, arr_skill_filenames, arr_markers, arr_colors, plot_sci_numbers=True):
  """Draw one row of profile plots per variable and show the figure.

  Each row has ``len(arr_skills) + 1`` panels: the first overlays the mean
  observation (read from the first test's ``obs_file_name``) with every
  test's mean prediction (``pred_file_name``); the remaining panels show one
  skill each, read from the matching entry of ``arr_skill_filenames``.

  Args:
    arr_test: test directory names under ``exp1_dir``.
    cols: variable/column names to plot; ``'Unnamed: 0'`` is ignored.
    obs_file_name: CSV with the mean observations.
    pred_file_name: CSV with the mean predictions.
    arr_skills: skill labels, one panel each.
    arr_skill_filenames: CSV file name for each skill.
    arr_markers: matplotlib marker per test.
    arr_colors: matplotlib color per test.
    plot_sci_numbers: use scientific notation on the x axis.

  Relies on the module-level ``exp1_dir``.
  """
  # The grid needs exactly one row per plotted variable.  The previous
  # len(cols)-1 was only correct when 'Unnamed: 0' happened to be in cols.
  plot_cols = [key for key in cols if key != 'Unnamed: 0']
  n_rows = len(plot_cols)
  n_panels = len(arr_skills)+1

  plt.figure(figsize=(20, 120))

  # The observation file is the same for every variable: read it once.
  df_mean_obs = None
  if plot_cols:
    mean_obs_path = '{}/{}/{}'.format(exp1_dir, arr_test[0], obs_file_name)
    df_mean_obs = pd.read_csv(mean_obs_path)

  idx_plot = 1
  for key in plot_cols:
      generate_subplot(n_rows, n_panels, idx_plot, key)

      obs_values = df_mean_obs[key].values
      plt.plot(obs_values, range(1, len(obs_values)+1), label='mean obs',  marker='o', linestyle='dashed', color='yellow', markersize=12)

      generate_skill_graphic('mean pred', pred_file_name, key, arr_test, arr_markers, arr_colors, plot_sci_numbers)
      idx_plot += 1

      for skill, skill_fname in zip(arr_skills, arr_skill_filenames):
        generate_subplot(n_rows, n_panels, idx_plot, key)
        generate_skill_graphic(skill, skill_fname, key, arr_test, arr_markers, arr_colors, plot_sci_numbers)
        idx_plot += 1

  plt.subplots_adjust(hspace=0.2, wspace=0.1)
  plt.show()



# Base directory holding one sub-directory of CSV logs per test.
# exp1_dir = '/content/drive/My Drive/Colab Notebooks/logs_models_rttmg_dt60'
# NOTE(review): this assignment is immediately overridden by the next line.
exp1_dir = '/content/drive/My Drive/Colab Notebooks/logs'
exp1_dir = tb_logdir_base

# Tests to compare. Markers and colors are paired with tests via zip in
# generate_skill_graphic, so the fourth marker/color is unused here.
arr_test = [ 'teste_C.6', 'teste_C.6.4_no_spinup', 'teste_C.6.5' ]
arr_markers = ['D', 'X', 'P', 's']
arr_colors = ['blue','green', 'violet', 'red']

# arr_test = [ 'teste_C.6']
# arr_markers = ['D']
# arr_colors = ['blue']


# arr_markers = ['D', 'X', 'P', 's']
# arr_colors = ['blue', 'green', 'red', 'violet']
# This CSV is read only to obtain the list of column names to plot.
# NOTE(review): 'teste_C.6.4' is not one of the entries of arr_test -- confirm
# that this directory exists.
dummy_path = '{}/{}/{}'.format(exp1_dir, 'teste_C.6.4', 'log_mean_obs.csv')
dummy_df = pd.read_csv(dummy_path)

arr_skills = ['bias', 'mse', 'mae']

obs_file_name = 'log_mean_obs.csv'
pred_file_name = 'log_mean_pred.csv'
# One file per entry of arr_skills, in the same order.
arr_skill_filenames = ['log_bias.csv', 'log_mse.csv', 'log_mae.csv']
generate_all_graphics(arr_test, dummy_df.columns.to_list(), obs_file_name, pred_file_name, arr_skills, arr_skill_filenames, arr_markers, arr_colors)


In [None]:
# Same plots for a single test; reuses dummy_df, the file names and the skill
# lists defined in the previous cell.
arr_test = [ 'teste_D.6']
arr_markers = ['D']
arr_colors = ['blue']
generate_all_graphics(arr_test, dummy_df.columns.to_list(), obs_file_name, pred_file_name, arr_skills, arr_skill_filenames, arr_markers, arr_colors)

# Testes anteriores

### Teste C.1
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 18 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.1": single hidden layer of 18 units on linearly
# normalized inputs and targets.
test_name = "teste_C.1"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [18]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


#### Teste C.1.1
- Idem ao C.1, mas removendo a normalização para o cálculo do rmse
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 18 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural sem normalização (dados originais)
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.1.1": same network as teste_C.1 (18 hidden units) but
# fed with the raw, un-normalized data.
test_name = "teste_C.1.1"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# No scaling in this experiment: the "normalized_*" names simply alias the
# original data frames.
normalized_all_examples, normalized_all_targets = all_examples, all_targets

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [18]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


#### Teste C.1.2
- Idem C.1, mas removendo alguns níveis e colunas, como em B.1.2
- camada de entrada com 396 inputs
- 1 camada oculta com 18 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.1.2": teste_C.1 network (18 hidden units) with some
# variable levels dropped from the inputs only.
test_name = "teste_C.1.2"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

# Levels 16..28 are excluded for each listed variable ('sl' is left untouched:
# its entry range(1,29) stays disabled).
# NOTE(review): keys 'nc'/'ng' are lower-case here while the "teste_C.6.3"
# cell uses 'NC'/'NG' -- confirm the column-name case get_df_col_k expects.
dic_var_levels_exclude = dict.fromkeys(
    ('qc', 'qr', 'qg', 'nc', 'nr', 'ng'), range(16, 29)
)

# Only the example (input) frames receive the exclusion map.
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final, *extra_args)
    for frame, extra_args in (
        (normalized_training_examples, (dic_var_levels_exclude,)),
        (normalized_training_targets, ()),
        (normalized_validation_examples, (dic_var_levels_exclude,)),
        (normalized_validation_targets, ()),
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [18]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


### Teste C.2
- Idem C.1, mas com 54 neurônios
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 54 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.2": single hidden layer of 54 units on linearly
# normalized inputs and targets.
test_name = "teste_C.2"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [54]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


#### Teste C.2.2
- Idem C.2, mas removendo alguns níveis e colunas, como em B.1.2
- camada de entrada com (#TODO) inputs
- 1 camada oculta com 54 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.2.2": teste_C.2 network (54 hidden units) with some
# variable levels dropped from the inputs only.
test_name = "teste_C.2.2"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

# Levels 16..28 are excluded for each listed variable ('sl' is left untouched:
# its entry range(1,29) stays disabled).
# NOTE(review): keys 'nc'/'ng' are lower-case here while the "teste_C.6.3"
# cell uses 'NC'/'NG' -- confirm the column-name case get_df_col_k expects.
dic_var_levels_exclude = dict.fromkeys(
    ('qc', 'qr', 'qg', 'nc', 'nr', 'ng'), range(16, 29)
)

# Only the example (input) frames receive the exclusion map.
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final, *extra_args)
    for frame, extra_args in (
        (normalized_training_examples, (dic_var_levels_exclude,)),
        (normalized_training_targets, ()),
        (normalized_validation_examples, (dic_var_levels_exclude,)),
        (normalized_validation_targets, ()),
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [54]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


### Teste C.3
- Idem C.1, mas com 2 camadas com 18 neurônios
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 2 camadas ocultas com 18 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.3": two hidden layers of 18 units each on linearly
# normalized inputs and targets.
test_name = "teste_C.3"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [18, 18]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


### Teste C.4
- Idem C.1, mas com 108 neurônios
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 108 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.4": single hidden layer of 108 units on linearly
# normalized inputs and targets.
test_name = "teste_C.4"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [108]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


#### Teste C.4.2
- Idem C.4, mas removendo alguns níveis e colunas, como em B.1.2
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 108 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.4.2": teste_C.4 network (108 hidden units) with some
# variable levels dropped from the inputs only.
test_name = "teste_C.4.2"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

# Levels 16..28 are excluded for each listed variable ('sl' is left untouched:
# its entry range(1,29) stays disabled).
# NOTE(review): keys 'nc'/'ng' are lower-case here while the "teste_C.6.3"
# cell uses 'NC'/'NG' -- confirm the column-name case get_df_col_k expects.
dic_var_levels_exclude = dict.fromkeys(
    ('qc', 'qr', 'qg', 'nc', 'nr', 'ng'), range(16, 29)
)

# Only the example (input) frames receive the exclusion map.
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final, *extra_args)
    for frame, extra_args in (
        (normalized_training_examples, (dic_var_levels_exclude,)),
        (normalized_training_targets, ()),
        (normalized_validation_examples, (dic_var_levels_exclude,)),
        (normalized_validation_targets, ()),
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [108]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)


### Teste C.5
- Idem C.1, mas com 162 neurônios
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 162 neurônios
- camada de saída com 16 variáveis x k_max níveis = 448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.5": single hidden layer of 162 units on linearly
# normalized inputs and targets.
test_name = "teste_C.5"

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [162]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)



### Teste C.6
- Idem C.1, mas com 216 neurônios
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 216 neurônios
- camada de saída com 16 variáveis x k_max níveis - 27 níveis de LSRAIN e de LSSNOW = 394 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.6": single hidden layer of 216 units, trained on the
# data returned by get_no_spinup_examples_and_targets.
test_name = "teste_C.6"
levels, delta_t, spin_hours = k_max, 60, 24

all_spin_examples, all_spin_targets, no_spinup_size = get_no_spinup_examples_and_targets(
    levels, delta_t, spin_hours, pre_process=True
)

# get_data_for_test hands back the four train/validation frames plus the
# min/max values later forwarded to calc_skill_C.
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
    all_min_values,
    all_max_values,
) = get_data_for_test(all_spin_examples, all_spin_targets)

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [216]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Directory of the saved model; reused by the FKB export further down the cell.
model_dir = get_mode_var_name_dir(
    test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS'
)

# Train model
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)

# use trained model
# model = load_model((model_dir + '/tmp_model.h5'))

validate_model(model, normalized_validation_examples, normalized_validation_targets)

# Per-variable default values handed to calc_skill_C (their exact use is
# defined inside that helper).
dic_var_default_values = {
    **dict.fromkeys(
        ('qc', 'qr', 'qi', 'qs', 'ni', 'ns', 'nr', 'NG', 'NC'), -1.0
    ),
    'EFFCS': 0.64,
    'EFFIS': -0.8,
}
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_min_values, all_max_values, dic_var_default_values,
)


# FKB Testing ...
#
print("colunas de entrada")
print(normalized_validation_examples.columns.values)
print("colunas de saída:")
print(normalized_validation_targets.columns.values)

arr_one_input_val = normalized_validation_examples.tail(1).to_numpy()
arr_one_output_val = normalized_validation_targets.tail(1).to_numpy()
print("\n\n FKB Testing")
print("input = ")
print(*arr_one_input_val[0], sep=", ")
print("output = ")
print(*arr_one_output_val[0], sep=", ")
           

y = model.predict(arr_one_input_val, use_multiprocessing=True)
print("prediction h5 = ")                  
print(y, sep=", ")
                  

from KerasWeightsProcessing.convert_weights import txt_to_h5, h5_to_txt

h5_to_txt(
    weights_file_name=model_dir + '/tmp_model.h5', 
    output_file_name=model_dir + '/fkb_model.txt'
)
copyfile(model_dir + '/fkb_model.txt', './fkb_model.txt')

print("\n\nprediction FORTRAN = \n ")                  
command_fkb = "../FKB/build/bin/test_keras ./fkb_model.txt"
!{command_fkb}

#### Teste C.6.2 - dt 360
- Idem C.6, mas removendo níveis da entrada e da saída
- camada de entrada com 396 inputs
- 1 camada oculta com 216 neurônios
- camada de saída com 396 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.6.2": 216 hidden units, with the same level exclusions
# applied to both inputs and targets.
test_name = "teste_C.6.2"

# NOTE(review): called without arguments and with the result discarded, while
# the "teste_C.6" cell passes two data frames and unpacks six values --
# confirm this call still matches the helper.
get_data_for_test()

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)
print('Scale examples:', scale_examples)
print('Scale targets:', scale_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

# Levels 16..28 are excluded for each listed variable ('sl' is left untouched:
# its entry range(1,29) stays disabled).
# NOTE(review): keys 'nc'/'ng' are lower-case here while the "teste_C.6.3"
# cell uses 'NC'/'NG' -- confirm the column-name case get_df_col_k expects.
dic_var_levels_exclude = dict.fromkeys(
    ('qc', 'qr', 'qg', 'nc', 'nr', 'ng'), range(16, 29)
)

# Inputs and targets all receive the same exclusion map.
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = [
    get_df_col_k(frame, k_inicial, k_final, dic_var_levels_exclude)
    for frame in (
        normalized_training_examples,
        normalized_training_targets,
        normalized_validation_examples,
        normalized_validation_targets,
    )
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [216]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train, persist, then evaluate on the validation split.
# NOTE(review): hidden_layers is passed positionally to train_model here but
# not in the "teste_C.6" cell -- confirm which call matches the signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(
    model, arr_input_train, arr_output_train, log_dir_var, hidden_layers,
    epochs=epochs, n_splits=10,
)
save_model_and_logs(
    model, test_name, n_inputs, n_outputs, hidden_layers, epochs,
    log_dir_var, drive_logs_test_dir,
)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)

#### Teste C.6.3 - dt 60

- Idem C.6, mas removendo níveis da entrada e da saída
- camada de entrada com 217 inputs
- 1 camada oculta com 216 neurônios
- camada de saída com 227 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 10 splits e Early stop

In [None]:
# Experiment "teste_C.6.3": 216 hidden units with separate per-variable level
# exclusions for inputs and targets; the model is loaded from disk, not trained.
test_name = "teste_C.6.3"

# NOTE(review): called without arguments and with the result discarded, while
# the "teste_C.6" cell passes two data frames and unpacks six values --
# confirm this call still matches the helper.
get_data_for_test()

# Log directory for this experiment.
drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

# 0.9 is the fraction handed to the splitter (presumably the training share;
# confirm in get_percent_normalized_trainining_and_validation).
(
    normalized_training_examples,
    normalized_training_targets,
    normalized_validation_examples,
    normalized_validation_targets,
) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9
)

# Level window passed to get_df_col_k for every data frame.
k_inicial, k_final = 1, k_max

# Levels excluded from the INPUT frames, per variable.
dic_var_levels_exclude = {
    # 'sl': range(1,29),
    'qv': range(20, 29),
    'qc': [3, 4, *range(13, 29)],
    'qr': range(13, 29),
    'qi': [*range(1, 25), *range(26, 29)],
    'qs': [*range(1, 11), 27, k_max],
    # TODO - check why qg shows 0 and 1 in the input but only 0 in the output
    'qg': [*range(1, 11), *range(16, 29)],

    'ni': [*range(1, 25), *range(26, 29)],
    'ns': [*range(1, 15), 27, k_max],
    'nr': range(13, 29),
    'NG': [*range(1, 10), 22, 23, 27, k_max],
    'NC': [3, 4, *range(13, 29)],

    'tke': [*range(9, 24), 25, 26],
    'kzh': range(10, 23),
    'omega': range(23, 29),
}

normalized_training_examples, normalized_validation_examples = [
    get_df_col_k(frame, k_inicial, k_final, dic_var_levels_exclude)
    for frame in (normalized_training_examples, normalized_validation_examples)
]

# Levels excluded from the TARGET frames, per variable (the name is rebound).
# NOTE(review): the 'qg' list below is identical to the 'NG' list -- confirm
# this is intended and not a copy/paste slip.
dic_var_levels_exclude = {
    'qv': range(20, 29),
    'qc': [3, 4, *range(13, 29)],
    'qr': range(13, 29),
    'qi': [*range(1, 25), *range(26, 29)],
    'qs': [*range(1, 11), 27, k_max],
    # TODO - check why qg shows 0 and 1 in the input but only 0 in the output
    'qg': [*range(1, 10), 22, 23, 27, k_max],

    'ni': [*range(1, 25), *range(26, 29)],
    'ns': [*range(1, 15), 27, k_max],
    'nr': range(13, 29),
    'NG': [*range(1, 10), 22, 23, 27, k_max],
    'NC': [3, 4, *range(13, 29)],

    'EFFCS': [3, 4, *range(13, 29)],
    'EFFIS': [*range(1, 12), *range(20, 24), *range(26, 29)],
}

normalized_training_targets, normalized_validation_targets = [
    get_df_col_k(frame, k_inicial, k_final, dic_var_levels_exclude)
    for frame in (normalized_training_targets, normalized_validation_targets)
]

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
hidden_layers = [216]
epochs = 200
loss_type = 'mse'
n_inputs, n_outputs = arr_input_train.shape[1], arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, len(arr_input_train)))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, len(arr_output_train)))

# Train model
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)

# Training is disabled; a previously saved model is copied into ./tmp_model
# and loaded, replacing the freshly built `model` above.
# train_model(model, arr_input_train, arr_output_train, log_dir_var, hidden_layers, epochs=epochs, n_splits=10)
# save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
model_dir = get_mode_var_name_dir(
    test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS'
)
rmtree('./tmp_model', ignore_errors=True)  # drop any stale local copy first
copytree(model_dir, './tmp_model')
model = load_model('./tmp_model')

validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(
    normalized_validation_examples, normalized_validation_targets,
    all_targets_min_values, all_targets_max_values,
)

#### Teste C.6.4 - dt 60 - no spinup

- Evolução do C.6.4 alterando níveis e sem spinup
- camada de entrada com TODO inputs
- 1 camada oculta com 216 neurônios
- camada de saída com TODO outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.6.4_no_spinup": delta_t = 60, data obtained through the
# no-spin-up loader, one hidden layer of 216 units, per-variable level exclusion
# applied to both inputs and targets.
test_name = "teste_C.6.4_no_spinup"
levels = k_max
delta_t = 60
spin_hours = 24

(all_spin_examples,
 all_spin_targets,
 no_spinup_size) = get_no_spinup_examples_and_targets(levels, delta_t, spin_hours, pre_process=True)

(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets,
 all_min_values,
 all_max_values) = get_data_for_test(all_spin_examples, all_spin_targets)

drive_logs_test_dir = create_drive_test_log_dir(test_name)

k_inicial, k_final = 1, k_max

# Exclusion map handed to get_df_col_k for the INPUT frames (variable name -> level numbers).
dic_var_levels_exclude = {
    # 'sl': range(1,29),
    'qv': range(20, 29),
    'qc': [3, 4] + list(range(13, 29)),
    'qr': range(13, 29),
    'qi': list(range(1, 25)) + list(range(26, 29)),
    'qs': list(range(1, 11)) + [27, k_max],
    # TODO - check why qg shows 0 and 1 in the input but only 0 in the output
    # 'qg': list(range(1,11)) + list(range(16,29)),
    # 'ni': list(range(1,12)) + list(range(20,25)) + list(range(26,29)),
    'ni': list(range(1, 25)) + list(range(26, 29)),
    'ns': list(range(1, 16)) + [27, k_max],
    'nr': range(13, 29),
    'NG': list(range(1, 9)) + [22, 23, 27, k_max],
    'NC': [3, 4] + list(range(13, 29)),
    'tke': [1] + list(range(9, 24)) + [25, 26],
    'kzh': range(10, 23),
    'omega': range(23, 29),
}

# Same call order as before: training examples first, then validation examples.
normalized_training_examples, normalized_validation_examples = (
    get_df_col_k(frame, k_inicial, k_final, dic_var_levels_exclude)
    for frame in (normalized_training_examples, normalized_validation_examples)
)

# Exclusion map handed to get_df_col_k for the TARGET frames.
dic_var_levels_exclude = {
    'qc': [3, 4] + list(range(13, 29)),
    'qr': range(13, 29),
    'qi': list(range(1, 25)) + list(range(26, 29)),
    'qs': list(range(1, 11)) + [27, k_max],
    # TODO - check why qg shows 0 and 1 in the input but only 0 in the output
    # 'qg': list(range(1,10)) + [22, 23, 27, k_max],
    # 'ni': list(range(1,12)) + list(range(20,25)) + list(range(26,29)),
    'ni': list(range(1, 25)) + list(range(26, 29)),
    'ns': list(range(1, 16)) + [27, k_max],
    'nr': range(13, 29),
    'NG': list(range(1, 9)) + [22, 23, 27, k_max],
    'NC': [3, 4] + list(range(13, 29)),
    'EFFCS': list(range(1, 8)) + list(range(9, 29)),
    'EFFIS': list(range(1, 29)),
}

normalized_training_targets, normalized_validation_targets = (
    get_df_col_k(frame, k_inicial, k_final, dic_var_levels_exclude)
    for frame in (normalized_training_targets, normalized_validation_targets)
)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Hyper-parameters; layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 50
hidden_layers = [216]
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

model_dir = get_mode_var_name_dir(test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS')

# Train, then persist the model and its logs.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs, n_splits=1)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)

# To reuse an already trained model instead:
# model = load_model((model_dir + '/tmp_model.h5'))

validate_model(model, normalized_validation_examples, normalized_validation_targets)

# Per-variable values forwarded to calc_skill_C.
# NOTE(review): presumably the normalized value used for levels removed from the targets - confirm in calc_skill_C.
dic_var_default_values = {
    'qc': -1.0,
    'qr': -1.0,
    'qi': -1.0,
    'qs': -1.0,
    'ni': -1.0,
    'ns': -1.0,
    'nr': -1.0,
    'NG': -1.0,
    'NC': -1.0,
    'EFFCS': 0.64,
    'EFFIS': -0.8,
}
calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values, dic_var_default_values)

# FKB Testing ...
#
# Prints one validation sample (inputs, expected outputs and the Keras prediction)
# in the same comma-separated layout, then converts the saved .h5 model to the
# FKB text format so the same sample can be checked from Fortran.
print("colunas de entrada")
print(normalized_validation_examples.columns.values)
print("colunas de saída:")
print(normalized_validation_targets.columns.values)

# Last validation row, kept 2-D because model.predict expects a batch.
arr_one_input_val = normalized_validation_examples.tail(1).to_numpy()
arr_one_output_val = normalized_validation_targets.tail(1).to_numpy()
print("\n\n FKB Testing")
print("input = ")
print(*arr_one_input_val[0], sep=", ")
print("output = ")
print(*arr_one_output_val[0], sep=", ")


y = model.predict(arr_one_input_val, use_multiprocessing=True)
print("prediction h5 = ")
# `sep` only applies between several positional arguments, so print(y, sep=", ")
# printed the raw array repr. Unpack the single predicted row so it has the same
# comma-separated layout as the "output =" line above.
print(*y[0], sep=", ")


from KerasWeightsProcessing.convert_weights import txt_to_h5, h5_to_txt

# Convert the saved Keras weights to the FKB text format and copy the result to
# the working directory.
h5_to_txt(
    weights_file_name=model_dir + '/tmp_model.h5',
    output_file_name=model_dir + '/fkb_model.txt'
)
copyfile(model_dir + '/fkb_model.txt', './fkb_model.txt')

# print("\n\nprediction FORTRAN = \n ")                  
# command_fkb = "../FKB/build/bin/test_keras ./fkb_model.txt"
# !{command_fkb}

#### Teste C.6.5 - dt 60

- Idem C.4 mas removendo apenas níveis da entrada (na tentativa de a rede decifrar todos os níveis das saídas)

In [None]:
# Experiment "teste_C.6.5": level exclusion is applied to the inputs only; the
# targets keep every level in k_inicial..k_final. The model is loaded from disk
# rather than trained in this cell.
test_name = "teste_C.6.5"

(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets,
 all_min_values,
 all_max_values) = get_data_for_test()

drive_logs_test_dir = create_drive_test_log_dir(test_name)

k_inicial, k_final = 1, k_max

# Exclusion map handed to get_df_col_k for the input frames (variable name -> level numbers).
dic_var_levels_exclude = {
    # 'sl': range(1,29),
    'qv': range(20, 29),
    'qc': [3, 4] + list(range(13, 29)),
    'qr': range(13, 29),
    'qi': list(range(1, 25)) + list(range(26, 29)),
    # TODO - check why qg shows 0 and 1 in the input but only 0 in the output
    # 'qg': list(range(1,11)) + list(range(16,29)),
    'ni': list(range(1, 12)) + list(range(20, 25)) + list(range(26, 29)),
    'ns': list(range(1, 16)) + [27, k_max],
    'nr': range(13, 29),
    'NG': list(range(1, 9)) + [22, 23, 27, k_max],
    'NC': [3, 4] + list(range(13, 29)),
    'tke': [1] + list(range(9, 24)) + [25, 26],
    'kzh': range(10, 23),
    'omega': range(23, 29),
}

# Inputs: filtered with the exclusion map (training first, then validation).
normalized_training_examples, normalized_validation_examples = (
    get_df_col_k(frame, k_inicial, k_final, dic_var_levels_exclude)
    for frame in (normalized_training_examples, normalized_validation_examples)
)
# Targets: same level range, no exclusion map.
normalized_training_targets, normalized_validation_targets = (
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (normalized_training_targets, normalized_validation_targets)
)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Hyper-parameters; layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
hidden_layers = [216]
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

model_dir = get_mode_var_name_dir(test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS')

# Training is disabled in this cell; only the log directory name is computed.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
# train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs, n_splits=10)
# save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)

# Replace the freshly built model with the one stored under model_dir.
model = load_model((model_dir + '/tmp_model.h5'))

validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values)

# FKB Testing ...
#
print("colunas de entrada")
print(normalized_validation_examples.columns.values)
print("colunas de saída:")
print(normalized_validation_targets.columns.values)

arr_one_input_val = normalized_validation_examples.tail(1).to_numpy()
arr_one_output_val = normalized_validation_targets.tail(1).to_numpy()
print("\n\n FKB Testing")
print("input = ")
print(*arr_one_input_val[0], sep=", ")
print("output = ")
print(*arr_one_output_val[0], sep=", ")
           

y = model.predict(arr_one_input_val, use_multiprocessing=True)
print("prediction h5 = ")                  
print(y, sep=", ")
                  

from KerasWeightsProcessing.convert_weights import txt_to_h5, h5_to_txt

h5_to_txt(
    weights_file_name=model_dir + '/tmp_model.h5', 
    output_file_name=model_dir + '/fkb_model.txt'
)
copyfile(model_dir + '/fkb_model.txt', './fkb_model.txt')

print("\n\nprediction FORTRAN = \n ")                  
command_fkb = "../FKB/build/bin/test_keras ./fkb_model.txt"
!{command_fkb}


### Teste C.7
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta [476] - mesma quantidade de neurônios que a camada de entrada
- camada de saída com 16 variáveis x k_max níveis =  448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.7": all levels kept, a single hidden layer of 476 units.
test_name = "teste_C.7"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9)

k_inicial, k_final = 1, k_max

# Apply the level selection to the four frames, in the original call order.
(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = (
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (normalized_training_examples,
                  normalized_training_targets,
                  normalized_validation_examples,
                  normalized_validation_targets)
)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Hyper-parameters; layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
hidden_layers = [476]
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Train, save, validate and score.
# NOTE(review): other cells call train_model without the positional hidden_layers argument - confirm against the current signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, hidden_layers, epochs=epochs, n_splits=10)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_targets_min_values, all_targets_max_values)


#### Teste C.7.5 - dt 60

- Idem C.6.5 - removendo apenas níveis da entrada (na tentativa de a rede decifrar todos os níveis das saídas)

In [None]:
# Experiment "teste_C.7.5": level exclusion applied to the inputs only, a single
# hidden layer of 476 units.
test_name = "teste_C.7.5"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9)

k_inicial, k_final = 1, k_max

# Exclusion map handed to get_df_col_k for the input frames (variable name -> level numbers).
dic_var_levels_exclude = {
    # 'sl': range(1,29),
    'qv': range(20, 29),
    'qc': [3, 4] + list(range(13, 29)),
    'qr': range(13, 29),
    'qi': list(range(1, 25)) + list(range(26, 29)),
    # TODO - check why qg shows 0 and 1 in the input but only 0 in the output
    # 'qg': list(range(1,11)) + list(range(16,29)),
    'ni': list(range(1, 12)) + list(range(20, 25)) + list(range(26, 29)),
    'ns': list(range(1, 16)) + [27, k_max],
    'nr': range(13, 29),
    'NG': list(range(1, 9)) + [22, 23, 27, k_max],
    'NC': [3, 4] + list(range(13, 29)),
    'tke': [1] + list(range(9, 24)) + [25, 26],
    'kzh': range(10, 23),
    'omega': range(23, 29),
}

# Inputs: filtered with the exclusion map (training first, then validation).
normalized_training_examples, normalized_validation_examples = (
    get_df_col_k(frame, k_inicial, k_final, dic_var_levels_exclude)
    for frame in (normalized_training_examples, normalized_validation_examples)
)
# Targets: same level range, no exclusion map.
normalized_training_targets, normalized_validation_targets = (
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (normalized_training_targets, normalized_validation_targets)
)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Hyper-parameters; layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
hidden_layers = [476]
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Train, save, validate and score.
# NOTE(review): other cells call train_model without the positional hidden_layers argument - confirm against the current signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, hidden_layers, epochs=epochs, n_splits=10)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_targets_min_values, all_targets_max_values)


### Teste C.8
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 2 camadas ocultas [216, 216]
- camada de saída com 16 variáveis x k_max níveis =  448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.8": all levels kept, two hidden layers of 216 units each.
test_name = "teste_C.8"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9)

k_inicial, k_final = 1, k_max

# Apply the level selection to the four frames, in the original call order.
(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = (
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (normalized_training_examples,
                  normalized_training_targets,
                  normalized_validation_examples,
                  normalized_validation_targets)
)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Hyper-parameters; layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
hidden_layers = [216, 216]
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Train, save, validate and score.
# NOTE(review): other cells call train_model without the positional hidden_layers argument - confirm against the current signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, hidden_layers, epochs=epochs, n_splits=10)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_targets_min_values, all_targets_max_values)


### Teste C.9
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 2 camadas ocultas [476, 476]
- camada de saída com 16 variáveis x k_max níveis =  448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.9": all levels kept, two hidden layers of 476 units each.
test_name = "teste_C.9"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9)

k_inicial, k_final = 1, k_max

# Apply the level selection to the four frames, in the original call order.
(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = (
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (normalized_training_examples,
                  normalized_training_targets,
                  normalized_validation_examples,
                  normalized_validation_targets)
)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Hyper-parameters; layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
hidden_layers = [476, 476]
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Train, save, validate and score.
# NOTE(review): other cells call train_model without the positional hidden_layers argument - confirm against the current signature.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, hidden_layers, epochs=epochs, n_splits=10)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)
calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_targets_min_values, all_targets_max_values)


### Teste C.10
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 2 camadas ocultas [476, 216]
- camada de saída com 16 variáveis x k_max níveis =  448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.10": all levels kept, hidden layers [476, 216].
test_name = "teste_C.10"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets = \
get_percent_normalized_trainining_and_validation(normalized_all_examples, normalized_all_targets, 0.9)

k_inicial = 1
k_final = k_max

# Apply the level selection (k_inicial..k_final, no exclusion map) to every frame.
normalized_training_examples = get_df_col_k(normalized_training_examples, k_inicial, k_final)
normalized_training_targets = get_df_col_k(normalized_training_targets, k_inicial, k_final)
normalized_validation_examples = get_df_col_k(normalized_validation_examples, k_inicial, k_final)
normalized_validation_targets = get_df_col_k(normalized_validation_targets, k_inicial, k_final)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
# Layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
n_inputs = arr_input_train.shape[1]
hidden_layers = [476, 216]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Train model
# `target_key` is never assigned in this cell, so passing it either raised a
# NameError or picked up a stale value left by another cell. Build the log
# directory from (test_name, hidden_layers, epochs) like the other experiment cells.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)

### Teste C.11
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 2 camadas ocultas [476, 448]
- camada de saída com 16 variáveis x k_max níveis =  448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.11": all levels kept, hidden layers [476, 448].
test_name = "teste_C.11"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets = \
get_percent_normalized_trainining_and_validation(normalized_all_examples, normalized_all_targets, 0.9)

k_inicial = 1
k_final = k_max

# Apply the level selection (k_inicial..k_final, no exclusion map) to every frame.
normalized_training_examples = get_df_col_k(normalized_training_examples, k_inicial, k_final)
normalized_training_targets = get_df_col_k(normalized_training_targets, k_inicial, k_final)
normalized_validation_examples = get_df_col_k(normalized_validation_examples, k_inicial, k_final)
normalized_validation_targets = get_df_col_k(normalized_validation_targets, k_inicial, k_final)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
# Layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
n_inputs = arr_input_train.shape[1]
hidden_layers = [476, 448]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Train model
# `target_key` is never assigned in this cell, so passing it either raised a
# NameError or picked up a stale value left by another cell. Build the log
# directory from (test_name, hidden_layers, epochs) like the other experiment cells.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)

### Teste C.12
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- camadas ocultas [k_max, k_max, k_max]
- camada de saída com 16 variáveis x k_max níveis =  448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.12": all levels kept, three hidden layers of k_max units each.
test_name = "teste_C.12"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets = \
get_percent_normalized_trainining_and_validation(normalized_all_examples, normalized_all_targets, 0.9)

k_inicial = 1
k_final = k_max

# Apply the level selection (k_inicial..k_final, no exclusion map) to every frame.
normalized_training_examples = get_df_col_k(normalized_training_examples, k_inicial, k_final)
normalized_training_targets = get_df_col_k(normalized_training_targets, k_inicial, k_final)
normalized_validation_examples = get_df_col_k(normalized_validation_examples, k_inicial, k_final)
normalized_validation_targets = get_df_col_k(normalized_validation_targets, k_inicial, k_final)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
# Layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
n_inputs = arr_input_train.shape[1]
hidden_layers = [k_max, k_max, k_max]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Train model
# `target_key` is never assigned in this cell, so passing it either raised a
# NameError or picked up a stale value left by another cell. Build the log
# directory from (test_name, hidden_layers, epochs) like the other experiment cells.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)

### Teste C.13
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- camadas ocultas [10, 10, 10]
- camada de saída com 16 variáveis x k_max níveis =  448 outputs
- Executar a rede neural com normalização linear
- utilizando esquema de 5 splits e Early stop

In [None]:
# Experiment "teste_C.13": all levels kept, three hidden layers of 10 units each.
test_name = "teste_C.13"

drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

# Normalize the full example/target sets, then split them (0.9 is the split argument).
normalized_all_examples = normalize_linear_scale(all_examples)
normalized_all_targets = normalize_linear_scale(all_targets)

(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = get_percent_normalized_trainining_and_validation(
    normalized_all_examples, normalized_all_targets, 0.9)

k_inicial, k_final = 1, k_max

# Apply the level selection to the four frames, in the original call order.
(normalized_training_examples,
 normalized_training_targets,
 normalized_validation_examples,
 normalized_validation_targets) = (
    get_df_col_k(frame, k_inicial, k_final)
    for frame in (normalized_training_examples,
                  normalized_training_targets,
                  normalized_validation_examples,
                  normalized_validation_targets)
)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Hyper-parameters; layer sizes come from the column counts of the training arrays.
loss_type = 'mse'
epochs = 200
hidden_layers = [10, 10, 10]
n_inputs = arr_input_train.shape[1]
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type)
model.summary(print_fn=log_msg)

log_msg("Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg("Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

# Log directory used by the training call that follows.
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_i### Teste C.6
- Idem C.1 mas com com 216 neurônios
- camada de entrada com 17 variáveis x k_max níveis = 476 inputs
- 1 camada oculta com 216 neurônios
- camada de saída com 16 variáveis x k_max níveis -27 níveis LSRAIN LSSONW =  394 outnputs
- Executar a rede nerual com normalização linear
- utilizando esquema de 5 splits e Early stop

test_name = "teste_C.6"
levels = k_max
delta_t = 60
spin_hours = 24

all_spin_examples, all_spin_targets, no_spinup_size = get_no_spinup_examples_and_targets(levels, delta_t, spin_hours, pre_process=True)
    
normalized_training_examples, normalized_training_targets, normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values = \
get_data_for_test(all_spin_examples, all_spin_targets)


drive_logs_test_dir = create_drive_test_log_dir(test_name)
# drive_logs_test_dir = get_drive_test_log_dir(test_name)

k_inicial = 1
k_final = k_max

normalized_training_examples = get_df_col_k(normalized_training_examples, k_inicial, k_final)
normalized_training_targets = get_df_col_k(normalized_training_targets, k_inicial, k_final)
normalized_validation_examples = get_df_col_k(normalized_validation_examples, k_inicial, k_final)
normalized_validation_targets = get_df_col_k(normalized_validation_targets, k_inicial, k_final)

arr_input_train = normalized_training_examples.to_numpy()
arr_output_train = normalized_training_targets.to_numpy()

# Train model parameters
#
loss_type = 'mse'
epochs=200
n_inputs = arr_input_train.shape[1]
hidden_layers = [216]  
n_outputs = arr_output_train.shape[1]

model = get_custom_model(n_inputs, n_outputs, hidden_layers, loss=loss_type) 
model.summary(print_fn=log_msg)

log_msg( "Input  train variables/size = {}/{}".format(n_inputs, arr_input_train.shape[0]))
log_msg( "Output train variables/size = {}/{} ".format(n_outputs, arr_output_train.shape[0]))

model_dir = get_mode_var_name_dir(test_name, n_inputs, hidden_layers, n_outputs, epochs, 'ALL_VARS')

# Train model
log_dir_var = get_tb_logdir_var(test_name, hidden_layers, epochs)
train_model(model, arr_input_train, arr_output_train, log_dir_var, epochs=epochs, n_splits=10)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)

# use trained model
# model = load_model((model_dir + '/tmp_model.h5'))

validate_model(model, normalized_validation_examples, normalized_validation_targets)
dic_var_default_values = { \
  'qc': -1.0,
  'qr': -1.0,
  'qi': -1.0,
  'qs': -1.0,
  'ni': -1.0,
  'ns': -1.0,
  'nr': -1.0,
  'NG': -1.0,
  'NC': -1.0,
  'EFFCS': 0.64,
  'EFFIS': -0.8
}
calc_skill_C(normalized_validation_examples, normalized_validation_targets, all_min_values, all_max_values, dic_var_default_values)


# FKB Testing ...
#
print("colunas de entrada")
print(normalized_validation_examples.columns.values)
print("colunas de saída:")
print(normalized_validation_targets.columns.values)

arr_one_input_val = normalized_validation_examples.tail(1).to_numpy()
arr_one_output_val = normalized_validation_targets.tail(1).to_numpy()
print("\n\n FKB Testing")
print("input = ")
print(*arr_one_input_val[0], sep=", ")
print("output = ")
print(*arr_one_output_val[0], sep=", ")
           

y = model.predict(arr_one_input_val, use_multiprocessing=True)
print("prediction h5 = ")                  
print(y, sep=", ")
                  

from KerasWeightsProcessing.convert_weights import txt_to_h5, h5_to_txt

h5_to_txt(
    weights_file_name=model_dir + '/tmp_model.h5', 
    output_file_name=model_dir + '/fkb_model.txt'
)
copyfile(model_dir + '/fkb_model.txt', './fkb_model.txt')

print("\n\nprediction FORTRAN = \n ")                  
command_fkb = "../FKB/build/bin/test_keras ./fkb_model.txt"
!{command_fkb}nput_train, arr_output_train, log_dir_var, epochs=epochs)
save_model_and_logs(model, test_name, n_inputs, n_outputs, hidden_layers, epochs, log_dir_var, drive_logs_test_dir)
validate_model(model, normalized_validation_examples, normalized_validation_targets)