In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.metrics import mean_squared_error
import csv
import scipy.stats as stats
from tensorboard.plugins.hparams import api as hp
import scipy.stats as stats
plt.rcParams["figure.figsize"] = (20,10)

import seaborn as sns
from Data import Data
from datetime import datetime
import feature_finder_keiko as ff
import h5py

In [6]:
tau = 6

n_components_sst = 98

link_caus = np.load("./link_rotated_station/best_link_1976_2010_{}.npy".format(tau))
link_corr = np.load("./link_rotated_station/best_link_corr_1976_2010_{}.npy".format(tau))

In [8]:
file_name = '../../../nc/sst.mnmean.nc'

In [10]:
train_start = 1976
validation_end = 2010

temporal_limits = {"time_min":datetime(train_start, 1, 1, 0, 0),"time_max":datetime(validation_end, 12, 1, 0, 0)}
original_count, count = ff.drought_timeseries("../npy_files/ET_gamma_18912015.npy",train_start,validation_end)
#data_sst, ts, V, df_sst, avg, std = ff.PCA_computer_rotated(file_name, "sst",temporal_limits, n_components_sst, -9.96921e+36)

In [11]:
data_sst = np.load("data_sst.npy")
V = np.load("V.npy")
ts = np.load("ts.npy")
avg = np.load("avg.npy")
std = np.load("std.npy")
data_sst_test = np.load("data_sst_test.npy")

In [14]:
df_sst = pd.read_csv("df_sst.csv")

In [13]:
test_start = 2011
test_end = 2015

temporal_limits_test = {"time_min":datetime(test_start, 1, 1, 0, 0),"time_max":datetime(test_end, 12, 1, 0, 0)}
#data_sst_test = ff.data_generator_avg_std('../../../nc/sst.mnmean.nc', "sst",temporal_limits_test, avg, std, 12, -9.96921e+36)
original_count_test, count_test = ff.drought_timeseries("../npy_files/ET_gamma_18912015.npy",test_start,test_end)

In [20]:
del df_sst["Unnamed: 0"]

In [23]:
link = link_caus

start_lag = tau
end_lag = tau+12

df = pd.DataFrame({"drought": original_count})
lags = np.arange(start_lag,end_lag + 1)
df = df.assign(**{
'{} (t-{})'.format(col, t): df[col].shift(t)
for t in lags
for col in df
})
for k in range(len(link)):
    df[str(k)] = ff.time_series_maker_V(data_sst, V[:,link[k,0]-1])
    df[str(k)] = df[str(k)].shift(abs(link[k,1]))
df = df.dropna()

x_train = df.iloc[:,1:]
y_train = df.iloc[:,0]

In [25]:
start_lag = tau
end_lag = tau + 12

df_test = pd.DataFrame({"drought":original_count_test})
lags = np.arange(start_lag,end_lag + 1)
df_test = df_test.assign(**{
'{} (t-{})'.format(col, t): df_test[col].shift(t)
for t in lags
for col in df_test
})
for k in range(len(link)):
    df_sst["pc"] = V[:,link[k,0]-1]
    df_test[str(k)] = ff.time_series_maker(link[k,0]-1, df_sst, data_sst_test)
    df_test[str(k)] = df_test[str(k)].shift(abs(link[k,1]))
df_test = df_test.dropna()

x_test = df_test.iloc[:,1:]
y_test = df_test.iloc[:,0]

In [29]:
!rm -rf ./logs/

%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [38]:
HP_NUM_UNITS1 = hp.HParam('num_units 1', hp.Discrete([32])) 
HP_NUM_UNITS2 = hp.HParam('num_units 2', hp.Discrete([32]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.29,0.3))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd','RMSprop']))
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(.009,.01))
METRIC_RMSE = 'RootMeanSquaredError'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
    hparams=[HP_NUM_UNITS1,HP_NUM_UNITS2, HP_DROPOUT,HP_L2 ,HP_OPTIMIZER],
    metrics=[hp.Metric(METRIC_RMSE, display_name='RMSE')],
  )

def train_test_model(hparams):
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape = x_train.shape[1]),
        layers.Dense(hparams[HP_NUM_UNITS1], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
        layers.Dropout(hparams[HP_DROPOUT]),
        layers.Dense(hparams[HP_NUM_UNITS2], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
        layers.Dense(1)
    ])

    model.compile(optimizer=hparams[HP_OPTIMIZER],
                loss='mean_squared_error',
                metrics=['RootMeanSquaredError'])

    model.fit(x_train, y_train, epochs=100,verbose=False) 
    _, rmse = model.evaluate(x_test, y_test)
    return rmse

def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        rmse = train_test_model(hparams)
        tf.summary.scalar(METRIC_RMSE, rmse, step=1)
    return(rmse, hparams)

session_num = 0
min_rmse = float('inf')
best_hparams = {}
for num_units1 in HP_NUM_UNITS1.domain.values:
    for num_units2 in HP_NUM_UNITS2.domain.values:
        for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value):
                for optimizer in HP_OPTIMIZER.domain.values:
                    hparams = {
                        HP_NUM_UNITS1: num_units1,
                        HP_NUM_UNITS2: num_units2,
                        HP_DROPOUT: dropout_rate,
                        HP_L2: l2,
                        HP_OPTIMIZER: optimizer
                    }
                    run_name = "run-%d" % session_num
                    print('--- Starting trial: %s' % run_name)
                    print({h.name: hparams[h] for h in hparams})
                    rmse, current_hparams = run('logs/hparam_tuning/' + run_name, hparams)
                    if (~np.isnan(rmse)) and (rmse < min_rmse): 
                        best_hparams = current_hparams
                        min_rmse = rmse
                        print("mis_rmse {}".format(min_rmse))
                    session_num += 1

--- Starting trial: run-0
{'num_units 1': 32, 'num_units 2': 32, 'dropout': 0.29, 'l2 regularizer': 0.009, 'optimizer': 'RMSprop'}
mis_rmse 1149.0103759765625
--- Starting trial: run-1
{'num_units 1': 32, 'num_units 2': 32, 'dropout': 0.29, 'l2 regularizer': 0.009, 'optimizer': 'adam'}
mis_rmse 1045.908203125
--- Starting trial: run-2
{'num_units 1': 32, 'num_units 2': 32, 'dropout': 0.29, 'l2 regularizer': 0.009, 'optimizer': 'sgd'}
--- Starting trial: run-3
{'num_units 1': 32, 'num_units 2': 32, 'dropout': 0.29, 'l2 regularizer': 0.01, 'optimizer': 'RMSprop'}
--- Starting trial: run-4
{'num_units 1': 32, 'num_units 2': 32, 'dropout': 0.29, 'l2 regularizer': 0.01, 'optimizer': 'adam'}
mis_rmse 953.1653442382812
--- Starting trial: run-5
{'num_units 1': 32, 'num_units 2': 32, 'dropout': 0.29, 'l2 regularizer': 0.01, 'optimizer': 'sgd'}
--- Starting trial: run-6
{'num_units 1': 32, 'num_units 2': 32, 'dropout': 0.3, 'l2 regularizer': 0.009, 'optimizer': 'RMSprop'}
mis_rmse 647.248596191

In [28]:
%tensorboard --logdir logs/hparam_tuning