In [32]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
from tensorboard.plugins.hparams import api as hp

In [45]:
HP_NUM_UNITS1 = hp.HParam('num_units 1', hp.Discrete([4,8,16])) 
HP_NUM_UNITS2 = hp.HParam('num_units 2', hp.Discrete([4,8]))
#HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.3))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd','RMSprop']))
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(.001,.01))
METRIC_RMSE = 'RootMeanSquaredError'

n = 26
# Set forecasting window length (in years)
m = 13
# Set annual sampling rate
f = 12 

freq = 12
h = m*f
n_steps = 12

In [46]:
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)


def train_test_split_ts_2d_raw(X, h):
    return(X[:-h],X[-h:])

In [47]:
rmse_list = []
for i in range(6):
    file_name ="../extreme_data/raw12_{}.csv".format(i) 
    data = pd.read_csv(file_name)
    
    x = data.iloc[1:,1].values
    oni = data.iloc[1:,2].values
    

    
    #raw_seq = x[index:index+(n+m)*f]
    X, y = split_sequence(x, n_steps)

    #oni_seq = oni[index:index+(n+m)*f]
    X_oni, _ = split_sequence(oni, n_steps)

    #X = np.hstack((X_oni,X))
    X = X_oni

    #X_norm = (X - X.mean(0))/X.std(0)

    x_train, x_test = train_test_split_ts_2d_raw(X, h)
    y_train, y_test = train_test_split_ts_2d_raw(y, h)

    def train_test_model(hparams):
        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape = x_train.shape[1]),
            layers.Dense(hparams[HP_NUM_UNITS1], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
            layers.Dropout(0.1),
            layers.Dense(hparams[HP_NUM_UNITS2], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
            layers.Dense(1)
        ])

        model.compile(optimizer=hparams[HP_OPTIMIZER],
                    loss='mean_squared_error',
                    metrics=['RootMeanSquaredError'])

        model.fit(x_train, y_train, epochs=100,verbose=False) 
        _, rmse = model.evaluate(x_test, y_test, verbose=False)
        return rmse

    def run( hparams):
        #with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        rmse = train_test_model(hparams)
        #    tf.summary.scalar(METRIC_RMSE, rmse, step=1)
        return(rmse, hparams)

    session_num = 0
    min_rmse = float('inf')
    best_hparams = {}
    for num_units1 in HP_NUM_UNITS1.domain.values:
        for num_units2 in HP_NUM_UNITS2.domain.values:
            #for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value):
                for optimizer in HP_OPTIMIZER.domain.values:
                    hparams = {
                        HP_NUM_UNITS1: num_units1,
                        HP_NUM_UNITS2: num_units2,
                        #HP_DROPOUT: dropout_rate,
                        HP_L2: l2,
                        HP_OPTIMIZER: optimizer
                    }
                    run_name = "run-%d" % session_num
                    #print('--- Starting trial: %s' % run_name)
                    #print({h.name: hparams[h] for h in hparams})
                    rmse, current_hparams = run(hparams)
                    if ~np.isnan(rmse) and rmse < min_rmse: 
                        best_hparams = current_hparams
                        min_rmse = rmse
                    session_num += 1

    #params = list(best_hparams.values())
    rmse_list.append(min_rmse)
    #result_list.append(np.array(rmse_list).mean())

np.save("ONIonly.npy",np.array(rmse_list))
    

In [48]:
onionly = rmse_list

In [36]:
spi = rmse_list

In [29]:
raw = rmse_list

In [54]:
spioni = np.load("SPIWithONI_NoNormal.npy")

In [51]:
rawoni_noNormal = np.load("resultWithONI_NoNormal.npy")

In [55]:
pd.DataFrame({"Raw without Enso":raw,"Raw with Enso":rawoni_noNormal, 
              "SPI without Enso":spi, "SPI with Enso":spioni
               })

Unnamed: 0,Raw without Enso,Raw with Enso,SPI without Enso,SPI with Enso
0,4.993443,4.990809,6.818307,6.532733
1,2.106033,1.974325,3.605254,3.397066
2,1.383949,1.518489,2.968959,3.099936
3,3.077428,3.061981,4.690198,4.440694
4,1.524236,1.571057,3.117165,3.128671
5,1.612553,1.545339,2.509673,2.369509


# 12 Months:

In [56]:
raw_corr = np.array([-0.314, 0.195, -0.018,-0.277, -0.029, 0.188])

In [57]:
anomaly_corr = np.array([-0.307, 0.191, -0.013,-0.276, -0.033, 0.192])

In [58]:
SPI_corr = np.array([-0.273, 0.263, 0.149, -0.189, -0.093, 0.326])

In [59]:
pd.DataFrame({"Raw data":raw_corr,"Anomaly data":anomaly_corr, 
              "SPI":SPI_corr})

Unnamed: 0,Raw data,Anomaly data,SPI
0,-0.314,-0.307,-0.273
1,0.195,0.191,0.263
2,-0.018,-0.013,0.149
3,-0.277,-0.276,-0.189
4,-0.029,-0.033,-0.093
5,0.188,0.192,0.326


# 6 Months

In [60]:
raw_corr = np.array([-0.197, 0.021, -0.022,-0.145, -0.045, 0.129])

In [61]:
anomaly_corr = np.array([-0.06, 0.064, 0.018,-0.070, -0.009, 0.084])

In [62]:
SPI_corr = np.array([-0.153, 0.034, -0.018, -0.112, -0.081, 0.094])

In [63]:
pd.DataFrame({"Raw data":raw_corr,"Anomaly data":anomaly_corr, 
              "SPI":SPI_corr})

Unnamed: 0,Raw data,Anomaly data,SPI
0,-0.197,-0.06,-0.153
1,0.021,0.064,0.034
2,-0.022,0.018,-0.018
3,-0.145,-0.07,-0.112
4,-0.045,-0.009,-0.081
5,0.129,0.084,0.094


# 3 Months:

In [None]:
raw_corr = np.array([-0.082, 0.005, -0.007, -0.101, -0.026, 0.092])

In [None]:
anomaly_corr = np.array([-0.022, 0.032, 0.013, -0.036, 0.005, 0.079])

In [64]:
SPI_corr = np.array([-0.079, 0.002, -0.035, -0.091, -0.017, 0.034])

In [65]:
pd.DataFrame({"Raw data":raw_corr,"Anomaly data":anomaly_corr, 
              "SPI":SPI_corr})

Unnamed: 0,Raw data,Anomaly data,SPI
0,-0.197,-0.06,-0.079
1,0.021,0.064,0.002
2,-0.022,0.018,-0.035
3,-0.145,-0.07,-0.091
4,-0.045,-0.009,-0.017
5,0.129,0.084,0.034


In [25]:
result_list = []
for i in range(6):
    file_name ="../extreme_data/raw12_{}.csv".format(i) 
    data = pd.read_csv(file_name)
    
    x = data.iloc[1:,1].values
    #oni = data.iloc[1:,2].values
    
    rmse_list = []
    for index in range(0,int((L - (n+m)*f)/12)):
        raw_seq = x[index:index+(n+m)*f]
        X, y = split_sequence(raw_seq, n_steps)

        #oni_seq = oni[index:index+(n+m)*f]
        #X_oni, _ = split_sequence(oni_seq, n_steps)

        #X = np.hstack((X_oni,X))

        #X_norm = (X - X.mean(0))/X.std(0)

        x_train, x_test = train_test_split_ts_2d_raw(X, h)
        y_train, y_test = train_test_split_ts_2d_raw(y, h)

        def train_test_model(hparams):
            model = tf.keras.Sequential([
                tf.keras.layers.InputLayer(input_shape = x_train.shape[1]),
                layers.Dense(hparams[HP_NUM_UNITS1], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
                layers.Dropout(0.1),
                layers.Dense(hparams[HP_NUM_UNITS2], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
                layers.Dense(1)
            ])

            model.compile(optimizer=hparams[HP_OPTIMIZER],
                        loss='mean_squared_error',
                        metrics=['RootMeanSquaredError'])

            model.fit(x_train, y_train, epochs=100,verbose=False) 
            _, rmse = model.evaluate(x_test, y_test, verbose=False)
            return rmse

        def run( hparams):
            #with tf.summary.create_file_writer(run_dir).as_default():
            hp.hparams(hparams)  # record the values used in this trial
            rmse = train_test_model(hparams)
            #    tf.summary.scalar(METRIC_RMSE, rmse, step=1)
            return(rmse, hparams)

        session_num = 0
        min_rmse = float('inf')
        best_hparams = {}
        for num_units1 in HP_NUM_UNITS1.domain.values:
            for num_units2 in HP_NUM_UNITS2.domain.values:
                #for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
                for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value):
                    for optimizer in HP_OPTIMIZER.domain.values:
                        hparams = {
                            HP_NUM_UNITS1: num_units1,
                            HP_NUM_UNITS2: num_units2,
                            #HP_DROPOUT: dropout_rate,
                            HP_L2: l2,
                            HP_OPTIMIZER: optimizer
                        }
                        run_name = "run-%d" % session_num
                        #print('--- Starting trial: %s' % run_name)
                        #print({h.name: hparams[h] for h in hparams})
                        rmse, current_hparams = run(hparams)
                        if ~np.isnan(rmse) and rmse < min_rmse: 
                            best_hparams = current_hparams
                            min_rmse = rmse
                        session_num += 1

        params = list(best_hparams.values())
        rmse_list.append(min_rmse)
    result_list.append(np.array(rmse_list).mean())

np.save("resultWithoutONI.npy",np.array(result_list))
    

NameError: name 'L' is not defined

In [47]:
x = data.iloc[1:,1].values
oni = data.iloc[1:,2].values

In [49]:
rmse_list = []
for index in range(0,int((L - (n+m)*f)/12)):
    raw_seq = x[index:index+(n+m)*f]
    X, y = split_sequence(raw_seq, n_steps)

    #oni_seq = oni[index:index+(n+m)*f]
    #X_oni, _ = split_sequence(oni_seq, n_steps)

    #X = np.hstack((X_oni,X))

    #X_norm = (X - X.mean(0))/X.std(0)

    x_train, x_test = train_test_split_ts_2d_raw(X, h)
    y_train, y_test = train_test_split_ts_2d_raw(y, h)

    def train_test_model(hparams):
        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape = x_train.shape[1]),
            layers.Dense(hparams[HP_NUM_UNITS1], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
            layers.Dropout(0.1),
            layers.Dense(hparams[HP_NUM_UNITS2], kernel_regularizer=tf.keras.regularizers.l2(0.001), activation=tf.nn.relu),
            layers.Dense(1)
        ])

        model.compile(optimizer=hparams[HP_OPTIMIZER],
                    loss='mean_squared_error',
                    metrics=['RootMeanSquaredError'])

        model.fit(x_train, y_train, epochs=100,verbose=False) 
        _, rmse = model.evaluate(x_test, y_test, verbose=False)
        return rmse

    def run( hparams):
        #with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        rmse = train_test_model(hparams)
        #    tf.summary.scalar(METRIC_RMSE, rmse, step=1)
        return(rmse, hparams)

    session_num = 0
    min_rmse = float('inf')
    best_hparams = {}
    for num_units1 in HP_NUM_UNITS1.domain.values:
        for num_units2 in HP_NUM_UNITS2.domain.values:
            #for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value):
                for optimizer in HP_OPTIMIZER.domain.values:
                    hparams = {
                        HP_NUM_UNITS1: num_units1,
                        HP_NUM_UNITS2: num_units2,
                        #HP_DROPOUT: dropout_rate,
                        HP_L2: l2,
                        HP_OPTIMIZER: optimizer
                    }
                    run_name = "run-%d" % session_num
                    #print('--- Starting trial: %s' % run_name)
                    #print({h.name: hparams[h] for h in hparams})
                    rmse, current_hparams = run(hparams)
                    if ~np.isnan(rmse) and rmse < min_rmse: 
                        best_hparams = current_hparams
                        min_rmse = rmse
                    session_num += 1

    params = list(best_hparams.values())
    rmse_list.append(min_rmse)

In [41]:
best_hparams

{HParam(name='num_units 1', domain=Discrete([4, 8, 16]), display_name=None, description=None): 16,
 HParam(name='num_units 2', domain=Discrete([4, 8]), display_name=None, description=None): 4,
 HParam(name='l2 regularizer', domain=RealInterval(0.001, 0.01), display_name=None, description=None): 0.01,
 HParam(name='optimizer', domain=Discrete(['RMSprop', 'adam', 'sgd']), display_name=None, description=None): 'RMSprop'}

In [51]:
np.array(rmse_list).mean()

4.961028

In [None]:


#with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
#    hp.hparams_config(
#    hparams=[HP_NUM_UNITS1,HP_NUM_UNITS2, HP_DROPOUT,HP_L2 ,HP_OPTIMIZER],
#    metrics=[hp.Metric(METRIC_RMSE, display_name='RMSE')],
#  )

