## Package preparation

In [1]:
import os
import time

import numpy as np
import pandas as pd
from sklearn.utils import shuffle, resample

import tensorflow as tf
from tensorflow.contrib import rnn

# Confirmation message, reached only if every import above succeeded.
print("Package is ready.")

  from ._conv import register_converters as _register_converters


Package is ready.


In [2]:
# Number of consecutive time steps in each environment window fed to the network.
TIME_STEPS = 168
# Not referenced by any cell visible in this notebook.
OUTPUT_SIZE = 1
# Spacing between consecutive time steps (used as a pandas Timedelta / frequency string).
INTERVAL = "1h"
# Number of per-fold checkpoints restored by the prediction loop.
N_FOLD = 5
# Tag embedded in the file names of the result CSVs.
EXPLANATION = "ion_LSTM_closed_interpolate"

## Data preparation

In [3]:
# Load the label workbook: the first spreadsheet row is skipped and the "date"
# column becomes the index.
ion_label = pd.read_excel("../data/SW_Ion/Ion concentration_meq_20171109-20180426.xlsx", skiprows=1, index_col="date")

In [4]:
# Keep every row that is not dated before 2018-01-12. The negated "<" (rather
# than ">=") is deliberate: rows whose index does not compare as earlier are kept.
ion_label = ion_label[~(ion_label.index < '2018-01-12')]

# Drop, in a single pass, every column whose name contains "Unnamed:", ".1",
# ".2" or ".3" (presumably blank spreadsheet columns and pandas' suffixes for
# repeated headers -- TODO confirm). NOTE(review): this is a substring match,
# so any column name that merely contains ".1" etc. is dropped as well.
redundant_columns = [
    column for column in ion_label.columns
    if any(marker in column for marker in ("Unnamed:", ".1", ".2", ".3"))
]
ion_label = ion_label.drop(redundant_columns, axis=1)

# Blank out the 2018-04-13 row so that it is handled as a missing measurement.
# np.nan is used directly: the `pd.np` alias is deprecated and absent from
# recent pandas releases.
ion_label.loc[[pd.Timestamp("2018-04-13")], :] = np.nan

# Rows with at least one missing value are set aside as prediction targets;
# only the fully observed rows remain in ion_label.
na_label = ion_label[ion_label.isnull().any(axis=1)].copy()
ion_label = ion_label.dropna()

In [5]:
# Move every label timestamp forward by 16 hours (presumably the time of day at
# which the samples were taken -- TODO confirm).
# NOTE: this cell is not idempotent; re-running it shifts the index again.
ion_label.index += pd.Timedelta("16h")

In [6]:
# Shuffle the label rows. No random_state is passed, so the row order differs
# from run to run.
ion_label = ion_label.sample(frac=1.0)

In [7]:
# Load ./results/closed_data.csv and index it by its parsed TIMESTAMP column.
raw_closed = pd.read_csv("./results/closed_data.csv", index_col="TIMESTAMP")
raw_closed.index = pd.DatetimeIndex(raw_closed.index)

# Mask every reading that lies more than three standard deviations from its
# column mean; masked cells become NaN (the rows themselves are kept).
within_three_sigma = np.abs(raw_closed - raw_closed.mean()) <= (3 * raw_closed.std())
raw_closed = raw_closed[within_three_sigma]

# Pin the very first "RootTemp(1)" reading to 20 (presumably to give the later
# interpolation a defined starting value -- TODO confirm).
# A single positional .iloc assignment is used instead of the chained
# `frame[col][0] = ...` form, which may write to a temporary copy and relies
# on positional fallback of an integer key on a datetime-indexed Series.
raw_closed.iloc[0, raw_closed.columns.get_loc("RootTemp(1)")] = 20

# Remove the four "(3)" channels that are not used as model inputs.
raw_closed = raw_closed.drop(["ECS(3)", "InjDrgVR(3)", "DrgDaily(3)", "DrgV(3)"], axis=1)

In [8]:
# Fill NaN cells by linear interpolation along each column.
raw_closed = raw_closed.interpolate(method="linear")

# Disabled alternative, kept for reference: round the timestamps to INTERVAL
# and average the rows that fall on the same rounded timestamp.
# raw_closed.index = (raw_closed.index).round(INTERVAL)
# raw_closed = raw_closed.groupby(raw_closed.index).mean()

In [9]:
# Column-wise extremes of the inputs and of the labels. They drive the min-max
# scaling below, and the label extremes are reused to undo it on predictions.
MINS, MAXS = raw_closed.min(), raw_closed.max()
LABELMINS, LABELMAXS = ion_label.min(), ion_label.max()

In [10]:
# Min-max scale every input column and every label column with the extremes
# computed in the previous cell.
# NOTE: re-running only this cell rescales data that is already scaled.
feature_span = MAXS - MINS
label_span = LABELMAXS - LABELMINS
raw_closed = raw_closed.sub(MINS).div(feature_span)
ion_label = ion_label.sub(LABELMINS).div(label_span)

In [11]:
# For every labelled timestamp, collect the TIME_STEPS rows of raw_closed that
# end at that timestamp and are spaced INTERVAL apart.
window_span = pd.Timedelta(INTERVAL) * (TIME_STEPS - 1)
env_input = [
    raw_closed.loc[pd.date_range(label_time - window_span, label_time, freq=INTERVAL)]
    for label_time in ion_label.index
]

In [12]:
# Turn the list of window DataFrames into one NumPy array
# (one entry per window, built from each frame's raw values).
env_input = np.array([window.values for window in env_input])

In [13]:
# Name the labelled windows and their targets before env_input is reused below.
rnn_input, rnn_label = env_input, ion_label

In [14]:
# For every timestamp whose label is missing, collect the TIME_STEPS rows of
# raw_closed that end at that timestamp and are spaced INTERVAL apart.
window_span = pd.Timedelta(INTERVAL) * (TIME_STEPS - 1)
env_input = [
    raw_closed.loc[pd.date_range(missing_time - window_span, missing_time, freq=INTERVAL)]
    for missing_time in na_label.index
]

In [15]:
# Turn the list of window DataFrames into one NumPy array
# (one entry per window, built from each frame's raw values).
env_input = np.array([window.values for window in env_input])

In [16]:
# Sanity check: display the shape of the stacked windows built for the rows in na_label.
env_input.shape

(54, 168, 23)

In [17]:
# Name the windows of the unlabelled rows, and those rows themselves, for the
# prediction step.
pred_input, pred_label = env_input, na_label

In [18]:
# Sanity check: display the shape of the array that will be fed to the network for prediction.
pred_input.shape

(54, 168, 23)

## Network

In [19]:
def FC(x, size, scope):
    """Linear fully connected layer (no activation) with `size` outputs.

    Args:
        x: input tensor.
        size: number of output units.
        scope: variable scope name under which the layer's variables are created.

    Returns:
        The layer's output tensor.
    """
    return tf.contrib.layers.fully_connected(
        inputs=x,
        num_outputs=size,
        activation_fn=None,
        scope=scope,
    )

def BN_FC(x, n_hidden, phase, scope):
    """Fully connected layer followed by batch normalisation and ReLU.

    Args:
        x: input tensor.
        n_hidden: number of units of the fully connected layer.
        phase: value passed as `training` to the batch-normalisation layer.
        scope: variable scope that wraps the whole block.

    Returns:
        The ReLU-activated output tensor.
    """
    with tf.variable_scope(scope):
        linear_out = tf.contrib.layers.fully_connected(
            inputs=x,
            num_outputs=n_hidden,
            activation_fn=None,
            scope='FC',
        )
        normalised = tf.layers.batch_normalization(linear_out, training=phase)
        activated = tf.nn.relu(normalised, 'relu')
    return activated

In [20]:
# Settings shared by all folds. The layer sizes have to agree with the saved
# checkpoints for the restore below to succeed.
EPSILON = 0.00001   # not used by this cell; kept for parity with the original notebook
N_HIDDEN = 128
N_FC_HIDDEN = 128
DROP_PROB = 0.9     # not used by this cell: dropout is disabled (keep prob 1.0) when predicting
# Every window is a row slice of raw_closed, so its feature count is the
# number of raw_closed columns (independent of the reused env_input name).
N_INPUT = raw_closed.shape[1]
N_OUTPUT = ion_label.shape[1]
# One task-specific head, and one result column, per name in this list.
ION_NAMES = ["K", "Ca", "Mg", "S", "N", "P"]


def RNN(x, weights, biases, drop_prob):
    """Shared layer: layer-normalised LSTM followed by a linear projection.

    Args:
        x: input tensor that is unstacked into TIME_STEPS slices along axis 1.
        weights: matrix applied to the LSTM output of the last time step.
        biases: bias added after the projection.
        drop_prob: value passed to the cell as `dropout_keep_prob`.

    Returns:
        `outputs[-1] @ weights + biases`.
    """
    steps = tf.unstack(x, TIME_STEPS, 1)

    cell = rnn.LayerNormBasicLSTMCell(N_HIDDEN, dropout_keep_prob=drop_prob, reuse=tf.AUTO_REUSE)
    outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    return tf.matmul(outputs[-1], weights) + biases


def Task_specific_FC(x, n_hidden, phase, tasknum):
    """Head for one task: two BN_FC blocks followed by a single-unit FC layer.

    Scope names are derived from `tasknum` ("task<k>_layer1", "task<k>_layer2",
    "task<k>_out").
    """
    h1 = BN_FC(x, n_hidden, phase, "task" + str(tasknum) + "_layer1")
    h2 = BN_FC(h1, n_hidden, phase, "task" + str(tasknum) + "_layer2")

    return FC(h2, 1, "task" + str(tasknum) + "_out")


for i in range(N_FOLD):
    EXPLANATION_ = "Ion_closed_altMT_LN" + "_I" + INTERVAL + "_T" + str(TIME_STEPS) + "_F" + str(i+1)

    try:
        # tf Graph input
        x = tf.placeholder("float", [None, TIME_STEPS, N_INPUT])
        y = tf.placeholder("float", [None, N_OUTPUT])
        drop_prob = tf.placeholder("float")
        phase = tf.placeholder(tf.bool)

        # Projection variables of the shared layer; their names carry the fold number.
        weights = tf.get_variable("F"+ str(i+1) + "wgt", shape=[N_HIDDEN, N_OUTPUT], initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("F"+ str(i+1) + "bias", shape=[N_OUTPUT], initializer=tf.contrib.layers.xavier_initializer())

        # Construct model: one shared layer feeding one head per ion. Heads are
        # created in task order 1..len(ION_NAMES), which keeps the variable
        # scope names identical to the hand-unrolled version.
        shared_layer = RNN(x, weights, biases, drop_prob)
        task_preds = [
            Task_specific_FC(shared_layer, N_FC_HIDDEN, phase, tasknum)
            for tasknum in range(1, len(ION_NAMES) + 1)
        ]
        pred = tf.concat(task_preds, axis=1)

        # Evaluate model: R^2 over all targets, using the mean of all entries of y.
        total_error = tf.reduce_sum(tf.square(tf.subtract(y, tf.reduce_mean(y))))
        unexplained_error = tf.reduce_sum(tf.square(tf.subtract(y, pred)))
        R_squared = tf.subtract(1., tf.divide(unexplained_error, total_error))
        accuracy = R_squared

        # Initializing the variables
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # The context manager closes the session even when restoring or
        # running fails, so no session outlives its fold.
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth =True))) as sess:
            sess.run(init)

            # Load the best checkpoint saved for this fold.
            bestpath = "./models/best_trained_" + EXPLANATION_ + ".ckpt"
            saver.restore(sess, bestpath)

            train_acc = sess.run(accuracy, feed_dict={x: rnn_input, y: rnn_label, drop_prob: 1.0, phase: False})
            print ("Training Acc: %.3f" % (train_acc))

            prediction = sess.run(pred, feed_dict={x: pred_input, drop_prob: 1.0, phase: False})

        # Undo the min-max scaling of the labels to get back to original units.
        prediction = (prediction * (LABELMAXS.values - LABELMINS.values)) + LABELMINS.values
        ion_results = pd.DataFrame(prediction, index=pred_label.index, columns=ION_NAMES)
        ion_results.to_csv("./results/results_" + EXPLANATION + "_F" + str(i+1) + ".csv")
    finally:
        # Reset only after the session is closed (resetting while a session is
        # active is undefined behaviour), and also after a failure so that a
        # re-run does not collide with variables left in the default graph.
        tf.reset_default_graph()

INFO:tensorflow:Restoring parameters from ./models/best_trained_Ion_closed_altMT_LN_I1h_T168_F1.ckpt
Training Acc: 0.525
INFO:tensorflow:Restoring parameters from ./models/best_trained_Ion_closed_altMT_LN_I1h_T168_F2.ckpt
Training Acc: 0.953
INFO:tensorflow:Restoring parameters from ./models/best_trained_Ion_closed_altMT_LN_I1h_T168_F3.ckpt
Training Acc: 0.896
INFO:tensorflow:Restoring parameters from ./models/best_trained_Ion_closed_altMT_LN_I1h_T168_F4.ckpt
Training Acc: 0.891
INFO:tensorflow:Restoring parameters from ./models/best_trained_Ion_closed_altMT_LN_I1h_T168_F5.ckpt
Training Acc: 0.913
