In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

import tensorflow as tf

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

### Load the data prepared with Data preparation sheet

In [2]:
import numpy as np

X_train = np.load("WIR2-X-train-7.npy")
y_train = np.load("WIR2-y-train-7.npy").reshape(len(X_train),1)
indices_train = np.load("WIR2-indices-train-7.npy")
seq_length_train = np.load("WIR2-seq_length-train-7.npy")

X_eval = np.load("WIR2-X-eval-7.npy")
y_eval= np.load("WIR2-y-eval-7.npy").reshape(len(X_eval),1)
indices_eval = np.load("WIR2-indices-eval-7.npy")
seq_length_eval = np.load("WIR2-seq_length-eval-7.npy")

print("Number of train samples: ", len(X_train))
print("Number of eval samples: ", len(X_eval))

Number of train samples:  654166
Number of eval samples:  163542


### Build the recurrent neural network 

In [3]:
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.layers import dropout

reset_graph()

n_steps, n_features = X_train.shape[1], X_train.shape[2]
n_neurons = 400
learning_rate = 0.02
keep_prob = 0.5
n_layers = 2

X = tf.placeholder(tf.float32, shape=[None, n_steps, n_features], name="X")
y = tf.placeholder(tf.float32, shape = [None,1], name = "y")
is_training = tf.placeholder_with_default(False, shape=[])
seq_length = tf.placeholder(tf.int32, shape = [None])

# Using alternative cells: GRU Cell or basic cell
#gru_cell_1 = tf.contrib.rnn.GRUCell(num_units=n_neurons, activation = tf.nn.relu)
#gru_cell_2 = tf.contrib.rnn.GRUCell(num_units=n_neurons, activation = tf.nn.relu)
#multi_layer_cell = tf.contrib.rnn.MultiRNNCell([gru_cell_1, gru_cell_2])

#basic_cell_1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation = tf.nn.relu)
#basic_cell_2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation = tf.nn.relu)

# Using STM cell
improved_STM_cell_1 = tf.contrib.rnn.LayerNormBasicLSTMCell(num_units=n_neurons, activation = tf.nn.relu, dropout_keep_prob = 0.5)
improved_STM_cell_2 = tf.contrib.rnn.LayerNormBasicLSTMCell(num_units=n_neurons, activation = tf.nn.relu, dropout_keep_prob = 0.5)
multi_layer_cell = tf.contrib.rnn.MultiRNNCell([improved_STM_cell_1, improved_STM_cell_2], state_is_tuple = True)

outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype= tf.float32, sequence_length=seq_length)

#Last layer states, consireding there are 2 layers
last_states = states[1]

#Last relevant output: for a LSTM, states is a tuple (c, h) where c is the last state and h the last output
last_output = last_states[1]

#last_output = last_states

# Dropout before fully connected layer
output_drop = dropout(last_output, keep_prob, is_training=is_training ) 

# Single neuron output: 'Expected' prediction
prediction = fully_connected(output_drop, 1, activation_fn=None)

# Calculate the absolute error
vector_error = tf.abs(prediction-y)

error = tf.reduce_mean(vector_error)
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(error)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

### Train the network

In [5]:
n_epochs = 20
batch_size = 1000
SAVE_FILE = "./my_rnn_WIR2_2LNLSTM_400.ckpt"

train_errors = np.zeros((len(X_train),1))
eval_errors = np.zeros((len(X_eval),1))
best_error = np.infty
checks_without_progress = 0
max_checks_without_progress = 5
stopping = False

with tf.Session() as sess:
    init.run()
    
    # Restore the last version of the network
    saver.restore(sess, SAVE_FILE)

    for epoch in range(n_epochs):
        i=0
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch, seq_length_batch = X_train[rnd_indices], y_train[rnd_indices], seq_length_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, seq_length: seq_length_batch, is_training: True})
            if (i%100 == 0): 

                # Evaluation of the result for each stop
                print('Evaluating...')

                # Evaluation on the full eval set
                for indices in np.array_split(range(len(X_eval)), len(X_eval) // (batch_size*5)):
                    X_batch, y_batch, seq_length_batch = X_eval[indices], y_eval[indices], seq_length_eval[indices]
                    eval_errors[indices] = vector_error.eval(feed_dict={X: X_batch, y: y_batch, seq_length: seq_length_batch, is_training: False})            
                eval_error = np.mean(eval_errors)

                print("Epoch: ", epoch, "step: ",i*batch_size, " Eval error:", eval_error)
                
                if eval_error < best_error:
                    #Saving the last version of the network
                    checks_without_progress = 0
                    save_path = saver.save(sess, SAVE_FILE)
                    best_error = eval_error

                else: 
                    checks_without_progress += 1
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        stopping = True
                        break
                        
            i += 1
        if stopping: 
            break



with tf.Session() as sess:
    saver.restore(sess, SAVE_FILE)
    print("Final eval error: {:.3f}".format(best_error))               

Evaluating...
Epoch:  0 step:  0  Eval error: 11.4319479861
Evaluating...
Epoch:  0 step:  100000  Eval error: 7.24020764543
Evaluating...
Epoch:  0 step:  200000  Eval error: 7.22178762439
Evaluating...
Epoch:  0 step:  300000  Eval error: 7.17070942122
Evaluating...
Epoch:  0 step:  400000  Eval error: 7.19021157141
Evaluating...
Epoch:  0 step:  500000  Eval error: 7.11182463223
Evaluating...
Epoch:  0 step:  600000  Eval error: 7.12102944025
Evaluating...
Epoch:  1 step:  0  Eval error: 7.11434722274
Evaluating...
Epoch:  1 step:  100000  Eval error: 7.09075976615
Evaluating...
Epoch:  1 step:  200000  Eval error: 7.10758454773
Evaluating...
Epoch:  1 step:  300000  Eval error: 7.08634629195
Evaluating...
Epoch:  1 step:  400000  Eval error: 7.08532335739
Evaluating...
Epoch:  1 step:  500000  Eval error: 7.07933107544
Evaluating...
Epoch:  1 step:  600000  Eval error: 7.06820142117
Evaluating...
Epoch:  2 step:  0  Eval error: 7.08618465906
Evaluating...
Epoch:  2 step:  100000  E

### Observed results

improved SLTM, 2 layers, 50 neurons, WIR2-X-3.npy, batch:5000,  Train error: 6.385  Test error: 6.917 > could not save this config

improved SLTM, 2 layers, 200 neurons, WIR2-X-3.npy, my_rnn_WIR2-3.ckpt, batch:2500, 

improved SLTM, 2 layers, 100 neurons, my_rnn_WIR2_2LNLSTM_100n.ckpt, batch:2500, lr:0.005, eval error: 3.46 (but the samples were mixed) > overfitting

2017/11/22: improved SLTM, 2 layers, 25 neurons, my_rnn_WIR2_2LNLSTM_100n.ckpt, batch:10000, lr:0.01, eval error: 10

2017/11/24: improved SLTM, 2 layers, 50 neurons, my_rnn_WIR2_2LNLSTM_50n.ckpt, batch:10000, lr:0.005 (too low), na lines removed,  eval error: 7.82 > rank: 67e on Kaggle

2017/11/24: improved SLTM, 2 layers, 200 neurons, my_rnn_WIR2_2LNLSTM_200n_b, batch:2500, lr:0.01, na lines removed,  eval error: 7.791 > rank Kaggle: 48

2017/11/25: improved GruCell, 2 layers, 200 neurons, my_rnn_WIR2_2GruCell_200n_b, batch:2500, lr:0.01, na lines removed,  eval error: 7.836

2017/11/25: improved GruCell, 2 layers, 400 neurons, my_rnn_WIR2_2GruCell_400n_b, batch:2500, lr:0.01, na lines removed,  eval error: 7.792

2017/11/25: improved SLTM, 2 layers, 200 neurons, my_rnn_WIR2_2LNLSTM_250n_c, batch:2000, lr:0.02, na lines removed,  eval error: 7.788 > rank Kaggle: 35

Change of data set: removing values above 730 mm

2017/11/25: improved SLTM, 2 layers, 200 neurons, my_rnn_WIR2_2LNLSTM_250n_d, batch:2000, lr:0.02, na lines removed,  eval error: 7.009 > rank Kaggle: 26e


### Creating the estimation for submission

In [7]:
import numpy as np

X_test = np.load("WIR2-X-test-7.npy")  
indices_test = np.load("WIR2-indices-test-7.npy")
seq_length_test =  np.load("WIR2-seq_length-test-7.npy")

print("Number of samples: ", len(X_test))

Number of samples:  485477


#### To create the submission set

In [8]:
import numpy as np
batch_size = 10000

n_test_samples = 717625
train_median_value = 1.0160005

results = np.zeros((n_test_samples,2))

wrong_indices_test = np.load("WIR2-wrong_indices-test-7.npy")

with tf.Session() as sess:
    init.run()  
    saver.restore(sess, SAVE_FILE)
    
    for fake_indices in np.array_split(range(len(X_test)), len(X_test) // (batch_size * 5)):
        X_batch, seq_length_batch = X_test[fake_indices], seq_length_test[fake_indices]      
        estimates = prediction.eval(feed_dict={X: X_batch, seq_length: seq_length_batch, is_training: False})
        
        print("starting at: ", fake_indices[0])
        
        results[fake_indices,1] = estimates.transpose()
        results[fake_indices,0] = indices_test[fake_indices]
        results[len(X_test):,1] = train_median_value
        results[len(X_test):,0] = wrong_indices_test

INFO:tensorflow:Restoring parameters from ./my_rnn_WIR2_2LNLSTM_250n_d.ckpt
starting at:  0
starting at:  53942
starting at:  107884
starting at:  161826
starting at:  215768
starting at:  269710
starting at:  323652
starting at:  377594
starting at:  431536


In [9]:
import pandas as pd

output = pd.DataFrame(data= results, index= results[:,0], columns=['Id', 'Expected'])
output.sort_values(by=['Id'],inplace = True)
output.Id = output.Id.astype(int)

In [10]:
output.head(100)

Unnamed: 0,Id,Expected
1.0,1,0.366489
2.0,2,0.673109
3.0,3,3.931320
4.0,4,5.010110
5.0,5,1.016001
6.0,6,1.133926
7.0,7,2.920722
8.0,8,1.091403
9.0,9,1.016001
10.0,10,4.587115


In [11]:
outfile = "solution_submit_20171125c.csv"
output.to_csv(outfile, header=True, index=False)

### Estimating if the model beats the median

In [6]:
stupid_pred = np.ones((len(y_eval),1))*np.median(y_eval)
eval_errors = np.zeros((len(X_eval),1))
predictions = np.zeros((len(X_eval),1))
batch_size = 50000

with tf.Session() as sess:
    init.run() 
    saver.restore(sess, "./my_rnn_WIR2_2LNLSTM_250n_d.ckpt")
    
    for indices in np.array_split(range(len(X_eval)), len(X_eval) // batch_size):
        X_batch, y_batch,  seq_length_batch = X_eval[indices], y_eval[indices],seq_length_eval[indices]     
        eval_errors[indices] = vector_error.eval(feed_dict={X: X_batch, y: y_batch, seq_length: seq_length_batch, is_training: False})
        predictions[indices] = prediction.eval(feed_dict={X: X_batch, seq_length: seq_length_batch, is_training: False})
    
    eval_error = np.mean(eval_errors)
    stupid_prediction_error = np.mean(np.abs(stupid_pred - y_eval))
    mean_prediction = np.mean(predictions)
    
    print("Evaluation error:", eval_error, "Median value error:", stupid_prediction_error)

INFO:tensorflow:Restoring parameters from ./my_rnn_WIR2_2LNLSTM_250n_d.ckpt
Evaluation error: 7.00648677809 Median value error: 7.44104735567
