In [1]:
import math
import tensorflow as tf
import pandas as pd
import numpy as np
import pickle
from IPython.display import display, Math, Latex
gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.8)
s = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

notes:
1: implement a simple feef forward neural network to see how good it works on the dev set
2: define the hyperparameters and try tuning them 

ideas:
1: try modelling the shape using a rnn (maybe lstm) and then feed the results and alpha and Re to feed forward network


In [2]:
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    
    m = X.shape[1]                  # number of training examples
    mini_batches = []
    np.random.seed(seed)
    
    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0],m))

    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_minibatches = int(math.floor(m/mini_batch_size)) 
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    return mini_batches

In [4]:
# import inputs
#with open('polars.pickle', 'r') as fid:
#    polars = pickle.load(fid)
with open('af_points.pickle', 'rb') as fid:
    af_data_dic = pickle.load(fid, encoding='latin1')
with open('af_labels.pickle', 'rb') as fid:
    af_label = pickle.load(fid, encoding='latin1')
with open('label_afs.pickle', 'rb') as fid:
    label_af = pickle.load(fid, encoding='latin1')

In [5]:
# hyperparameters
lamda = 0.05
learning_rate = 0.0005
epochs = 500
batch_size = 128
layers = [80,25,25,4]
s_train, s_dev, s_tes = 0.94, 0.03, 0.03

In [6]:
# create input
data = pd.read_csv('raw_af_data.txt')
index = data.index
h = data.copy()
# normalize re and alpha
# ====================================
mu_a = h['a'].mean()
sigma_a = h['a'].std()
re_max = h['re'].max()
# ====================
h['a'] = (h['a'] - mu_a) / sigma_a
h['re'] = h['re'] / re_max
# ====================================
# shuffle the data three times
h = h.sample(frac=1, axis=0).reset_index(drop=True)
h = h.reindex(np.random.permutation(h.index)).reset_index(drop=True)
# third shuffle
inputs_train = h.sample(frac=s_train)
remaining = h.drop(inputs_train.index)
inputs_train = inputs_train.reset_index(drop=True)
inputs_dev = remaining.sample(frac=(s_dev/(1-s_train)))
inputs_test = remaining.drop(inputs_dev.index).reset_index(drop=True)
inputs_dev = inputs_dev.reset_index(drop=True)


x_train = inputs_train[['af', 're', 'a']].values.transpose()
y_train = inputs_train[['cl', 'cd', 'cdp', 'cm']].values.transpose()
m_train = x_train.shape[1]

x_dev = inputs_dev[['af', 're', 'a']].values.transpose()
y_dev = inputs_dev[['cl', 'cd', 'cdp', 'cm']].values.transpose()
m_dev = x_dev.shape[1]

x_test = inputs_test[['af', 're', 'a']].values.transpose()
y_test = inputs_test[['cl', 'cd', 'cdp', 'cm']].values.transpose()
m_test = x_test.shape[1]

# m_train + m_dev + m_test

In [7]:
n = af_data_dic[label_af[0]]['input'].shape[1] + 2
x = tf.placeholder('float64', shape=(n, None))
y = tf.placeholder('float64', shape=(4, None))

In [8]:
#tf.reset_default_graph()
W1 = tf.get_variable("W1", (layers[0], n), 
                     initializer=tf.contrib.layers.xavier_initializer(seed=0), 
                     dtype=tf.float64)

b1 = tf.get_variable("b1", [layers[0], 1], initializer=tf.zeros_initializer(), dtype=tf.float64)

W2 = tf.get_variable("W2", [layers[1], layers[0]], 
                     initializer=tf.contrib.layers.xavier_initializer(seed=0), 
                     dtype=tf.float64)

b2 = tf.get_variable("b2", [layers[1], 1], initializer=tf.zeros_initializer(), dtype=tf.float64)

W3 = tf.get_variable("W3", [layers[2], layers[1]], 
                     initializer=tf.contrib.layers.xavier_initializer(seed=0), 
                     dtype=tf.float64)

b3 = tf.get_variable("b3", [layers[2], 1], initializer=tf.zeros_initializer(), dtype=tf.float64)

W4 = tf.get_variable("W4", [layers[3], layers[2]], 
                     initializer=tf.contrib.layers.xavier_initializer(seed=0), 
                     dtype=tf.float64)

b4 = tf.get_variable("b4", [layers[3], 1], initializer=tf.zeros_initializer(), dtype=tf.float64)

W0721 13:04:25.641305 140464238892864 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [9]:
# Forward Prop
Z1 = tf.add(tf.matmul(W1, x), b1)
A1 = tf.nn.tanh(Z1)
Z2 = tf.add(tf.matmul(W2, A1), b2)
A2 = tf.nn.tanh(Z2)
Z3 = tf.add(tf.matmul(W3, A2), b3)
A3 = tf.nn.tanh(Z3) 
Z4 = tf.add(tf.matmul(W4, A3), b4)
A4 = Z4

In [10]:
# compute cost
cost = tf.reduce_mean((y-A4)**2.)

# frobenious regularization
reg = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3) + tf.nn.l2_loss(W4) 

# compute reqularized loss
cost = tf.reduce_mean(cost + lamda * reg)

In [11]:
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()

W0721 13:04:32.649280 140464238892864 deprecation.py:323] From /home/ariya/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [11]:
# training 
seed = 10
costs = []
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(epochs):
        epoch_cost = 0.
        n_batch = int(m_train/batch_size)
        seed = seed + 1
        minibatches = random_mini_batches(x_train, y_train, batch_size, seed)
        for minibatch in minibatches:
            (minibatch_x, minibatch_y) = minibatch
            # convert the x_s
            x_af = minibatch_x[0,:].astype(int)
            x_af = np.array([af_data_dic[label_af[i]]['input'].flatten() for i in x_af]).transpose()
            x_temp = minibatch_x[1:3,:]
            minibatch_x = np.concatenate((x_af, x_temp), axis=0)
            _ , minibatch_cost = sess.run([optimizer, cost], feed_dict={x: minibatch_x, y: minibatch_y})
            epoch_cost += minibatch_cost / n_batch

        # Print the cost every epoch
        if epoch % 1 == 0:
            print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
        if epoch % 1 == 0:
            costs.append(epoch_cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per tens)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    #accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

Cost after epoch 0: 0.182370
Cost after epoch 1: 0.107940
Cost after epoch 2: 0.107919
Cost after epoch 3: 0.107907
Cost after epoch 4: 0.107915


KeyboardInterrupt: 

In [13]:
x_dev

array([[ 3.67000000e+02,  9.40000000e+01,  1.35000000e+02, ...,
         3.67000000e+02,  9.60000000e+01,  1.34000000e+02],
       [ 7.12328767e-01,  7.80821918e-01,  3.01369863e-01, ...,
         4.10958904e-02,  6.98630137e-01,  1.36986301e-01],
       [ 8.69089580e-01, -2.38664736e-01,  1.55078454e+00, ...,
         6.98665839e-01,  1.69708753e-02, -1.60205466e+00]])

In [14]:
x_af = x_dev[0,:].astype(int)
x_af = np.array([af_data_dic[label_af[i]]['input'].flatten() for i in x_af]).transpose()
x_temp = x_dev[1:3,:]
x_dex_n = np.concatenate((x_af, x_temp), axis=0)