In [1]:
# Import libs && packages.
import tensorflow as tf
import numpy as np
import pandas as pd
import scipy.optimize as opt
import statsmodels.api as sm
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

In [2]:
# Construct model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)

    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

In [3]:
# Learning parameters
learning_rate = 0.001
training_epochs = 50000
batch_size = 1000
display_step = 1000

In [21]:
# Handle data.
df = pd.read_csv('../data/20190606180000-20190606190000.csv')
df=df.replace([np.inf, -np.inf], np.nan)
df=df.dropna();
df=df.round(decimals=5)  # round to one decimal after precision of devices
df['diff'] = (df['B_VIMIN'] - df['B:VIMIN'])

dep_cols = [ 'diff','B:VIMAX', 'B_VIMAX',  'B:IMAXXO', 'B_VINHBT',   'B:DCPG' , 'B:DCIG', 'B:VIPHAS',
                'B:PS1VGP', 'B:PS1VGM', 'B:GMPS1V', 'B:PS2VGP', 'B:PS2VGM',
               'B:GMPS2V', 'B:PS3VGP', 'B:PS3VGM', 'B:GMPS3V', 'B:PS4VGP', 'B:PS4VGM', 'B:GMPS4V','err_prev','y']




df['err_prev'] = df['B:IMINER'].shift(1)#iloc[1:len(df['B:IMINER'])]
df['y'] = df['B:IMINER']
# Taking the data we're focused on.
df2=df[dep_cols]
#df2['index'] = df['time_B:VIMIN']
#df2=df2.set_index(pd.DatetimeIndex(df2['index'])) # set index to time (not in-place operation)
df2=df2.dropna(axis=0)
scaler = MinMaxScaler(feature_range=(0, 1))
df2 = pd.DataFrame(scaler.fit_transform(df2))
df2.columns = dep_cols
dep_cols.remove('y')

  interactivity=interactivity, compiler=compiler, result=result)


In [22]:
print(df2.head())

       diff   B:VIMAX   B_VIMAX  B:IMAXXO  B_VINHBT  B:DCPG  B:DCIG  B:VIPHAS  \
0  0.872654  0.497239  0.630427       0.0       0.0     0.0     0.0  0.498181   
1  0.863452  0.621472  0.630427       0.0       0.0     0.0     0.0  0.495756   
2  0.868715  0.536774  0.630427       0.0       0.0     0.0     0.0  0.494370   
3  0.867996  0.649657  0.630427       0.0       0.0     0.0     0.0  0.494370   
4  0.870279  0.587550  0.630427       0.0       0.0     0.0     0.0  0.497662   

   B:PS1VGP  B:PS1VGM  ...  B:PS2VGM  B:GMPS2V  B:PS3VGP  B:PS3VGM  B:GMPS3V  \
0  0.105263  0.540540  ...  0.659091  0.222973  0.473684  0.714286  0.349869   
1  0.052632  0.513513  ...  0.681818  0.326014  0.421053  0.642857  0.195822   
2  0.122807  0.567568  ...  0.681818  0.172297  0.421053  0.642857  0.195822   
3  0.157895  0.594595  ...  0.659091  0.172297  0.421053  0.607143  0.083551   
4  0.368421  0.567568  ...  0.727273  0.537162  0.421053  0.642857  0.138381   

   B:PS4VGP  B:PS4VGM  B:GMPS4V 

In [23]:
train_data = df2.sample(n = int(0.8*len(df2['diff'])), random_state = 42)
test_data =  df2.drop(train_data.index)

train_stats = train_data.describe()
train_stats = train_stats.transpose()
test_stats = test_data.describe()
test_stats = test_stats.transpose()
print(train_stats.head())

def standardize(x,stats):
  return (x - stats['mean']) / stats['std']
standard_train_data = standardize(train_data, train_stats)
standard_test_data = standardize(test_data, test_stats)

train_data = standard_train_data
test_data  = standard_test_data

train_data.head()
train_data = train_data.dropna(axis=1) # drop columns that are just constants or data of 0.0
train_data.head()
dep_cols = list(train_data.columns)

            count      mean       std       min       25%       50%       75%  \
diff      38485.0  0.869876  0.043363  0.054836  0.856339  0.871101  0.886149   
B:VIMAX   38485.0  0.572780  0.089611  0.000000  0.514201  0.576308  0.632696   
B_VIMAX   38485.0  0.611210  0.052441  0.000000  0.565208  0.630427  0.630427   
B:IMAXXO  38485.0  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
B_VINHBT  38485.0  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   

          max  
diff      1.0  
B:VIMAX   1.0  
B_VIMAX   1.0  
B:IMAXXO  0.0  
B_VINHBT  0.0  


In [24]:
# Network Parameters
n_input =  len(dep_cols)   # layer 0 (input layer) number of features
n_hidden_1 = 10 # layer 1 number of features
n_hidden_2 = 10 # layer 2 number of features
n_output = 1;


# Store layers weight && bias
weights = {
    'h1': tf.Variable(tf.random_uniform([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_uniform([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_uniform([n_hidden_2, n_output]))
}
biases = {
    'b1': tf.Variable(tf.random_uniform([n_hidden_1])),
    'b2': tf.Variable(tf.random_uniform([n_hidden_2])),
    'out': tf.Variable(tf.random_uniform([n_output]))
}

x_data = tf.placeholder(tf.float32, [None, n_input])
y_data = tf.placeholder(tf.float32, [None, n_output])

# Construct model
pred = multilayer_perceptron(x_data, weights, biases)

# Build optimizer. Use mean-squared error for loss.
loss = tf.reduce_mean(tf.square(pred - y_data))
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train = optimizer.minimize(loss)

In [26]:
test_step=0
# Initialize variables. 
init = tf.global_variables_initializer()
# Begin session.
sess = tf.Session()
sess.run(init)
print("step  |   x    |   y    |      prediction     |     mean-squared error")
for step in range(training_epochs):
    train_samp = train_data.sample(n=batch_size, random_state = step)
    x_in = np.array(train_samp[dep_cols].values)
    # x_in = x_in.flatten()
    y_in = np.array(train_samp['y'].values)
    # x_in = x_in.reshape(-1,1)
    y_in = y_in.reshape(-1,1)
    sess.run(train, feed_dict = {x_data: x_in, y_data: y_in})  
    if(step % display_step == 0):
        test_step += 1
        test_samp =  test_data.sample(n=1,random_state = step)        
        x_test = np.array(test_samp[dep_cols].values)
        # x_test = np.transpose(x_test)
        y_test = np.array(test_samp['y'].values)
        y_test = y_test.reshape(-1,1)

        test_pred = sess.run(pred, feed_dict={x_data: x_test})
        test_loss = sess.run(loss, feed_dict={x_data: x_test, y_data: y_test})
        
        print("{0}     | {1}  | {2} |  {3} | {4}".format(test_step, round(x_test[0][0],4), round(y_test[0][0],4),np.round(test_pred[0][0],4), round(test_loss,4)))

step  |   x    |   y    |      prediction     |     mean-squared error
1     | 0.6594  | 0.0002 |  -13.762499809265137 | 189.41419982910156
2     | -0.0604  | -0.4053 |  -0.17319999635219574 | 0.05389999970793724
3     | -1.0259  | 0.2777 |  -0.17499999701976776 | 0.20489999651908875
4     | -0.4792  | -0.7469 |  -0.7822999954223633 | 0.0013000000035390258
5     | -0.1335  | -0.2559 |  -0.4154999852180481 | 0.025499999523162842
6     | 0.1585  | 0.1924 |  0.28299999237060547 | 0.008200000040233135
7     | -0.8097  | -1.2805 |  -0.11739999800920486 | 1.3528000116348267
8     | -0.2841  | -0.7255 |  -0.4422000050544739 | 0.08030000329017639
9     | 0.3075  | 0.3845 |  0.27140000462532043 | 0.012799999676644802
10     | 1.2227  | 1.7933 |  1.7620999813079834 | 0.0010000000474974513
11     | 0.4486  | 0.6193 |  0.7175999879837036 | 0.009700000286102295
12     | -16.9112  | -1.3232 |  -0.9745000004768372 | 0.12160000205039978
13     | 0.3662  | 0.8541 |  1.1654000282287598 | 0.0969000011682

In [27]:
x_test_f = np.array(test_data[dep_cols].values)
x_test_f = np.transpose(x_test)
y_test_f = np.array(test_data['y'].values)
y_test_f = y_test_f.reshape(-1,1)
test_samp = sess.run(pred, feed_dict={x_data: x_test})
test_loss = sess.run(loss, feed_dict={x_data: x_test, y_data: y_test})
print('mse over test set: ', test_loss)

mse over test set:  0.037206274
