## Initialization

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [29]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

## Import the Data

In [3]:
dataset = pd.read_excel('Folds5x2_pp.xlsx')
dataset.shape

(9568, 5)

In [4]:
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


## Create X and Y

In [5]:
X = dataset.iloc[:, 0:4].values
Y = dataset.iloc[:, 4].values

In [6]:
X

array([[   14.96,    41.76,  1024.07,    73.17],
       [   25.18,    62.96,  1020.04,    59.08],
       [    5.11,    39.4 ,  1012.16,    92.14],
       ..., 
       [   31.32,    74.33,  1012.92,    36.48],
       [   24.48,    69.45,  1013.86,    62.39],
       [   21.6 ,    62.52,  1017.23,    67.87]])

In [7]:
Y

array([ 463.26,  444.37,  488.56, ...,  429.57,  435.74,  453.28])

## Preprocess the Data

In [8]:
sc_X = StandardScaler()

In [9]:
X = sc_X.fit_transform(X)

In [10]:
X

array([[-0.62951938, -0.98729659,  1.8204884 , -0.00951935],
       [ 0.74190911,  0.68104512,  1.1418628 , -0.97462052],
       [-1.95129733, -1.17301765, -0.18507756,  1.2898397 ],
       ..., 
       [ 1.56583973,  1.57581141, -0.05709854, -2.52261812],
       [ 0.64797565,  1.19177803,  0.1011913 , -0.74790051],
       [ 0.26150656,  0.64641916,  0.66867722, -0.37254534]])

## Create Train and Test Data

In [11]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 4)

In [12]:
X_train.shape

(7654, 4)

In [13]:
Y_train.shape

(7654,)

In [14]:
X_test.shape

(1914, 4)

In [15]:
Y_test.shape

(1914,)

## Create TF Regression Model

In [16]:
num_features = X.shape[1]
num_features

4

In [18]:
# Create Weights and Biases Variable
W = tf.Variable(tf.zeros([num_features, 1]))
B = tf.Variable(tf.zeros([1]))

In [19]:
# Create x and y_ placeholders for actual values in the dataset
x = tf.placeholder(tf.float32, [None, num_features])
y_ = tf.placeholder(tf.float32, [None, 1])

In [20]:
# y holds model's predicted values
Wx = tf.matmul(x, W)
y = Wx + B

In [21]:
# Create a cost function to minimize
cost = tf.reduce_mean(tf.square(y - y_))

In [23]:
# Create an Optimizer to minimize cost function
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.1).minimize(cost)

In [27]:
def trainTheDataset(num_steps, optimizer_to_use, batch_size):
    init = tf.global_variables_initializer()
    # initialize all the global variables
    
    with tf.Session() as sess:
        sess.run(init)
                
        for i in range(num_steps):
            
            # Calculate batch start index
            if batch_size == len(X_train):
                batch_start_index = 0
            elif batch_size > len(X_train):
                raise ValueError("Batch Size : " + str(batch_size) + " cannot be greater than the Data Size : " + str(len(X_train)))
            else:
                batch_start_index = (i * batch_size) % (len(X_train) - batch_size)
                
            # Calculate batch end index
            batch_end_index = batch_start_index + batch_size
            
            # Calculate batch X and Y Values
            batch_X_Values = X_train[batch_start_index : batch_end_index]
            batch_Y_Values = Y_train[batch_start_index : batch_end_index]
            
            # Create feed dictionary to feed the optimizer
            feed = {x : np.array(batch_X_Values), y_ : np.transpose(np.array([batch_Y_Values]))}
            
            sess.run(optimizer_to_use, feed_dict = feed)
            
            # Print out the cost, W, B for every 2nd Iteration
            if (i + 1) % 2 == 0:
                print("After " + str(i) + " iterations, Cost : ", sess.run(cost, feed_dict = feed))
                print("W : ", sess.run(W))
                print("B : ", sess.run(B))
                print("")
            
        # Calculate the would be predicted values by the model
        Y_pred = sess.run(y, feed_dict = {x : np.array(X_test)})
            
        # Close the Session
        sess.close()
        
        # Return the predicted values for Test Set
        return Y_pred

In [32]:
Y_pred = trainTheDataset(num_steps = 500, optimizer_to_use = optimizer, batch_size = len(X_train))

After 1 iterations, Cost :  84641.5
W :  [[-4.42813969]
 [-3.71113825]
 [ 2.10800862]
 [ 0.82655996]]
B :  [ 163.54986572]

After 3 iterations, Cost :  34686.9
W :  [[-6.30298281]
 [-5.10338736]
 [ 2.58487344]
 [ 0.67585492]]
B :  [ 268.23977661]

After 5 iterations, Cost :  14225.3
W :  [[-7.33066845]
 [-5.72757244]
 [ 2.57841682]
 [ 0.38781536]]
B :  [ 335.2472229]

After 7 iterations, Cost :  5843.17
W :  [[-8.01461124]
 [-6.04105997]
 [ 2.42748833]
 [ 0.13390431]]
B :  [ 378.13424683]

After 9 iterations, Cost :  2409.13
W :  [[-8.52525043]
 [-6.198946  ]
 [ 2.24116421]
 [-0.06855816]]
B :  [ 405.58300781]

After 11 iterations, Cost :  1002.07
W :  [[-8.93296909]
 [-6.26574707]
 [ 2.05758786]
 [-0.23094389]]
B :  [ 423.1506958]

After 13 iterations, Cost :  425.387
W :  [[-9.27327824]
 [-6.27451324]
 [ 1.89027762]
 [-0.36547953]]
B :  [ 434.39422607]

After 15 iterations, Cost :  188.903
W :  [[-9.5668354 ]
 [-6.24488449]
 [ 1.74311197]
 [-0.48079565]]
B :  [ 441.59011841]

After 1

After 223 iterations, Cost :  20.5711
W :  [[-14.75544071]
 [ -2.96457267]
 [  0.36670971]
 [ -2.27627301]]
B :  [ 454.36880493]

After 225 iterations, Cost :  20.571
W :  [[-14.75831413]
 [ -2.96244383]
 [  0.36616853]
 [ -2.27719855]]
B :  [ 454.36880493]

After 227 iterations, Cost :  20.5709
W :  [[-14.76107311]
 [ -2.96040058]
 [  0.3656491 ]
 [ -2.2780869 ]]
B :  [ 454.36880493]

After 229 iterations, Cost :  20.5709
W :  [[-14.76372147]
 [ -2.95843887]
 [  0.36515051]
 [ -2.27893972]]
B :  [ 454.36880493]

After 231 iterations, Cost :  20.5708
W :  [[-14.76626301]
 [ -2.95655608]
 [  0.36467195]
 [ -2.27975821]]
B :  [ 454.36877441]

After 233 iterations, Cost :  20.5707
W :  [[-14.76870251]
 [ -2.95474887]
 [  0.36421263]
 [ -2.2805438 ]]
B :  [ 454.36877441]

After 235 iterations, Cost :  20.5707
W :  [[-14.77104568]
 [ -2.95301414]
 [  0.36377165]
 [ -2.28129768]]
B :  [ 454.36877441]

After 237 iterations, Cost :  20.5707
W :  [[-14.77329445]
 [ -2.95134878]
 [  0.36334813]



After 449 iterations, Cost :  20.5702
W :  [[-14.82641029]
 [ -2.91200566]
 [  0.35334697]
 [ -2.29912543]]
B :  [ 454.36859131]

After 451 iterations, Cost :  20.5702
W :  [[-14.8264389 ]
 [ -2.91198492]
 [  0.3533417 ]
 [ -2.29913449]]
B :  [ 454.36859131]

After 453 iterations, Cost :  20.5702
W :  [[-14.82646561]
 [ -2.91196465]
 [  0.35333657]
 [ -2.29914308]]
B :  [ 454.36859131]

After 455 iterations, Cost :  20.5702
W :  [[-14.82649136]
 [ -2.91194534]
 [  0.35333171]
 [ -2.29915142]]
B :  [ 454.36859131]

After 457 iterations, Cost :  20.5702
W :  [[-14.82651615]
 [ -2.91192698]
 [  0.3533271 ]
 [ -2.29915929]]
B :  [ 454.36859131]

After 459 iterations, Cost :  20.5702
W :  [[-14.82653999]
 [ -2.91190934]
 [  0.35332263]
 [ -2.29916692]]
B :  [ 454.36859131]

After 461 iterations, Cost :  20.5702
W :  [[-14.82656288]
 [ -2.91189241]
 [  0.35331839]
 [ -2.29917431]]
B :  [ 454.36859131]

After 463 iterations, Cost :  20.5702
W :  [[-14.82658482]
 [ -2.91187596]
 [  0.35331422

## Check the Regression metrics

In [33]:
mean_squared_error(Y_pred, Y_test)

21.569778503602574

In [34]:
r2_score(Y_pred, Y_test)

0.91987562263330558