## Initialization

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

## Importing the Data

In [2]:
dataset = pd.read_csv('machine.data', header = None)
dataset.shape

(209, 10)

In [3]:
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,adviser,32/60,125,256,6000,256,16,128,198,199
1,amdahl,470v/7,29,8000,32000,32,8,32,269,253
2,amdahl,470v/7a,29,8000,32000,32,8,32,220,253
3,amdahl,470v/7b,29,8000,32000,32,8,32,172,253
4,amdahl,470v/7c,29,8000,16000,32,8,16,132,132


## Create X and Y

In [4]:
X = dataset.iloc[:, 2:8].values
Y = dataset.iloc[:, 8].values

In [5]:
X.shape

(209, 6)

In [6]:
Y.shape

(209,)

In [7]:
X

array([[  125,   256,  6000,   256,    16,   128],
       [   29,  8000, 32000,    32,     8,    32],
       [   29,  8000, 32000,    32,     8,    32],
       ..., 
       [  125,  2000,  8000,     0,     2,    14],
       [  480,   512,  8000,    32,     0,     0],
       [  480,  1000,  4000,     0,     0,     0]], dtype=int64)

In [8]:
Y

array([ 198,  269,  220,  172,  132,  318,  367,  489,  636, 1144,   38,
         40,   92,  138,   10,   35,   19,   28,   31,  120,   30,   33,
         61,   76,   23,   69,   33,   27,   77,   27,  274,  368,   32,
         63,  106,  208,   20,   29,   71,   26,   36,   40,   52,   60,
         72,   72,   18,   20,   40,   62,   24,   24,  138,   36,   26,
         60,   71,   12,   14,   20,   16,   22,   36,  144,  144,  259,
         17,   26,   32,   32,   62,   64,   22,   36,   44,   50,   45,
         53,   36,   84,   16,   38,   38,   16,   22,   29,   40,   35,
        134,   66,  141,  189,   22,  132,  237,  465,  465,  277,  185,
          6,   24,   45,    7,   13,   16,   32,   32,   11,   11,   18,
         22,   37,   40,   34,   50,   76,   66,   24,   49,   66,  100,
        133,   12,   18,   20,   27,   45,   56,   70,   80,  136,   16,
         26,   32,   45,   54,   65,   30,   50,   40,   62,   60,   50,
         66,   86,   74,   93,  111,  143,  105,  2

## Preprocess the Data

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
sc_X = StandardScaler()

In [11]:
X = sc_X.fit_transform(X)



In [12]:
X

array([[-0.30358614, -0.67502598, -0.49546221,  5.69420785,  1.66198873,
         4.23103334],
       [-0.67332952,  1.32629082,  1.72704937,  0.16762947,  0.48550899,
         0.52947875],
       [-0.67332952,  1.32629082,  1.72704937,  0.16762947,  0.48550899,
         0.52947875],
       ..., 
       [-0.30358614, -0.2243162 , -0.32449978, -0.62188173, -0.39685082,
        -0.16456273],
       [ 1.06369405, -0.60886675, -0.32449978,  0.16762947, -0.69097076,
        -0.70437278],
       [ 1.06369405, -0.48275071, -0.66642464, -0.62188173, -0.69097076,
        -0.70437278]])

## Create Train and Test Data

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 4)

In [15]:
X_train.shape

(167, 6)

In [16]:
X_test.shape

(42, 6)

In [17]:
Y_train.shape

(167,)

In [18]:
Y_test.shape

(42,)

## Create and train the TensorFlow ANN Regressor

In [19]:
# Model the Linear Regression Y = W1.X1 + W2.X2 + ... + Wn.Xn + B
# Create Weights and Biases TF Variables
num_features = X.shape[1]

W = tf.Variable(tf.zeros([num_features, 1]))
B = tf.Variable(tf.zeros([1]))

In [20]:
# Create x and y_ placeholders for train data
x = tf.placeholder(tf.float32, [None, num_features])
Wx = tf.matmul(x, W)

# y holds model's predicted values
y = Wx + B

# y_ is a placeholder for actual y values
y_ = tf.placeholder(tf.float32, [None, 1])

In [21]:
# Create Cost Function
cost = tf.reduce_mean(tf.square(y - y_))

In [22]:
# Create the optimizer which will minimize the cost
optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(cost)

In [23]:
def trainTheData(num_steps, optimizer_to_use, batch_size):
    init = tf.global_variables_initializer()
    # Initialize all the Global Variables
    
    with tf.Session() as sess:
        sess.run(init)
        
        for i in range(num_steps):
            
            # Calculate the starting index for a batch
            if batch_size == len(X_train):
                batch_start_index = 0
            elif batch_size > len(X_train):
                raise ValueError("Batch Size : " + str(batch_size) + ", must be less than Data Size : " + str(len(X_train)))
            else:
                batch_start_index = (i * batch_size) % (len(X_train) - batch_size)
            
            # Calculate the ending index for a batch
            batch_end_index = batch_start_index + batch_size
            
            # Get the X and Y values for the batch
            batch_X_values = X_train[batch_start_index : batch_end_index]
            batch_Y_values = Y_train[batch_start_index : batch_end_index]
            
            # Create the feed dictionary to be fed into the optimizer
            feed = {x : np.array(batch_X_values), y_ : np.transpose(np.array([batch_Y_values]))}
            
            sess.run(optimizer_to_use, feed_dict = feed)
            
            # Print out the cost and other values for every 2nd iteration
            if (i + 1) % 2 == 0:
                print("After "+str(i)+" Iterations, Cost : ", sess.run(cost, feed_dict = feed))
                print("W : ", sess.run(W))
                print("B : ", sess.run(B))
                print("")
                
        # Calculate the would be predicted values for test data by the model
        Y_pred = sess.run(y, feed_dict = {x : np.array(X_test)})
        
        # Close the session
        sess.close()
        
        # return the predicted values for Y
        return Y_pred

In [24]:
Y_pred = trainTheData(150, optimizer, len(X_train))

After 1 Iterations, Cost :  9890.53
W :  [[-10.5028429 ]
 [ 39.78395462]
 [ 36.28247452]
 [ 29.54290199]
 [ 25.2005043 ]
 [ 21.05860138]]
B :  [ 38.05379105]

After 3 Iterations, Cost :  6379.12
W :  [[ -5.70243931]
 [ 48.58276367]
 [ 44.10777283]
 [ 31.42985916]
 [ 23.90012741]
 [ 23.05827522]]
B :  [ 62.5527153]

After 5 Iterations, Cost :  4923.72
W :  [[ -0.94846368]
 [ 52.63629913]
 [ 48.36685562]
 [ 30.57162857]
 [ 20.51389694]
 [ 23.92304993]]
B :  [ 78.26096344]

After 7 Iterations, Cost :  4257.59
W :  [[  2.65181327]
 [ 55.12083054]
 [ 51.71017456]
 [ 29.53013039]
 [ 17.38670731]
 [ 25.08814049]]
B :  [ 88.34851837]

After 9 Iterations, Cost :  3932.93
W :  [[  5.30530596]
 [ 56.7100563 ]
 [ 54.51241684]
 [ 28.58181381]
 [ 14.66855431]
 [ 26.46712112]]
B :  [ 94.8476944]

After 11 Iterations, Cost :  3761.7
W :  [[  7.26219082]
 [ 57.70975876]
 [ 56.8955307 ]
 [ 27.75656509]
 [ 12.31036472]
 [ 27.91178894]]
B :  [ 99.05425262]

After 13 Iterations, Cost :  3663.11
W :  [[  8.

B :  [ 107.98944092]

After 117 Iterations, Cost :  3439.24
W :  [[ 13.73457527]
 [ 54.61194992]
 [ 75.50448608]
 [ 23.86362267]
 [ -3.87682223]
 [ 39.72914505]]
B :  [ 107.99084473]

After 119 Iterations, Cost :  3439.23
W :  [[ 13.73550892]
 [ 54.59563828]
 [ 75.52431488]
 [ 23.86594009]
 [ -3.87748456]
 [ 39.72364426]]
B :  [ 107.99215698]

After 121 Iterations, Cost :  3439.23
W :  [[ 13.73637295]
 [ 54.58032227]
 [ 75.54286957]
 [ 23.86812782]
 [ -3.87800312]
 [ 39.71839523]]
B :  [ 107.99337769]

After 123 Iterations, Cost :  3439.23
W :  [[ 13.73717213]
 [ 54.56594849]
 [ 75.5602417 ]
 [ 23.87019539]
 [ -3.8783989 ]
 [ 39.71339035]]
B :  [ 107.99451447]

After 125 Iterations, Cost :  3439.22
W :  [[ 13.73791409]
 [ 54.55245209]
 [ 75.57649994]
 [ 23.87214279]
 [ -3.87869382]
 [ 39.70862198]]
B :  [ 107.99558258]

After 127 Iterations, Cost :  3439.22
W :  [[ 13.73860168]
 [ 54.53978729]
 [ 75.59173584]
 [ 23.87397957]
 [ -3.8789022 ]
 [ 39.7040863 ]]
B :  [ 107.9965744]

After 1

In [25]:
Y_pred

array([[ 200.35064697],
       [ 228.77294922],
       [  57.79123688],
       [  -3.39061737],
       [ 283.92956543],
       [ 324.83172607],
       [  43.55577087],
       [  -3.13371277],
       [ 122.23786926],
       [ 248.77929688],
       [ 217.50112915],
       [   2.39854431],
       [ -19.27331543],
       [  18.5236969 ],
       [  20.91769409],
       [ 231.92678833],
       [  83.50999451],
       [ 787.66046143],
       [ 372.12744141],
       [   5.77098083],
       [ -34.20420837],
       [   8.77597046],
       [ 271.27770996],
       [  32.35202026],
       [ 283.92956543],
       [ -14.95013428],
       [  35.23656464],
       [  36.28730774],
       [ 111.25990295],
       [ -13.27415466],
       [  41.39196777],
       [ 118.7695694 ],
       [  95.28335571],
       [  25.75640869],
       [  10.56394958],
       [  99.65618134],
       [ 451.37011719],
       [  30.66970825],
       [  43.41369629],
       [  79.02217865],
       [  33.15346527],
       [  -3.502

## Check the Relevant Metrics for the Linear Regression Model

In [26]:
from sklearn.metrics import mean_squared_error, r2_score

In [27]:
mean_squared_error(Y_pred, Y_test)

3904.0793229793962

In [28]:
r2_score(Y_pred, Y_test)

0.84571706626839849