## Initialization

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

## Import the Data

In [3]:
dataset = pd.read_csv('day.csv')
dataset.shape

(731, 16)

In [4]:
dataset.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


## Create X and Y

In [5]:
X = dataset.iloc[:, 2:14].values
Y = dataset.iloc[:, 15].values

In [6]:
X

array([[  1.00000000e+00,   0.00000000e+00,   1.00000000e+00, ...,
          8.05833000e-01,   1.60446000e-01,   3.31000000e+02],
       [  1.00000000e+00,   0.00000000e+00,   1.00000000e+00, ...,
          6.96087000e-01,   2.48539000e-01,   1.31000000e+02],
       [  1.00000000e+00,   0.00000000e+00,   1.00000000e+00, ...,
          4.37273000e-01,   2.48309000e-01,   1.20000000e+02],
       ..., 
       [  1.00000000e+00,   1.00000000e+00,   1.20000000e+01, ...,
          7.52917000e-01,   1.24383000e-01,   1.59000000e+02],
       [  1.00000000e+00,   1.00000000e+00,   1.20000000e+01, ...,
          4.83333000e-01,   3.50754000e-01,   3.64000000e+02],
       [  1.00000000e+00,   1.00000000e+00,   1.20000000e+01, ...,
          5.77500000e-01,   1.54846000e-01,   4.39000000e+02]])

In [8]:
Y

array([ 985,  801, 1349, 1562, 1600, 1606, 1510,  959,  822, 1321, 1263,
       1162, 1406, 1421, 1248, 1204, 1000,  683, 1650, 1927, 1543,  981,
        986, 1416, 1985,  506,  431, 1167, 1098, 1096, 1501, 1360, 1526,
       1550, 1708, 1005, 1623, 1712, 1530, 1605, 1538, 1746, 1472, 1589,
       1913, 1815, 2115, 2475, 2927, 1635, 1812, 1107, 1450, 1917, 1807,
       1461, 1969, 2402, 1446, 1851, 2134, 1685, 1944, 2077,  605, 1872,
       2133, 1891,  623, 1977, 2132, 2417, 2046, 2056, 2192, 2744, 3239,
       3117, 2471, 2077, 2703, 2121, 1865, 2210, 2496, 1693, 2028, 2425,
       1536, 1685, 2227, 2252, 3249, 3115, 1795, 2808, 3141, 1471, 2455,
       2895, 3348, 2034, 2162, 3267, 3126,  795, 3744, 3429, 3204, 3944,
       4189, 1683, 4036, 4191, 4073, 4400, 3872, 4058, 4595, 5312, 3351,
       4401, 4451, 2633, 4433, 4608, 4714, 4333, 4362, 4803, 4182, 4864,
       4105, 3409, 4553, 3958, 4123, 3855, 4575, 4917, 5805, 4660, 4274,
       4492, 4978, 4677, 4679, 4758, 4788, 4098, 39

## Preprocess the Data

In [9]:
sc_X = StandardScaler()

In [10]:
X = sc_X.fit_transform(X)

In [11]:
X

array([[-1.34821315, -1.00136893, -1.60016072, ...,  1.25017133,
        -0.38789169, -0.7537338 ],
       [-1.34821315, -1.00136893, -1.60016072, ...,  0.47911298,
         0.74960172, -1.04521412],
       [-1.34821315, -1.00136893, -1.60016072, ..., -1.33927398,
         0.74663186, -1.06124554],
       ..., 
       [-1.34821315,  0.99863295,  1.58866019, ...,  0.87839173,
        -0.85355213, -1.00440687],
       [-1.34821315,  0.99863295,  1.58866019, ..., -1.01566357,
         2.06944426, -0.70563955],
       [-1.34821315,  0.99863295,  1.58866019, ..., -0.35406086,
        -0.46020122, -0.59633443]])

## Create Train and Test Data

In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 4)

In [13]:
X_train.shape

(584, 12)

In [14]:
X_test.shape

(147, 12)

In [15]:
Y_train.shape

(584,)

In [16]:
Y_test.shape

(147,)

## Create TF Regressor

In [17]:
num_features = X.shape[1]
num_features

12

In [18]:
# Create Weights and Biases Variable
W = tf.Variable(tf.zeros([num_features, 1]))
B = tf.Variable(tf.zeros([1]))

In [19]:
# Create x and y_ placeholders for holding actual dataset values
x = tf.placeholder(tf.float32, [None, num_features])
y_ = tf.placeholder(tf.float32, [None, 1])

In [20]:
# y holds the model's predicted values
Wx = tf.matmul(x, W)
y = Wx + B

In [21]:
# Create Cost function which has to be minimized
cost = tf.reduce_mean(tf.square(y - y_))

In [22]:
# Create an optimizer to minimize cost function
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.1).minimize(cost)

In [23]:
def trainTheData(num_steps, optimizer_to_use, batch_size):
    init = tf.global_variables_initializer()
    # initialize the global variables
    
    with tf.Session() as sess:
        sess.run(init)
        
        for i in range(num_steps):
            
            # Calculate batch start index
            if batch_size == len(X_train):
                batch_start_index = 0
            elif batch_size > len(X_train):
                raise ValueError("Batch Size : " + str(batch_size) + " cannot be greater than Data Size : ",len(X_train))
            else:
                batch_start_index = (i * batch_size) % (len(X_train) - batch_size)
                
            # Calculate batch end index
            batch_end_index = batch_start_index + batch_size
            
            # Create Batch X and Y values
            batch_X_values = X_train[batch_start_index : batch_end_index]
            batch_Y_values = Y_train[batch_start_index : batch_end_index]
            
            # Create feed dictionary to feed it to the optimizer
            feed = {x : np.array(batch_X_values), y_ : np.transpose(np.array([batch_Y_values]))}
            
            sess.run(optimizer_to_use, feed_dict = feed)
            
            # Print out every 2nd iteration value
            if (i + 1) % 2 == 0:
                print("After " + str(i) + " iterations, Cost : ", sess.run(cost, feed_dict = feed))
                print("W : ", sess.run(W))
                print("B : ", sess.run(B))
                print("")
                
        Y_pred = sess.run(y, feed_dict = {x : np.array(X_test)})
        
        sess.close()
        return Y_pred        

In [24]:
Y_pred = trainTheData(num_steps = 200, optimizer_to_use = optimizer, batch_size = len(X_train))

After 1 iterations, Cost :  9.21693e+06
W :  [[ 218.92228699]
 [ 391.8949585 ]
 [ 140.05288696]
 [ -61.96574402]
 [  12.12859154]
 [  70.70095825]
 [-193.33322144]
 [ 367.48690796]
 [ 365.62405396]
 [ -75.96592712]
 [ -90.56378937]
 [ 387.70281982]]
B :  [ 1623.3762207]

After 3 iterations, Cost :  4.02578e+06
W :  [[ 252.26815796]
 [ 596.85321045]
 [ 135.44932556]
 [ -87.85930634]
 [  22.45712852]
 [ 152.05499268]
 [-246.3677063 ]
 [ 411.4798584 ]
 [ 408.11843872]
 [-122.54645538]
 [ -98.27160645]
 [ 492.8482666 ]]
B :  [ 2658.41333008]

After 5 iterations, Cost :  1.93541e+06
W :  [[ 265.45632935]
 [ 715.18536377]
 [ 121.11045074]
 [ -95.46099091]
 [  33.0662384 ]
 [ 216.36422729]
 [-260.03482056]
 [ 401.38296509]
 [ 397.69400024]
 [-139.49790955]
 [-104.2460556 ]
 [ 542.26727295]]
B :  [ 3319.4855957]

After 7 iterations, Cost :  1.07667e+06
W :  [[ 277.53872681]
 [ 785.40203857]
 [ 109.65735626]
 [ -93.70695496]
 [  42.71106339]
 [ 265.0847168 ]
 [-261.97952271]
 [ 383.38195801]
 [

After 121 iterations, Cost :  411872.0
W :  [[  419.00314331]
 [  816.17602539]
 [  -23.67508125]
 [    1.2428056 ]
 [   54.79554749]
 [  623.68481445]
 [ -240.42298889]
 [  184.37931824]
 [  246.79322815]
 [  -72.03892517]
 [ -120.86180878]
 [ 1011.19299316]]
B :  [ 4494.89111328]

After 123 iterations, Cost :  411867.0
W :  [[  419.103302  ]
 [  816.06768799]
 [  -23.77644157]
 [    1.32302392]
 [   54.7653656 ]
 [  624.02679443]
 [ -240.40486145]
 [  183.71116638]
 [  247.16290283]
 [  -71.96292877]
 [ -120.79600525]
 [ 1011.66143799]]
B :  [ 4494.89208984]

After 125 iterations, Cost :  411863.0
W :  [[  419.1947937 ]
 [  815.96716309]
 [  -23.8692131 ]
 [    1.39782929]
 [   54.73757935]
 [  624.34460449]
 [ -240.38774109]
 [  183.05570984]
 [  247.54145813]
 [  -71.89279175]
 [ -120.73416901]
 [ 1012.09667969]]
B :  [ 4494.89257812]

After 127 iterations, Cost :  411859.0
W :  [[  419.27828979]
 [  815.87384033]
 [  -23.95408058]
 [    1.46760356]
 [   54.71202087]
 [  624.640014

## Check the Regression Metrics

In [25]:
mean_squared_error(Y_pred, Y_test)

391697.77537371474

In [26]:
r2_score(Y_pred, Y_test)

0.87810138353429323