# Linear Regression

The purpose of this notebook is to review Python syntax by fitting
a linear regression to simulated data.

In [33]:
import numpy as np
import pandas as pd
import tensorflow as tf
from numpy.linalg import inv
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
%matplotlib inline

#### Data Generating Process

In [34]:
# Fix the seed so the simulated sample is reproducible.
np.random.seed(123)

# Two Gaussian regressors and a Gaussian noise term, 1000 draws each.
# The draw order is significant: it decides which random numbers each array gets.
X1 = np.random.normal(loc=1, scale=3, size=1000)
X2 = np.random.normal(loc=-3, scale=1, size=1000)
epsilon = np.random.normal(loc=0, scale=4, size=1000)

# True model: intercept 2, slope 3 on X1, slope -4 on X2, plus noise.
y = 2 + 3 * X1 - 4 * X2 + epsilon

In [35]:
# Collect the simulated regressors and response in one pandas DataFrame,
# one column per variable.
df = pd.DataFrame(dict(X1=X1, X2=X2, y=y))

#### OLS Solution

$$ \hat W = \left( X^T X \right)^{-1} X^T y $$

In [36]:
# Design matrix: a column of ones (the intercept term) followed by the two
# regressors, one row per observation.
X0 = np.ones(X1.size, dtype=int)
X = np.array([X0, X1, X2]).T
Xt = X.T

In [37]:
# Solve the normal equations (X^T X) W = X^T y directly instead of forming the
# explicit inverse: the estimate is the same, but solving the linear system is
# cheaper and numerically more stable than inverting X^T X.
np.linalg.solve(np.dot(Xt, X), np.dot(Xt, y))

array([ 1.90249596,  2.9570328 , -4.05909316])

#### Scikit Learn

In [38]:
# Ordinary least squares estimator; the intercept is fitted (the default).
lm = LinearRegression(fit_intercept=True)

In [39]:
# Regress y on the two feature columns of the DataFrame.
model = lm.fit(X=df[['X1', 'X2']], y=df['y'])

In [40]:
# Show the fitted intercept first, then the slope coefficients.
for fitted in (model.intercept_, model.coef_):
    print(fitted)

1.90249595997
[ 2.9570328  -4.05909316]


#### TensorFlow

Fit the same model using gradient descent.

Newton's Method:
$$ W^* = W - \left. \frac{dL}{dW} \middle/ \frac{d^2 L}{dW^2} \right. $$

For gradient descent, assume that we can calculate the first derivative but not the second derivative, so the second-derivative term is replaced by a fixed step size:
$$ W^* = W - \eta \frac{dL}{dW} $$

where $\eta$ is called the learning rate; it controls the size of each update step.

In [129]:
# The active TensorFlow session; created (and recreated) by reset_tf().
sess = None


def reset_tf():
    """Close the current session if there is one, clear the default graph,
    and open a new session bound to the module-level name `sess`."""
    global sess
    if sess is not None:
        sess.close()
    tf.reset_default_graph()
    sess = tf.Session()


def reset_vars():
    """Run the global-variables initializer in the current session."""
    sess.run(tf.global_variables_initializer())


reset_tf()

In [130]:
# Trainable parameters, both starting at zero: a (2, 1) weight column and a
# one-element bias.
W = tf.Variable(initial_value=[[0.0], [0.0]], name='weight')
b = tf.Variable(initial_value=[0.0], name='bias')

# Inputs supplied at run time; None leaves the number of rows unspecified.
x = tf.placeholder(tf.float32, shape=[None, 2], name='x')
y_label = tf.placeholder(tf.float32, shape=[None, 1], name='y_label')

# Linear prediction and mean-squared-error loss.
y_ = tf.add(tf.matmul(x, W), b)
loss = tf.reduce_mean(tf.square(tf.subtract(y_, y_label)))

In [131]:
# Step size for the update rule W* = W - eta * dL/dW.
eta = tf.constant(0.05, name='learning_rate')

# tf.gradients returns a list with one gradient per requested variable,
# hence the [0]. Each op overwrites its variable with the stepped value.
iterate_W = W.assign(tf.subtract(W, eta * tf.gradients(loss, [W])[0]))
iterate_b = b.assign(tf.subtract(b, eta * tf.gradients(loss, [b])[0]))

In [153]:
reset_vars()

# Every step uses the full data set, so the feed is identical on each iteration
# and is built once up front.
# .reshape(-1, 1) turns the 1-D label vector into a 2-D column of shape (n, 1),
# matching the y_label placeholder's declared shape [None, 1].
feed = {x: np.vstack((X1, X2)).T,
        y_label: y.reshape(-1, 1)}

for i in range(200):
    # One full-batch gradient-descent step on both parameters ...
    sess.run([iterate_W, iterate_b], feed_dict=feed)
    # ... then report the mean squared error after the update.
    print(sess.run(loss, feed_dict=feed))

53.4344
19.2971
15.8188
15.4638
15.4272
15.423
15.4221
15.4216
15.4211
15.4206
15.4201
15.4197
15.4192
15.4188
15.4183
15.4179
15.4175
15.417
15.4166
15.4162
15.4158
15.4154
15.4151
15.4147
15.4143
15.4139
15.4136
15.4132
15.4129
15.4125
15.4122
15.4119
15.4116
15.4112
15.4109
15.4106
15.4103
15.41
15.4097
15.4095
15.4092
15.4089
15.4086
15.4084
15.4081
15.4078
15.4076
15.4073
15.4071
15.4068
15.4066
15.4064
15.4061
15.4059
15.4057
15.4055
15.4053
15.405
15.4048
15.4046
15.4044
15.4042
15.404
15.4039
15.4037
15.4035
15.4033
15.4031
15.4029
15.4028
15.4026
15.4024
15.4023
15.4021
15.402
15.4018
15.4017
15.4015
15.4014
15.4012
15.4011
15.4009
15.4008
15.4007
15.4005
15.4004
15.4003
15.4001
15.4
15.3999
15.3998
15.3997
15.3995
15.3994
15.3993
15.3992
15.3991
15.399
15.3989
15.3988
15.3987
15.3986
15.3985
15.3984
15.3983
15.3982
15.3981
15.398
15.3979
15.3978
15.3977
15.3977
15.3976
15.3975
15.3974
15.3973
15.3973
15.3972
15.3971
15.397
15.397
15.3969
15.3968
15.3968
15.3967
15.3966
15.396

In [154]:
# Pull the trained weight and bias out of the session as numpy arrays.
W_val, b_val = sess.run([W, b])

In [155]:
# Show the trained bias first, then the weights.
for fitted in (b_val, W_val):
    print(fitted)

[ 1.79411328]
[[ 2.95760846]
 [-4.09207678]]
