## Place csv in the same folder as .pynb notebook

In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from math import sqrt
import tensorflow as tf

## 1 Linear regression with one variable from scratch

In [2]:
data1 = pd.read_csv("ex1data1.csv",header=None)
X_1= data1.as_matrix(columns=data1.columns[0:1])
Y_1 = data1.as_matrix(columns=data1.columns[1:])
X_new_1 = np.c_[np.ones((data1.shape[0], 1)), X_1]

In [48]:
print (X_new_1[1:2,:])

[[1.     5.5277]]


In [8]:
from __future__ import division
eta = 0.01  # learning rate
n_iterations = 10000
m1 = data1.shape[0]
theta_1 = np.random.randn(data1.shape[1],1)  # random initialization
gradients = np.zeros((2, 1))
for iteration in range(n_iterations):
    for x in range(m1):
        gradients = gradients+(X_new_1[x:x+1,:].T.dot(X_new_1[x:x+1,:].dot(theta_1)-Y_1[x:x+1,:])/0.5)
    gradients = gradients/m1
    theta_1 = theta_1 - eta * gradients
print (theta_1)

[[-3.89578088]
 [ 1.19303364]]


In [31]:
## TO Verify

theta_normal1 = np.linalg.inv(X_new_1.T.dot(X_new_1)).dot(X_new_1.T).dot(Y_1)
print (theta_normal1)


[[-3.89578088]
 [ 1.19303364]]


# 1 RMSE Linear regression with one variable from scratch 



In [40]:
predict_y_1 = X_new_1.dot(theta_1)
rms_1 = sqrt(mean_squared_error(Y_1, predict_y_1))
print (rms_1)

2.9923139460876023


# 2 -Linear regression with two variables from scratch

In [33]:
data2 = pd.read_csv("ex1data2.csv",header=None)
scaler = StandardScaler()
X = data2.as_matrix(columns=data2.columns[0:2])
Y = data2.as_matrix(columns=data2.columns[2:])
X_new = scaler.fit_transform(X)
Y_new = scaler.fit_transform(Y)
X_new = np.c_[np.ones((data2.shape[0], 1)), X_new]



In [68]:
from __future__ import division
eta = 0.01  # learning rate
n_iterations = 10000
m2 = data2.shape[0]
theta_22 = np.random.randn(data2.shape[1],1)  # random initialization
gradients2 = np.zeros((3, 1))
for iteration in range(n_iterations):
    for x in range(m2):
        gradients2 = gradients2+(X_new[x:x+1,:].T.dot(X_new[x:x+1,:].dot(theta_22)-Y_new[x:x+1,:])/0.5)
    gradients2 = gradients2/m2
    theta_22 = theta_22 - eta * gradients2
print (theta_22)

[[-1.45764704e-16]
 [ 8.84765655e-01]
 [-5.31781837e-02]]


# 2 -Linear regression with two variables from scratch - RMSE


In [69]:
predict_y_2_scratch = X_new.dot(theta_22)
rms_2_scratch = sqrt(mean_squared_error(Y, predict_y_2_scratch))
print (rms_2_scratch)

362191.9244761875


# 2-1 Linear regression with two variables using matrix

In [71]:

eta = 0.01  # learning rate
n_iterations = 10000
theta = np.random.randn(data2.shape[1],1)  # random initialization
for iteration in range(n_iterations):
    #print (iteration)
    gradients = 2/m2 * X_new.T.dot(X_new.dot(theta)- Y_new)
    theta = theta - eta * gradients
print (theta)

[[-1.52695817e-16]
 [ 8.84765655e-01]
 [-5.31781837e-02]]


### 2-1 RMSE Linear regression with two variables using matrix

In [72]:
predict_y = X_new.dot(theta)
rms = sqrt(mean_squared_error(Y, predict_y))
print (rms)

362191.9244761875


# 2-2. Linear regression with two variables using Normal equation

In [74]:
theta_normal = np.linalg.inv(X_new.T.dot(X_new)).dot(X_new.T).dot(Y_new)
print (theta_normal)


[[-1.17961196e-16]
 [ 8.84765655e-01]
 [-5.31781837e-02]]


### 2-2 RMSE Linear regression with two variables using matrix

In [75]:
predict_y2 = X_new.dot(theta_normal)
rms_2_normal = sqrt(mean_squared_error(Y, predict_y2))
print (rms_2_normal)

362191.9244761875


# 3 Linear regression with multiple variables
## 3-1. Linear regression with multiple variables using matrix

In [76]:
data3 = pd.read_csv("ex1data3.csv")
del data3['Unnamed: 0']

In [77]:
X_3 = data3.as_matrix(columns=data3.columns[0:8])
Y_3 = data3.as_matrix(columns=data3.columns[8:])
X_3_new = scaler.fit_transform(X_3)
Y_3_new = scaler.fit_transform(Y_3)
X_3_new = np.c_[np.ones((data3.shape[0], 1)), X_3_new]

In [78]:
eta = 0.01  # learning rate
n_iterations = 10000
m3 = data3.shape[0]

theta_3 = np.random.randn(data3.shape[1],1)  # random initialization
for iteration in range(n_iterations):
    #print (iteration)
    gradients = 2/m3 * X_3_new.T.dot(X_3_new.dot(theta_3)- Y_3_new)
    theta_3 = theta_3 - eta * gradients
print (theta_3)

[[-7.16793570e-15]
 [ 7.18983338e-01]
 [ 1.02916536e-01]
 [-2.30165753e-01]
 [ 2.64966590e-01]
 [-3.90063278e-03]
 [-3.40814906e-02]
 [-7.79774311e-01]
 [-7.54347710e-01]]


## 3-1. RMSE -  Linear regression with multiple variables using matrix

In [79]:
predict_y3 = X_3_new.dot(theta_3)
rms_3_matrix = sqrt(mean_squared_error(Y_3_new, predict_y3))
print (rms_3_matrix)

0.6275088171094902


## 3-2. Linear regression with multiple variables using Normal equation


In [80]:
theta_3_normal = np.linalg.inv(X_3_new.T.dot(X_3_new)).dot(X_3_new.T).dot(Y_3_new)
print (theta_3_normal)


[[-7.26675664e-15]
 [ 7.18952272e-01]
 [ 1.02910780e-01]
 [-2.30106933e-01]
 [ 2.64917894e-01]
 [-3.90232364e-03]
 [-3.40803413e-02]
 [-7.79845446e-01]
 [-7.54415222e-01]]


## 3-2. RMSE -  Linear regression with multiple variables using normal equation

In [81]:
predict_y3_normal = X_3_new.dot(theta_3_normal)
rms_3_normal = sqrt(mean_squared_error(Y_3_new, predict_y3_normal))
print (rms_3_normal)

0.627508816511937


## 3-3. Linear regression with multiple variables using scikit-learn linear regression model

In [82]:
import sklearn
regr_model = sklearn.linear_model.LinearRegression()
X_3_regr_model = scaler.fit_transform(X_3)
regr_model.fit(X_3_regr_model, Y_3_new)
Y_predict_model = regr_model.predict(X_3_regr_model)

## 3-3. RMSE - Linear regression with multiple variables using scikit-learn linear regression model

In [83]:

rms_3_model = sqrt(mean_squared_error(Y_3_new, Y_predict_model))
print (rms_3_model)

0.627508816511937


## 3-4. Linear regression with multiple variables using TensorFlow with RMSE

In [84]:

n_epochs = 10000
learning_rate = 0.01
tf.reset_default_graph()
X_4 = tf.constant(X_3_new, dtype=tf.float32, name="X")
y = tf.constant(Y_3_new, dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([data3.shape[1], 1], -1.0, 1.0), name="theta")
y_pred_4 = tf.matmul(X_4, theta, name="predictions")
error = y_pred_4 - y
mse = tf.sqrt(tf.reduce_mean(tf.square(error), name="mse"))
gradients = 2/m3 * tf.matmul(tf.transpose(X_4), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        sess.run(training_op)
    best_RMSE = mse.eval()
    best_theta = theta.eval()
    print (best_RMSE)

0.62750894
