## Programming Exercise 1 - Linear Regression

- [warmUpExercise](#warmUpExercise)
- [Linear regression with one variable](#Linear-regression-with-one-variable)
- [Gradient Descent](#Gradient-Descent)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression


#### warmUpExercise

In [2]:
def warmUpExercise():
    """Return the 5x5 identity matrix as a float NumPy array."""
    size = 5
    eye_matrix = np.identity(size)
    return eye_matrix

In [3]:
# Call the warm-up function; as the cell's last expression, its return value is displayed.
warmUpExercise()

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

### Linear regression with one variable

In [39]:
# Read the comma-separated training set. Column 0 is the single feature
# (plotted below as city population) and column 1 is the target (profit).
data = np.loadtxt('data/ex1data1.txt', delimiter=',')

# Design matrix: a leading column of ones (intercept term) next to the feature.
n_samples = data.shape[0]
X = np.column_stack((np.ones(n_samples), data[:, 0]))

# Targets as an (m, 1) column vector so they line up with X.dot(theta).
y = data[:, [1]]

print(X)

[[  1.       6.1101]
 [  1.       5.5277]
 [  1.       8.5186]
 [  1.       7.0032]
 [  1.       5.8598]
 [  1.       8.3829]
 [  1.       7.4764]
 [  1.       8.5781]
 [  1.       6.4862]
 [  1.       5.0546]
 [  1.       5.7107]
 [  1.      14.164 ]
 [  1.       5.734 ]
 [  1.       8.4084]
 [  1.       5.6407]
 [  1.       5.3794]
 [  1.       6.3654]
 [  1.       5.1301]
 [  1.       6.4296]
 [  1.       7.0708]
 [  1.       6.1891]
 [  1.      20.27  ]
 [  1.       5.4901]
 [  1.       6.3261]
 [  1.       5.5649]
 [  1.      18.945 ]
 [  1.      12.828 ]
 [  1.      10.957 ]
 [  1.      13.176 ]
 [  1.      22.203 ]
 [  1.       5.2524]
 [  1.       6.5894]
 [  1.       9.2482]
 [  1.       5.8918]
 [  1.       8.2111]
 [  1.       7.9334]
 [  1.       8.0959]
 [  1.       5.6063]
 [  1.      12.836 ]
 [  1.       6.3534]
 [  1.       5.4069]
 [  1.       6.8825]
 [  1.      11.708 ]
 [  1.       5.7737]
 [  1.       7.8247]
 [  1.       7.0931]
 [  1.       5.0702]
 [  1.       

In [40]:
# Scatter plot of the training set: feature column on x, target on y.
fig, ax = plt.subplots()
ax.scatter(X[:, 1], y, s=30, c='r', marker='x', linewidths=1)
ax.set_xlabel('Population of City in 10,000s')
ax.set_ylabel('Profit in $10,000s')
plt.show()

#### Gradient Descent

In [21]:
def computeCost(X, y, theta=None):
    """Return the squared-error cost J(theta) of a linear model.

    J = 1/(2m) * sum((X.dot(theta) - y)**2), where m is the number of samples.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like, shape (m,) or (m, 1)
        Target values.
    theta : array-like with n entries, optional
        Model parameters. When omitted, a zero vector sized to the number
        of columns in X is used (for a two-column X this matches the
        previous hard-coded default of [[0], [0]]).

    Returns
    -------
    float
        The cost for the given parameters.
    """
    X = np.asarray(X, dtype=float)
    # Force column vectors: subtracting a 1-D y of shape (m,) from an (m, 1)
    # hypothesis would broadcast to an (m, m) matrix and silently return a
    # wrong cost.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    if theta is None:
        theta = np.zeros((X.shape[1], 1))
    else:
        theta = np.asarray(theta, dtype=float).reshape(-1, 1)

    m = y.size
    residual = X.dot(theta) - y

    # Same operation order as before, so previously recorded values are unchanged.
    return 1/(2*m)*np.sum(np.square(residual))

In [22]:
# Cost of the model on the training data; theta is omitted, so computeCost uses its default.
computeCost(X,y)

32.072733877455676

In [23]:
def gradientDescent(X, y, theta=None, alpha=0.01, num_iters=1500):
    """Fit linear-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like, shape (m,) or (m, 1)
        Target values.
    theta : array-like with n entries, optional
        Starting parameters. When omitted, a zero vector sized to the number
        of columns in X is used (for a two-column X this matches the
        previous hard-coded default of [[0], [0]]).
    alpha : float
        Learning rate (step size).
    num_iters : int
        Number of update steps to perform.

    Returns
    -------
    (theta, J_history)
        theta is the (n, 1) parameter vector after the last step;
        J_history[i] is the cost evaluated right after the i-th update.
    """
    X = np.asarray(X, dtype=float)
    # Force column vectors: with a 1-D y, (h - y) would broadcast to (m, m)
    # and theta would silently turn into an (n, m) matrix.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    if theta is None:
        theta = np.zeros((X.shape[1], 1))
    else:
        theta = np.asarray(theta, dtype=float).reshape(-1, 1)

    m = y.size
    J_history = np.zeros(num_iters)

    for step in range(num_iters):
        residual = X.dot(theta) - y
        # X.T is (n, m) and residual is (m, 1), so the gradient is (n, 1),
        # the same shape as theta.
        theta = theta - alpha*(1/m)*(X.T.dot(residual))
        J_history[step] = computeCost(X, y, theta)
    return(theta, J_history)

In [24]:
# Run gradient descent with its default settings, report the fitted
# parameters, and plot the cost recorded at each iteration.
theta, Cost_J = gradientDescent(X, y)
print('theta: ',theta)

fig, ax = plt.subplots()
ax.plot(Cost_J)
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost J');

theta:  [[-3.63029144]
 [ 1.16636235]]


In [37]:
# Evaluate the gradient-descent line on the integer x values 5..22.
xx = np.arange(5,23)
yy = theta[0] + theta[1] * xx

fig, ax = plt.subplots()

# Training data with the gradient-descent fit drawn on top.
ax.scatter(X[:, 1], y, s=30, c='r', marker='x', linewidths=1)
ax.plot(xx, yy, label='Linear regression (Gradient descent)')

# Reference fit from scikit-learn for comparison. Only the feature column is
# passed (as an (m, 1) array, without the ones column); the intercept is read
# back from regr.intercept_. The targets are flattened to 1-D.
regr = LinearRegression()
regr.fit(X[:, [1]], y.ravel())
ax.plot(xx, regr.intercept_ + regr.coef_ * xx,
        label='Linear regression (Scikit-learn GLM)')

ax.set_xlim(4, 24)
ax.set_xlabel('Population of City in 10,000s')
ax.set_ylabel('Profit in $10,000s')
ax.legend(loc=4)
plt.show()

[[  6.1101]
 [  5.5277]
 [  8.5186]
 [  7.0032]
 [  5.8598]
 [  8.3829]
 [  7.4764]
 [  8.5781]
 [  6.4862]
 [  5.0546]
 [  5.7107]
 [ 14.164 ]
 [  5.734 ]
 [  8.4084]
 [  5.6407]
 [  5.3794]
 [  6.3654]
 [  5.1301]
 [  6.4296]
 [  7.0708]
 [  6.1891]
 [ 20.27  ]
 [  5.4901]
 [  6.3261]
 [  5.5649]
 [ 18.945 ]
 [ 12.828 ]
 [ 10.957 ]
 [ 13.176 ]
 [ 22.203 ]
 [  5.2524]
 [  6.5894]
 [  9.2482]
 [  5.8918]
 [  8.2111]
 [  7.9334]
 [  8.0959]
 [  5.6063]
 [ 12.836 ]
 [  6.3534]
 [  5.4069]
 [  6.8825]
 [ 11.708 ]
 [  5.7737]
 [  7.8247]
 [  7.0931]
 [  5.0702]
 [  5.8014]
 [ 11.7   ]
 [  5.5416]
 [  7.5402]
 [  5.3077]
 [  7.4239]
 [  7.6031]
 [  6.3328]
 [  6.3589]
 [  6.2742]
 [  5.6397]
 [  9.3102]
 [  9.4536]
 [  8.8254]
 [  5.1793]
 [ 21.279 ]
 [ 14.908 ]
 [ 18.959 ]
 [  7.2182]
 [  8.2951]
 [ 10.236 ]
 [  5.4994]
 [ 20.341 ]
 [ 10.136 ]
 [  7.3345]
 [  6.0062]
 [  7.2259]
 [  5.0269]
 [  6.5479]
 [  7.5386]
 [  5.0365]
 [ 10.274 ]
 [  5.1077]
 [  5.7292]
 [  5.1884]
 [  6.3557]
 [  

In [16]:
# Predicted profit for cities of 35,000 and 70,000 people. The feature is in
# units of 10,000 and is prefixed with 1 for the intercept; the result is
# scaled by 10,000 before printing.
for population in (3.5, 7):
    features = [1, population]
    print(theta.T.dot(features) * 10000)

[ 4519.7678677]
[ 45342.45012945]
