In [1]:
%matplotlib notebook

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pylab
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# ======================= Part 2: Plotting =======================
# Load the comma-separated training set. Column 0 is used as the feature and
# column 1 as the target throughout the rest of the notebook.
ex1data1 = np.genfromtxt('ex1data1.txt', delimiter=",")

# Scatter plot of the raw data. The axis units follow the prediction cell,
# which treats a feature value of 3.5 as a population of 35,000 and scales the
# predicted profit by 10,000.
plt.figure()
plt.plot(ex1data1[:, 0], ex1data1[:, 1], 'r+')
plt.xlabel('Population of city in 10,000s')
plt.ylabel('Profit in 10,000s')
# The trailing ';' keeps the matplotlib object repr out of the cell output.
plt.title('Training data');

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fab8e5c42e8>]

In [3]:
# =================== Part 3: Gradient descent ===================
y = ex1data1[:, 1]  # targets: second data column
m = len(y)  # number of training examples

# Design matrix: a column of ones (multiplies the intercept parameter) placed
# next to the single feature column.
X = np.column_stack((np.ones(len(ex1data1)), ex1data1[:, 0]))

theta = np.zeros(2)  # initialize fitting parameters

def computeCost(X, y, theta):
    """Return the squared-error cost of the linear model ``X . theta``.

    The value is ``sum((X . theta - y) ** 2) / (2 * len(X))``.

    Parameters
    ----------
    X : array-like
        Inputs, one row per example; multiplied with ``theta`` via ``np.matmul``.
    y : array-like
        Targets, subtracted from the predictions.
    theta : array-like
        Model parameters (ndarray, list or tuple).

    Returns
    -------
    The scalar cost.
    """
    residuals = np.matmul(X, theta) - y
    squared_error_total = np.sum(np.square(residuals))
    return squared_error_total / (2 * len(X))

In [4]:
# Some gradient descent settings
alpha = 0.01       # step size applied to each gradient update
iterations = 1500  # number of gradient descent steps

print('\nTesting the cost function ...\n')
# compute and display initial cost (theta is still the zero vector here)
J = computeCost(X, y, theta)
print('With theta = [0 ; 0]\nCost computed = ', J, sep='')
print('Expected cost value (approx) 32.07\n')


Testing the cost function ...

With theta = [0 ; 0]
Cost computed = 32.0727338775
Expected cost value (approx) 32.07



In [5]:
# further testing of the cost function, this time at a non-zero theta
J = computeCost(X, y, [-1, 2])
print('\nWith theta = [-1 ; 2]\nCost computed = ', J, sep='')
print('Expected cost value (approx) 54.24\n')


With theta = [-1 ; 2]
Cost computed = 54.242455082
Expected cost value (approx) 54.24



In [6]:
def gradientDescent(X, y, theta, alpha, iterations,
                    cost_history=None, theta_history=None):
    """Fit ``theta`` by batch gradient descent on the squared-error cost.

    Each iteration applies
    ``theta <- theta - alpha * ((X . theta - y) . X) / m`` with ``m = len(X)``,
    then records the new cost and the new parameters.

    Parameters
    ----------
    X, y : array-like
        Inputs (one row per example) and targets.
    theta : array-like
        Starting parameters. The name is rebound on every step; the object
        passed in is not modified in place.
    alpha : float
        Step size multiplying the gradient.
    iterations : int
        Number of update steps to run.
    cost_history : list, optional
        Receives ``computeCost(X, y, theta)`` after every step. When omitted,
        the module-level ``J_history`` list is used and must exist at call time.
    theta_history : list, optional
        Receives the starting ``theta`` followed by the value after every
        step. When omitted, the module-level ``J_history_points`` list is used
        and must exist at call time.

    Returns
    -------
    The parameters after the final step.
    """
    # Passing the lists explicitly avoids a hidden dependency on notebook
    # globals. The globals stay as the default so existing callers that read
    # J_history / J_history_points after the call keep working.
    if cost_history is None:
        cost_history = J_history
    if theta_history is None:
        theta_history = J_history_points

    m = len(X)
    theta_history.append(theta)

    for _ in range(iterations):
        residuals = np.matmul(X, theta) - y
        theta = theta - alpha * np.matmul(residuals, X) / m

        cost_history.append(computeCost(X, y, theta))
        theta_history.append(theta)

    return theta

In [7]:
# Fresh containers for the per-iteration cost and parameter values that
# gradientDescent records.
J_history = []
J_history_points = []
print('\nRunning Gradient Descent ...\n')
# run gradient descent
# Start from a fresh zero vector instead of the current `theta`: otherwise
# re-running this cell would resume from the already-fitted parameters and
# produce a different history and result than a clean run.
theta = gradientDescent(X, y, np.zeros(X.shape[1]), alpha, iterations)

# print theta to screen
print('Theta found by gradient descent:\n')
print(theta)
print('Expected theta values (approx)\n')
print(' -3.6303  1.1664\n\n')

# Convergence curve: cost recorded after each iteration.
plt.figure()
plt.plot(range(len(J_history)), J_history)
plt.xlabel('Iteration')
plt.ylabel('Cost J(theta)')
# The trailing ';' keeps the matplotlib object repr out of the cell output.
plt.title('Gradient descent convergence');


Running Gradient Descent ...

Theta found by gradient descent:

[-3.63029144  1.16636235]
Expected theta values (approx)

 -3.6303  1.1664




<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fab8e54da90>]

In [8]:
# Plot the linear fit: raw data as red crosses, model predictions as a line.
plt.figure()
plt.plot(ex1data1[:, 0], ex1data1[:, 1], 'r+', label='Training data')
plt.plot(ex1data1[:, 0], np.matmul(X, theta), label='Linear regression')
# Axis units follow the prediction cell (feature 3.5 -> population 35,000;
# predicted profit is scaled by 10,000).
plt.xlabel('Population of city in 10,000s')
plt.ylabel('Profit in 10,000s')
# The trailing ';' keeps the matplotlib object repr out of the cell output.
plt.legend();

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fab8e558128>]

In [9]:
# Predict for feature values 3.5 and 7 (populations of 35,000 and 70,000).
# The leading 1 in each input multiplies the intercept parameter; the result
# is scaled by 10,000 for display.
predict1 = np.matmul([1, 3.5], theta)
print('For population = 35,000, we predict a profit of ', predict1 * 10000, sep='')
predict2 = np.matmul([1, 7], theta)
print('For population = 70,000, we predict a profit of ', predict2 * 10000, sep='')

For population = 35,000, we predict a profit of 4519.7678677
For population = 70,000, we predict a profit of 45342.4501294


In [10]:
# ============= Part 4: Visualizing J(theta_0, theta_1) =============

# Surface plot
# Candidate parameter values: 100 evenly spaced points per axis.
theta0 = np.linspace(-10, 5, 100)
theta1 = np.linspace(-1, 4, 100)


def fun(t0, t1):
    """Cost of the notebook-level data (X, y) at parameters (t0, t1)."""
    return computeCost(X, y, (t0, t1))


# Evaluate the cost at every grid point, then restore the grid's 2-D shape.
pX, pY = np.meshgrid(theta0, theta1)
zs = np.array(list(map(fun, pX.ravel(), pY.ravel())))
pZ = zs.reshape(pX.shape)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(pX, pY, pZ)

ax.set_xlabel('theta0')
ax.set_ylabel('theta1')
ax.set_zlabel('cost')

plt.show()

<IPython.core.display.Javascript object>

In [11]:
# Contour plot
# Draw the cost surface at 15 contour levels spaced logarithmically between
# 0.01 and 1000 (10**-2 .. 10**3), then overlay the gradient descent path.

fig = plt.figure()
contour_levels = np.logspace(-2, 3, 15)
plt.contour(pX, pY, pZ, contour_levels)

# Keep every 125th recorded parameter vector so the path stays readable.
J_history_points_selected = np.array(J_history_points[::125])
path_theta0, path_theta1 = J_history_points_selected.T
plt.plot(path_theta0, path_theta1, 'r+')  # sampled points as red crosses
plt.plot(path_theta0, path_theta1)        # line connecting the samples
plt.show()

<IPython.core.display.Javascript object>