**LINEAR REGRESSION WITH ONE VARIABLE**

In [0]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D  # needed to plot 3-D surfaces

# tells matplotlib to embed plots within the notebook
%matplotlib inline


In [0]:

# Load the comma-separated training set from Data/ex1data1.txt
data_path = os.path.join('Data', 'ex1data1.txt')
data = np.loadtxt(data_path, delimiter=',')

# First column is the single input feature, second column is the target
X = data[:, 0]
y = data[:, 1]

# number of training examples
m = y.size

OSError: ignored

In [0]:
# Add a column of ones to X so the intercept term can be handled as an
# ordinary feature. The numpy function stack joins arrays along a new axis:
# the first axis (axis=0) refers to rows (training examples)
# and the second axis (axis=1) refers to columns (features).

# The assert raises an exception if the cell is run again without
# re-initializing X: after the first run X is 2-D, so its shape is no longer (m,).
# Comparing against m (rather than a hard-coded row count) keeps the guard
# valid for a data file of any length.
# DO NOT EXECUTE MORE THAN ONCE
assert X.shape == (m,), "X already contains a stack ones."

X = np.stack([np.ones(m), X], axis=1)

In [0]:
def computeCost(X, y, theta):
    """
    Compute the squared-error cost for linear regression.

    Evaluates J(theta) = 1 / (2 * m) * sum((X . theta - y) ** 2), the cost of
    using theta as the parameter vector to fit the data points in X and y.

    Parameters
    ----------
    X : array_like
        The input dataset of shape (m x n+1), where m is the number of examples,
        and n is the number of features. We assume a vector of one's already
        appended to the features so we have n+1 columns.

    y : array_like
        The values of the function at each data point. This is a vector of
        shape (m, ).

    theta : array_like
        The parameters for the regression function. This is a vector of
        shape (n+1, ).

    Returns
    -------
    J : float
        The value of the regression cost function.
    """
    # number of training examples
    m = y.size

    # difference between the linear hypothesis X . theta and the targets,
    # one entry per training example
    residuals = np.dot(X, theta) - y

    # the dot product of the residual vector with itself is the sum of
    # squared errors; scale it by 1 / (2m)
    J = np.dot(residuals, residuals) / (2 * m)
    return J

In [0]:
# Evaluate the cost at the all-zero parameter vector
J1 = computeCost(X, y, theta=np.array([0.0, 0.0]))
print('With theta = [0, 0] \nCost computed = %.2f' % J1)
print('Expected cost value (approximately) 32.07\n')

# Evaluate the cost at a second, non-trivial parameter vector
J2 = computeCost(X, y, theta=np.array([-1, 2]))
print('With theta = [-1, 2]\nCost computed = %.2f' % J2)
print('Expected cost value (approximately) 54.24\n')

# Total absolute deviation from the (rounded) reference costs, and the
# tolerance it has to stay within
limit1 = 1.2
est_Error1 = abs(J1 - 32) + abs(J2 - 54)

# Written as "not (<=)" so that a NaN error is still reported as a failure
if not (est_Error1 <= limit1):
    print("You may want to check your implementation.")
    print("[Estimated error : %.2f]" % est_Error1)
    assert False, "******* Results don't match. *******"
else:
    print("Great work! You may proceed.")

In [0]:
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn `theta`. Updates theta by taking `num_iters`
    gradient steps with learning rate `alpha`.

    Each step applies the batch update
        theta := theta - alpha / m * X^T (X . theta - y)
    to all components of theta simultaneously.

    Parameters
    ----------
    X : array_like
        The input dataset of shape (m x n+1).

    y : array_like
        Value at given features. A vector of shape (m, ).

    theta : array_like
        Initial values for the linear regression parameters.
        A vector of shape (n+1, ). The caller's array is not modified.

    alpha : float
        The learning rate.

    num_iters : int
        The number of iterations for gradient descent.

    Returns
    -------
    theta : array_like
        The learned linear regression parameters. A vector of shape (n+1, ).

    J_history : list
        A python list for the values of the cost function after each iteration.

    Notes
    -----
    Prints a separator line to stdout once all iterations have run.
    While debugging, it can be useful to print out the values of
    the cost function (computeCost) and gradient inside the loop.
    """
    # Initialize some useful values
    m = y.shape[0]  # number of training examples

    # make a copy of theta, to avoid changing the original array, since numpy arrays
    # are passed by reference to functions
    theta = theta.copy()

    J_history = []  # Use a python list to save cost in every iteration

    for _ in range(num_iters):
        # gradient of the squared-error cost with respect to theta,
        # computed from the residuals of the current parameters
        residuals = np.dot(X, theta) - y
        gradient = np.dot(X.T, residuals) / m

        # rebinding (rather than `-=`) updates every component at once and
        # also works when the initial theta has an integer dtype
        theta = theta - alpha * gradient

        # save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    print("----------------------------------------------------\n")
    return theta, J_history

In [0]:
# Initialize fitting parameters
theta = np.zeros(2)

# Model hyperparameters.
# ==================== YOUR CODE HERE =================================
# Feel free to change these if they don't seem to be working.
#
# NOTE: a handful of steps (e.g. 15) at alpha = 0.01 leaves theta far from
# the minimum, so the validation that follows cannot pass. The reference
# values printed below correspond to roughly 1500 steps at this learning rate.

alpha = 0.01
iterations = 1500

# QUESTION : What is the largest 'alpha' and smallest 'iterations' you can get away with ?

# =====================================================================

theta, J_history = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent: {:.4f}, {:.4f}'.format(*theta))
print('Expected theta values (approximately): [-3.6303, 1.1664]')

In [0]:
# Validate the learned parameters against the reference solution
# [-3.6303, 1.1664] (the same values printed as "Expected theta values").

limit2 = 1

# Total absolute deviation of both parameters from the reference values.
# The slope reference is 1.1664; comparing against 1.664 would add a spurious
# ~0.5 to the error of a correct solution.
est_Error2 = abs(theta[0] + 3.6303) + abs(theta[1] - 1.1664)

if est_Error2 <= limit2:
    print("Great work! Proceed to the Optional Exercises.")
else:
    print("You may want to check your implementation.")
    print("[Estimated error : %.2f]" % est_Error2)
    assert False, "******* Results don't match. *******"

In [0]:
# Draw the data (plotData is defined outside this cell; presumably it scatters
# the training points) and overlay two fitted lines on the same axes.
x_feature = X[:, 1]  # second column of X: the raw feature, without the ones column
plotData(x_feature, y)

# Reference line from the expected parameters: thick, solid, black
reference_fit = np.dot(X, [-3.6303, 1.1664])
pyplot.plot(x_feature, reference_fit, '-', c='black', linewidth=5)

# Line from the parameters learned above: dashed, green
learned_fit = np.dot(X, theta)
pyplot.plot(x_feature, learned_fit, '--', c='green')

# Trailing semicolon keeps the notebook from echoing the legend object
pyplot.legend(['Training data', 'Optimal Value', 'Your Output']);

In [0]:
# Predict with the learned theta for two inputs. Each input vector is
# [1, x]: the leading 1 multiplies the intercept term. The feature values
# 3.5 and 7 are reported as populations of 35,000 and 70,000, and the model
# output is scaled by 10000 before printing.
features_35k = [1, 3.5]
predict1 = np.dot(features_35k, theta)
profit_35k = predict1*10000
print('For population = 35,000, we predict a profit of {:.2f}\n'.format(profit_35k))

features_70k = [1, 7]
predict2 = np.dot(features_70k, theta)
profit_70k = predict2*10000
print('For population = 70,000, we predict a profit of {:.2f}\n'.format(profit_70k))