## Behaviour of cost function

Let's understand the behaviour of the ** cost function** $J(\theta_0, \theta_1)$ as in eq: 4 on **Profit** vs **Population of the city** data as below:

In [2]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d

data = np.loadtxt('../data/ex1data1.txt', delimiter=',')
X = np.c_[np.ones(data.shape[0]),data[:,0]]
y = np.c_[data[:,1]]

def computeCost(X, y, theta=[[0],[0]]):
    m = y.size
    J = 0
    h = X.dot(theta)
    J = 1/(2*m)*np.sum(np.square(h-y))
    return(J)

# Create grid coordinates for plotting
B0 = np.linspace(-10, 10, 50)
B1 = np.linspace(-1, 4, 50)
xx, yy = np.meshgrid(B0, B1, indexing='xy')
Z = np.zeros((B0.size,B1.size))

# Calculate Z-values (Cost) based on grid of coefficients
for (i,j),v in np.ndenumerate(Z):
    Z[i,j] = computeCost(X,y, theta=[[xx[i,j]], [yy[i,j]]])

fig = plt.figure(figsize=(12,4))
ax1 = fig.add_subplot(131)
ax1.scatter(X[:,1], y, s=30, c='r', marker='x', alpha=0.5)
ax1.set_xlim(4,24)
ax1.set_xlabel('Population of City in 10,000s')
ax1.set_ylabel('Profit in $10,000s');
ax1.set_title('Training data')

ax2 = fig.add_subplot(132, projection='3d')

xvals = np.arange(-10,10,.5)
yvals = np.arange(-1,4,.1)
myxs, myys, myzs = [], [], []
for david in xvals:
    for kaleko in yvals:
        myxs.append(david)
        myys.append(kaleko)
        myzs.append(computeCost(X,y,np.array([[david], [kaleko]])))

scat = ax2.scatter(myxs,myys,myzs,c=np.abs(myzs),cmap=plt.get_cmap('cool'))
ax2.set_xlabel(r'$\theta_0$',fontsize=16)
ax2.set_ylabel(r'$\theta_1$',fontsize=16)
ax2.set_title(r'Cost fn: $J(\theta_0, \theta_1) = \frac{1}{2m} \sum_{i=1}^m \left(h_\theta(x^{(i)}) - y^{(i)}\right)^2 $',fontsize=12)

ax3 = fig.add_subplot(133)
ax3.contour(xx, yy, Z, np.logspace(-2, 3, 20), cmap=plt.cm.jet)
ax3.scatter(theta[0],theta[1], c='r')
ax3.set_xlabel(r'$\theta_0$',fontsize=16)
ax3.set_ylabel(r'$\theta_1$',fontsize=16)
ax3.set_title(r'Cost fn: $J(\theta_0, \theta_1)$ contour map',fontsize=12)

fig.tight_layout() # Adjust spaces between diffrent axes automatically
plt.show()    


NameError: name 'theta' is not defined

In [25]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d

data = '../data/ex1data1.txt'
cols = np.loadtxt(data,delimiter=',',usecols=(0,1),unpack=True) #Read in comma separated data
#Form the usual "X" matrix and "y" vector
X = np.transpose(np.array(cols[:-1]))
y = np.transpose(np.array(cols[-1:]))
m = y.size # number of training examples
#Insert the usual column of 1's into the "X" matrix
X = np.insert(X,0,1,axis=1)



cx = np.c_[np.ones(cols.shape[0]),cols[:,0]]
cy = np.c_[cols[:,1]]

def computeCostCC(X, y, theta=[[0],[0]]):
    m = y.size
    J = 0
    h = X.dot(theta)
    J = 1/(2*m)*np.sum(np.square(h-y))
    return(J)

# Create grid coordinates for plotting
B0 = np.linspace(-10, 10, 50)
B1 = np.linspace(-1, 4, 50)
cz = np.zeros((B0.size,B1.size))

# Calculate Z-values (Cost) based on grid of coefficients
for (i,j),v in np.ndenumerate(cz):
    theta=[[cx[i,j]], [cy[i,j]]]
    cz[i,j] = computeCost(cx,cy, theta)
    
    
iterations = 1500
alpha = 0.01

def computeCost(mytheta,X,y): #Cost function
    """
    theta_start is an n- dimensional vector of initial theta guess
    X is matrix with n- columns and m- rows
    y is a matrix with m- rows and 1 column
    """
    #note to self: *.shape is (rows, columns)
    return float((1./(2*m)) * np.dot((h(mytheta,X)-y).T,(h(mytheta,X)-y)))

#Test that running computeCost with 0's as theta returns 32.07:

initial_theta = np.zeros((X.shape[1],1)) #(theta is a vector with n rows and 1 columns (if X has n features) )

#Actual gradient descent minimizing routine
def descendGradient(X, theta_start = np.zeros(2)):
    """
    theta_start is an n- dimensional vector of initial theta guess
    X is matrix with n- columns and m- rows
    """
    theta = theta_start
    jvec = [] #Used to plot cost as function of iteration
    thetahistory = [] #Used to visualize the minimization path later on
    for meaninglessvariable in range(iterations):
        tmptheta = theta
        jvec.append(computeCost(theta,X,y))
        # Buggy line
        #thetahistory.append(list(tmptheta))
        # Fixed line
        thetahistory.append(list(theta[:,0]))
        #Simultaneously updating theta values
        for j in range(len(tmptheta)):
            tmptheta[j] = theta[j] - (alpha/m)*np.sum((h(initial_theta,X) - y)*np.array(X[:,j]).reshape(m,1))
        theta = tmptheta
    return theta, thetahistory, jvec

#Actually run gradient descent to get the best-fit theta values
initial_theta = np.zeros((X.shape[1],1))
theta, thetahistory, jvec = descendGradient(X,initial_theta)

fig = plt.figure(figsize=(12,12))
ax = fig.gca(projection='3d')

xvals = np.arange(-10,10,.5)
yvals = np.arange(-1,4,.1)
myxs, myys, myzs = [], [], []
for david in xvals:
    for kaleko in yvals:
        myxs.append(david)
        myys.append(kaleko)
        myzs.append(computeCost(np.array([[david], [kaleko]]),X,y))

scat = ax.scatter(myxs,myys,myzs,c=np.abs(myzs),cmap=plt.get_cmap('cool'))

# ax.xlabel(r'$\theta_0$',fontsize=30)
# ax.ylabel(r'$\theta_1$',fontsize=30)
# ax.title('Cost (Minimization Path Shown in Blue)',fontsize=30)
ax.plot([x[0] for x in thetahistory],[x[1] for x in thetahistory],jvec,'bo-')


plt.show()


ValueError: shapes (2,1) and (2,2) not aligned: 1 (dim 1) != 2 (dim 0)