In [1]:
%matplotlib inline
from mpl_toolkits import mplot3d
import scipy.optimize as op
import scipy.io as io
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize']=(20.0,10.0)

def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s). Plain Python numbers, lists and numpy arrays of any
        shape are accepted.

    Returns
    -------
    numpy scalar or array
        ``1 / (1 + exp(-z))`` evaluated element-wise, with the same shape
        as ``z``.
    """
    # np.negative (unlike unary minus) also accepts lists and plain numbers,
    # so no preallocated buffer or ``z.shape`` lookup is needed; the result
    # is produced directly by the vectorised expression below.
    return 1 / (1 + np.exp(np.negative(z)))

def costfunc(theta, X, y, lambda1):
    """Regularized logistic-regression cost.

    Parameters
    ----------
    theta : numpy.ndarray
        Parameter vector with ``X.shape[1]`` elements (any shape that can be
        reshaped to a column). ``theta[0]`` is excluded from the penalty.
    X : numpy.ndarray
        2-D design matrix, one row per sample.
    y : numpy.ndarray
        Targets with ``X.shape[0]`` elements; the formula treats them as
        0/1 labels.
    lambda1 : float
        Regularization strength.

    Returns
    -------
    numpy.ndarray
        The cost as a 1x1 array (shape kept for compatibility with existing
        callers; use ``.item()`` to obtain a plain float).
    """
    m = X.shape[0]
    n = X.shape[1]
    theta = theta.reshape((n, 1))
    y = y.reshape((m, 1))

    z = np.dot(X, theta)
    # Stable log-likelihood terms:
    #   log(sigmoid(z))     = -log(1 + exp(-z)) = -logaddexp(0, -z)
    #   log(1 - sigmoid(z)) = -log(1 + exp(z))  = -logaddexp(0,  z)
    # Taking log() of a saturated sigmoid would give -inf and turn the cost
    # into nan/inf; logaddexp stays finite for any finite z.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)

    data_term = -(np.dot(y.T, log_h) + np.dot((1 - y).T, log_one_minus_h)) / m
    # Penalize every parameter except the first one.
    penalty = np.sum(theta[1:] ** 2) * (lambda1 / (2 * m))
    return data_term + penalty

def gradient(theta, X, y, lambda1):
    """Gradient of the regularized logistic-regression cost.

    ``theta`` and ``y`` are reshaped to columns matching ``X``'s column and
    row counts. The regularization term ``theta * lambda1 / m`` is added to
    every component except the first, which keeps the plain data gradient.

    Returns the gradient flattened to a 1-D array.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]
    weights = theta.reshape((n_cols, 1))
    targets = y.reshape((n_rows, 1))

    # Data term shared by every component: X^T (h - y) / m.
    residual = sigmoid(X.dot(weights)) - targets
    data_grad = np.dot(X.T, residual) / n_rows

    out = data_grad + weights * (lambda1 / n_rows)
    # First component is left unregularized.
    out[0] = data_grad[0]
    return out.ravel()

# --- One-vs-all regularized logistic regression ---------------------------
# Problem-size constants; input_layer_size is not referenced below.
input_layer_size = 400
num_labels = 10

mat = io.loadmat('ex3data1.mat')
X = mat['X']
y = mat['y']
m = X.shape[0]

# Small hand-built problem used to check costfunc/gradient against the
# reference values printed below.
theta_t = np.array([-2, -1, 1, 2]).reshape(-1, 1)
X_t = np.concatenate((np.ones((5, 1)), np.array(range(1, 16)).reshape(3, 5).T / 10), axis=1)
y_t = np.array([1, 0, 1, 0, 1]).reshape(-1, 1)
lambda_t = 3

J = costfunc(theta_t, X_t, y_t, lambda_t)
grad = gradient(theta_t, X_t, y_t, lambda_t)

print('\nCost:', J)
print('Expected cost: 2.534819\n')
print('Gradients:\n')
print(grad)
print('\nExpected gradients:\n')
print(' 0.146561\n -0.548558\n 0.724722\n 1.398003\n')

lambda1 = 0.1
m, n = X.shape

# Shift the labels down by one so they can be compared with the 0-based
# column index returned by argmax further down.
# NOTE(review): presumably the file stores labels as 1..num_labels - confirm.
y = y - 1
all_theta = np.zeros((num_labels, n + 1))
# Prepend a column of ones (intercept term).
X = np.concatenate((np.ones((m, 1)), X), axis=1)
for c in range(num_labels):
    # Binary target for the current class: 1 where the label equals c.
    y1 = (y == c) * 1
    # scipy.optimize.minimize requires a 1-D starting point; a column
    # vector of shape (n + 1, 1) is rejected by recent SciPy releases.
    initial_theta = np.zeros(n + 1)
    res = op.minimize(fun=costfunc, x0=initial_theta, args=(X, y1, lambda1), jac=gradient, method='CG', options={'maxiter': 300})
    all_theta[c, :] = res.x

# Score every sample against every per-class classifier and pick the best.
h = sigmoid(np.dot(X, all_theta.T))
ix = np.argmax(h, axis=1).reshape(-1, 1)
p = (ix == y) * 1
print('\nTraining Set Accuracy:', np.mean(p) * 100)


Cost: [[2.5348194]]
Expected cost: 2.534819

Gradients:

[ 0.14656137 -0.54855841  0.72472227  1.39800296]

Expected gradients:

 0.146561
 -0.548558
 0.724722
 1.398003


Training Set Accuracy: 96.46000000000001


In [3]:
# --- Feed-forward prediction with weights loaded from ex3weights.mat ------
# Problem-size constants; none of the three is referenced below.
input_layer_size = 400
hidden_layer_size = 25
num_labels = 10

mat = io.loadmat('ex3data1.mat')
X, y = mat['X'], mat['y']
m = X.shape[0]

weights = io.loadmat('ex3weights.mat')
Theta1, Theta2 = weights['Theta1'], weights['Theta2']

# Shift the labels down by one so they can be compared with the 0-based
# column index returned by argmax.
y = y - 1

# Layer 1 -> hidden layer: prepend a column of ones, then apply Theta1.
bias = np.ones((m, 1))
X = np.hstack((bias, X))
a = np.hstack((bias, sigmoid(X.dot(Theta1.T))))

# Hidden layer -> output layer, then take the highest-scoring column.
h = sigmoid(a.dot(Theta2.T))
ix = h.argmax(axis=1).reshape(-1, 1)
p = (ix == y) * 1
print('\nTraining Set Accuracy:', np.mean(p) * 100)


Training Set Accuracy: 97.52
