# Advanced Optimization with SciPy's TNC Method

In [None]:
import numpy as np
import scipy.optimize as op

def sigmoid(z):
    """Evaluate the logistic function 1 / (1 + exp(-z)) element-wise.

    The value is computed as exp(-log(1 + exp(-z))) through
    ``np.logaddexp`` so that large negative inputs underflow to 0 instead
    of overflowing inside ``exp(-z)``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s). Only NumPy ufuncs are applied, so ndarray
        subclasses such as ``np.matrix`` come back as the same subclass.

    Returns
    -------
    Floating-point value(s) in [0, 1] with the same shape as ``z``.
    """
    # log(1 + exp(-z)) == logaddexp(0, -z); logaddexp never forms exp(-z)
    # directly, so no overflow occurs for very negative z.
    return np.exp(-np.logaddexp(0, np.negative(z)))

def Gradient(theta,x,y):
    """Gradient of the logistic-regression cost with respect to theta.

    theta is reshaped to an (n, 1) column and y to an (m, 1) column, where
    (m, n) is the shape of x. The result, x.T @ (sigmoid(x @ theta) - y) / m,
    is returned flattened so it can be passed as ``jac`` to an optimizer.
    """
    n_rows, n_cols = x.shape
    weights = theta.reshape((n_cols, 1))
    targets = y.reshape((n_rows, 1))
    # Residual between predicted probabilities and the targets.
    residual = sigmoid(x.dot(weights)) - targets
    return (x.T.dot(residual) / n_rows).flatten()

def CostFunc(theta,x,y):
    """Mean cross-entropy cost of logistic regression.

    Computes J = -(1/m) * sum(y * log(h) + (1 - y) * log(1 - h)) with
    h = 1 / (1 + exp(-x @ theta)).

    The two log terms are evaluated directly from z = x @ theta:
        log(h)     = -log(1 + exp(-z)) = -logaddexp(0, -z)
        log(1 - h) = -log(1 + exp(z))  = -logaddexp(0,  z)
    Taking log() of an already-saturated probability yields -inf, and
    0 * -inf then turns the whole cost into NaN; the logaddexp form stays
    finite for large |z|.

    Parameters
    ----------
    theta : ndarray with n elements (reshaped to (n, 1) internally)
    x     : (m, n) design matrix
    y     : ndarray with m elements (reshaped to (m, 1) internally)

    Returns
    -------
    J : scalar cost.
    """
    m,n = x.shape
    theta = theta.reshape((n,1))
    y = y.reshape((m,1))
    z = x.dot(theta).reshape((m,1))
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    term = y * log_h + (1 - y) * log_one_minus_h
    J = -((np.sum(term))/m)
    return J

m,n = X_train.shape
# scipy.optimize.minimize expects a 1-D initial guess of shape (n,); a
# (n, 1) column is rejected by recent SciPy releases. CostFunc and Gradient
# reshape theta to a column internally, so a flat vector is all they need.
theta = np.random.normal(size=n)
Result = op.minimize(fun = CostFunc,
                     x0 = theta,
                     args = (X_train, y_train),
                     method = 'TNC',
                     jac = Gradient)

theta = Result.x # optimal theta

# Manual Gradient Descent

In [None]:
# Hyperparameters for the hand-written gradient descent below.
iterations = 1500
alpha = 0.0003
# Starting point: a random float column matrix with one row per column of X_train.
theta = np.matrix(np.random.normal(size=(X_train.shape[1], 1)), dtype=float)
def compute_cost(x, y, theta):
    """Mean cross-entropy cost of logistic regression for matrix inputs.

    Computes -(1/m) * sum(y * log(h) + (1 - y) * log(1 - h)) with
    h = 1 / (1 + exp(-x * theta)), summing over rows.

    ``m`` is taken from ``x`` itself rather than from a notebook-level
    variable, so the function gives the right answer for any dataset and
    does not depend on another cell having been run.

    The log terms use log(h) = -logaddexp(0, -z) and
    log(1 - h) = -logaddexp(0, z), which stay finite when predictions
    saturate (log(0) would otherwise produce -inf / NaN).

    Parameters
    ----------
    x     : (m, n) design matrix
    y     : (m, 1) targets
    theta : (n, 1) parameters. ``x * theta`` must act as a matrix product,
            which is the case when at least one operand is an np.matrix.

    Returns
    -------
    The column-wise summed cost: a 1x1 np.matrix for np.matrix inputs.
    """
    m = x.shape[0]
    z = x * theta
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    total = np.sum(np.multiply(y, log_h) + np.multiply((1 - y), log_one_minus_h), axis=0)
    return -1/m * total

In [None]:
# Batch gradient descent: perform exactly `iterations` parameter updates and
# record the cost after each one.
# NOTE(review): `X_train * theta` and `X_train.T * (...)` rely on `*` being a
# matrix product, which holds while theta is the np.matrix created in the
# configuration cell; y_train is presumably an (m, 1) column - confirm.
m = X_train.shape[0]  # number of training rows, taken from the data itself
cost_log = []  # (iteration, cost) pairs, converted to an array once at the end
for iters in range(1, iterations + 1):
    hypo = sigmoid(X_train * theta)
    theta = theta - alpha * 1/m * X_train.T * (hypo - y_train)
    # compute_cost may return a 1x1 matrix; store it as a plain float so the
    # history is a numeric array rather than an object array.
    cost = np.asarray(compute_cost(X_train, y_train, theta)).item()
    cost_log.append((iters, cost))
# reshape keeps the (k, 2) layout even when no iterations were run.
J_history = np.array(cost_log, dtype=float).reshape(-1, 2)
print(theta)
print(compute_cost(X_train, y_train, theta))
plt.figure(figsize=(5, 3), dpi=150)
plt.plot(J_history[:, 0], J_history[:, 1], linewidth=2)
plt.xlabel('# of Iterations')
plt.ylabel('Cost')
plt.show()