In [None]:
# Regularized logistic regression on a two-variable data set with a nonlinear
# decision boundary and polynomial feature mapping
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize

In [None]:
# Load the training set. The file has no header row, so the three column
# names (Test1, Test2, Label) are supplied explicitly; then preview 3 rows.
Data1 = pd.read_csv(
    "data2ex2.txt",
    names=['Test1', 'Test2', 'Label'],
    header=None,
)
Data1.head(3)

In [None]:
# Split the samples by label so each class gets its own marker and legend
# entry. The copies are reused by the decision-boundary plot further down.
Admitted = Data1.loc[Data1['Label']==1].copy()
NotAdmitted = Data1.loc[Data1['Label']==0].copy()

# Pass 1-D columns (Series) to scatter rather than one-column DataFrames.
fig, ax = plt.subplots()
ax.scatter(Admitted['Test1'], Admitted['Test2'], c='k', marker='x', label='Admitted')
ax.scatter(NotAdmitted['Test1'], NotAdmitted['Test2'], c='y', label='Not Admitted')
# Axis labels let the figure stand on its own when the notebook is skimmed.
ax.set_xlabel('Test1')
ax.set_ylabel('Test2')
ax.set_title('Training Data')
ax.legend()
plt.show()


In [None]:
# Helper functions needed to implement this problem
def MapFeatures(x1,x2,deg=6):
    """Map two features onto every polynomial term up to total degree ``deg``.

    Columns are ordered by increasing total degree ``i`` and, within one
    degree, by increasing power ``j`` of ``x2``::

        1, x1, x2, x1**2, x1*x2, x2**2, ..., x1*x2**(deg-1), x2**deg

    Parameters
    ----------
    x1, x2 : scalar or array-like
        Feature values. Both inputs are flattened and broadcast against each
        other, so plain Python numbers and lists work as well as arrays.
    deg : int, optional
        Highest total degree to generate (default 6, which gives 28 columns).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, (deg + 1) * (deg + 2) // 2)`` whose first column
        is all ones (the intercept term).

    Raises
    ------
    ValueError
        If ``x1`` and ``x2`` cannot be broadcast to a common length.
    """
    x1, x2 = np.broadcast_arrays(np.ravel(x1), np.ravel(x2))
    columns = [np.ones(x1.size)]
    for i in range(1,deg+1):
        for j in range(0,i+1):
            columns.append((x1**(i-j))*(x2**j))
    # Assemble the matrix once instead of re-copying it with hstack per term.
    return np.column_stack(columns)

def sigmoid(z):
    """Logistic sigmoid ``g(z) = 1 / (1 + exp(-z))``, evaluated element-wise.

    The value is computed from ``exp(-|z|)`` so the exponential is only ever
    taken of a non-positive number; the direct formula overflows (with a
    RuntimeWarning) for large negative ``z``.

    Parameters
    ----------
    z : real scalar or array-like
        Input value(s); lists are accepted.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in [0, 1], cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    out = np.where(z >= 0, 1/(1+decay), decay/(1+decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

def Predict(theta,X):
    """Return predicted probabilities and hard 0/1 labels for the rows of X.

    Parameters
    ----------
    theta : parameter vector multiplied against ``X``.
    X : feature matrix, one sample per row.

    Returns
    -------
    h : ``sigmoid(X @ theta)``.
    c : 1-D float array of length ``h.size`` holding 1 where ``h >= 0.5``
        and 0 everywhere else.
    """
    h = sigmoid(X @ theta)
    # Vectorized threshold: values on or above the 0.5 cut become 1, the rest 0.
    c = np.where(np.ravel(h) >= 0.5, 1.0, 0.0)
    return h,c

Implementing the regularized cost function for logistic regression

The hypothesis of logistic regression is
$$h_\theta(x) = g(\theta^Tx) $$
where $g$ is the sigmoid function defined below
$$g(z) = \frac{1}{1+e^{-z}}$$

The cost function in compact form is given by
$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m}\left[-y^{(i)}\log(h_\theta(x^{(i)})) - (1 - y^{(i)})\log(1- h_\theta(x^{(i)})) \right] + \frac{\lambda}{2m} \sum_{j=1}^{n}\theta_j^2 $$

In the vectorized form the cost function becomes:

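$$h = g(X\theta) $$

$$J(\theta) = \frac{1}{m} \left[-y^T\log(h) - (1 - y)^T\log(1- h)\right] + \frac{\lambda}{2m} \theta_{1:n}^T\theta_{1:n} $$

where $\theta_{1:n}$ is $\theta$ without the intercept $\theta_0$, which is not regularized.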



In [None]:
# Implementing Cost Function and Gradient to be used with scipy.optimize
def LogisticCostFunction(theta,Y,X,lambd):
    """Regularized cost J(theta) for logistic regression.

    Returns the mean cross-entropy between the labels ``Y`` and the
    hypothesis ``h = g(X @ theta)`` plus the L2 penalty
    ``lambd / (2 * m) * sum(theta[1:] ** 2)``. The intercept ``theta[0]`` is
    not penalized.

    The two log terms are evaluated with ``np.logaddexp`` instead of taking
    ``np.log`` of the sigmoid: once ``h`` rounds to exactly 0 or 1 the direct
    form yields ``-inf`` (and ``0 * -inf = nan``), which breaks the optimizer.

    Parameters
    ----------
    theta : array-like, length n
        Parameter vector; ``theta[0]`` multiplies the first column of ``X``.
    Y : array-like, length m
        Labels (flattened before use).
    X : numpy.ndarray, shape (m, n)
        Design matrix.
    lambd : float
        Regularization strength.

    Returns
    -------
    numpy.float64
        The scalar cost.
    """
    theta = np.ravel(theta)
    y = np.ravel(Y)
    m = y.size
    z = np.ravel(X @ theta)
    # With h = 1 / (1 + e^-z):  log(h) = -log(1 + e^-z),  log(1 - h) = -log(1 + e^z).
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    data_cost = -(y @ log_h + (1 - y) @ log_one_minus_h) / m
    penalty = (lambd/(2*m)) * (theta[1:] @ theta[1:])
    return data_cost + penalty

The regularized gradient for the objective function is 
$$ \frac{\partial J(\theta)}{\partial \theta_0} = \frac{1}{m} \sum_{i=1}^{m}(h_\theta(x^{(i)}) - y^{(i)})x_0^{(i)} \quad \text{for } j=0 $$

$$ \frac{\partial J(\theta)}{\partial \theta_j} = \left(\frac{1}{m} \sum_{i=1}^{m}(h_\theta(x^{(i)}) - y^{(i)})x_j^{(i)}\right) + \frac{\lambda}{m}\theta_j \quad \text{for } j \geq 1 $$


or in vector form

$$ \nabla J(\theta) = \frac{1}{m}X^T(h-y) + \frac{1}{m}\,\lambda\circ\theta $$ 

where $\circ$ is the element-wise product and $\lambda$ is a vector with

$$\lambda_0 = 0 \quad\text{and}\quad \lambda_i = \lambda \quad \text{for} \quad i \in \{1,...,n\} $$

In [None]:
def LogisticRegrGradient(theta,Y,X,lambd):
    """Gradient of the regularized logistic-regression cost.

    Computes ``(1/m) * X.T @ (h - Y) + (lambdVec/m) * theta`` with
    ``h = sigmoid(X @ theta)``. ``lambdVec`` equals ``lambd`` for every
    coefficient except the intercept, whose entry is 0, so ``theta[0]`` is not
    penalized. This matches the cost function, which only penalizes
    ``theta[1:]``.

    Parameters
    ----------
    theta : numpy.ndarray, shape (n,)
        Parameter vector.
    Y : numpy.ndarray, shape (m,)
        Labels.
    X : numpy.ndarray, shape (m, n)
        Design matrix.
    lambd : float
        Regularization strength.

    Returns
    -------
    numpy.ndarray
        Flattened gradient, one entry per parameter.
    """
    m = Y.size
    h = sigmoid(X @ theta)
    # Per-coefficient penalty weights; zero for the intercept theta[0].
    lambdVec = lambd*np.ones(X.shape[1])
    lambdVec[0] = 0
    G = (1/m)*(X.T@(h-Y)) + (lambdVec/m)*theta
    return G.flatten()

In [None]:
# Expand the two score columns into the polynomial design matrix and pull the
# labels out as a 1-D array.
X = MapFeatures(
    Data1['Test1'].to_numpy(),
    Data1['Test2'].to_numpy(),
)
Y = Data1['Label'].to_numpy()
m, n = X.shape

# Sanity-check the cost and the gradient at the all-zero parameter vector.
theta = np.zeros(n)
lambd = 1
print(
    'Testing the Cost Funtion (for Theta (0,0,0) should return 0.693)',
    LogisticCostFunction(theta, Y, X, lambd),
)
print('Testing Gradient...')
print(LogisticRegrGradient(theta, Y, X, lambd))

# The value of 0.693 is obtained.

In [None]:
# Find theta* by minimizing the regularized cost with SciPy's conjugate
# gradient method, starting from the current theta and supplying the
# analytic gradient as the Jacobian (at most 400 iterations).
res = minimize(
    LogisticCostFunction,
    theta,
    args=(Y, X, lambd),
    method='CG',
    jac=LogisticRegrGradient,
    options={'disp': True, 'maxiter': 400},
)
print('Theta* values:', res.x)

In [None]:
# Plot the decision boundary: the zero level set of (mapped features) @ theta*,
# i.e. the curve on which sigmoid(...) equals 0.5.
xx = np.linspace(Data1['Test1'].min(),Data1['Test1'].max(),50)
yy = np.linspace(Data1['Test2'].min(),Data1['Test2'].max(),50)

# Evaluate the fitted model on the whole 50x50 grid in one call. With
# indexing='ij', Z[i, j] is the value at (xx[i], yy[j]).
grid1, grid2 = np.meshgrid(xx, yy, indexing='ij')
Z = (MapFeatures(grid1.ravel(), grid2.ravel()) @ res.x).reshape(grid1.shape)

# contour wants Z shaped (len(yy), len(xx)), hence the transpose. The level
# must be given as a list: a bare integer is read as a number of levels,
# not as the level value itself.
plt.contour(xx, yy, Z.T, levels=[0])
plt.scatter(Admitted['Test1'], Admitted['Test2'], c='k', label='Admitted', marker='x')
plt.scatter(NotAdmitted['Test1'], NotAdmitted['Test2'], c='y', label='Not Admitted')
plt.xlabel('Test1')
plt.ylabel('Test2')
plt.title('Training Data and Decision Boundary')
plt.legend()
plt.show()

In [None]:
# Accuracy on the training set: the fraction of samples whose thresholded
# prediction equals the label.
p, c = Predict(res.x, X)

accuracy = (Y == c).mean()
print('Accuracy:', accuracy)

What could be done next here:
- Try Different Lambda values.
- Make a plot of the Accuracy vs Lambda values.
- Explore different feature scaling.
    