In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import string

def square(x):
    """Return ``x`` multiplied by itself."""
    product = x * x
    return product
    
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    The parameter vector ``theta`` has one weight per feature followed by a
    bias term in the LAST position; every method appends a constant ``1`` to
    the feature vectors to match.  The L2 penalty is applied to all entries
    of ``theta`` except that trailing bias.
    """

    def __init__(self, regularized=True):
        # Stored for callers; no method of this class consults the flag.
        self.regularized = regularized

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise."""
        return 1 / (1 + np.exp(-1 * x))

    def logistic_function(self, vec, theta):
        """Return ``sigmoid(vec . theta)`` for an already bias-augmented ``vec``."""
        return self.sigmoid(np.dot(vec, theta))

    def _add_bias(self, x):
        """Return ``x`` as a 2-D float array with a trailing column of ones."""
        features = np.atleast_2d(np.asarray(x, dtype=float))
        ones = np.ones((features.shape[0], 1))
        return np.hstack((features, ones))

    def single_value_cost(self, x_vector, theta, y_value):
        """Return the log-likelihood of one sample (not negated).

        ``x_vector`` is the raw feature vector; the bias input is appended
        here.
        """
        x_vector = np.append(x_vector, 1)
        prob = self.logistic_function(x_vector, theta)
        return y_value * np.log(prob) + (1 - y_value) * np.log(1 - prob)

    def gradient(self, theta, X, y):
        """Return the gradient of the unregularized cost as a 1-D array.

        Parameters
        ----------
        theta : sequence of length ``n_features + 1`` (bias last).
        X : array-like of shape ``(n_samples, n_features)`` without bias column.
        y : array-like of ``n_samples`` labels.
        """
        weights = np.asarray(theta, dtype=float).ravel()
        features = self._add_bias(X)
        labels = np.asarray(y, dtype=float).ravel()
        error = self.sigmoid(np.dot(features, weights)) - labels
        # Equivalent to averaging error * X[:, i] for every parameter i.
        return np.dot(features.T, error) / features.shape[0]

    def reg_gradient(self, X, y, theta, lamb):
        """Return the gradient of the L2-regularized cost.

        The penalty term is ``lamb / m * theta`` with the bias entry (last
        position) replaced by zero.
        """
        weights = np.asarray(theta, dtype=float).ravel()
        penalty = np.append(weights[:-1], 0.0)
        return self.gradient(weights, X, y) + lamb / float(len(y)) * penalty

    def cost_function(self, x, theta, y):
        """Return the mean negative log-likelihood over all samples."""
        weights = np.asarray(theta, dtype=float).ravel()
        labels = np.asarray(y, dtype=float).ravel()
        prob = self.sigmoid(np.dot(self._add_bias(x), weights))
        log_likelihood = labels * np.log(prob) + (1 - labels) * np.log(1 - prob)
        return -np.sum(log_likelihood) / len(labels)

    def reg_cost_function(self, x, theta, y, lamb):
        """Return the cost plus ``lamb / (2m) * ||theta[:-1]||^2``."""
        weights = np.asarray(theta, dtype=float).ravel()
        # The bias (last entry) is excluded from the penalty.
        penalty = np.dot(weights[:-1], weights[:-1])
        return self.cost_function(x, weights, y) + lamb / (2.0 * len(y)) * penalty

    def grad_check(self, x, y, index, theta, lamb):
        """Return a forward finite-difference estimate of d(cost)/d(theta[index]).

        Works on a float copy, so the caller's ``theta`` is never modified.
        """
        step = 0.0000001
        shifted = np.array(theta, dtype=float)
        func_before = self.reg_cost_function(x, shifted, y, lamb)
        shifted[index] += step
        func_after = self.reg_cost_function(x, shifted, y, lamb)
        return (func_after - func_before) / step

    def update_theta(self, theta, alpha, x, y, lamb):
        """Return ``theta`` after one gradient-descent step of size ``alpha``."""
        step = self.reg_gradient(x, y, theta, lamb)
        return np.asarray(theta, dtype=float) - alpha * np.asarray(step)

    def initialize_theta(self, x):
        """Return the starting parameters: a list of ``-1`` per feature plus bias."""
        return [-1] * (len(x[0]) + 1)

    def scale(self, x, mean=None, std=None):
        """Standardize the columns of ``x``.

        Parameters
        ----------
        x : array-like of samples.
        mean, std : optional per-column statistics to use.  When omitted they
            are computed from ``x`` itself (the original behavior).

        Columns whose standard deviation is zero are only centered, instead
        of being divided by zero.
        """
        x = np.asarray(x, dtype=float)
        if mean is None:
            mean = x.mean(axis=0)
        if std is None:
            std = x.std(axis=0)
        safe_std = np.where(np.asarray(std) == 0, 1.0, std)
        return (x - mean) / safe_std

    def train(self, x, y, alpha, num_times, lamb):
        """Fit ``self.theta`` by ``num_times`` gradient-descent steps.

        Parameters
        ----------
        x : array-like of shape ``(n_samples, n_features)``.
        y : array-like of ``n_samples`` labels.
        alpha : learning rate.
        num_times : number of iterations.
        lamb : L2 regularization strength.

        The column means and standard deviations of ``x`` are kept on the
        instance (``x_mean``, ``x_std``) so that ``test`` can apply the same
        scaling to new data.
        """
        features = np.asarray(x, dtype=float)
        self.x_mean = features.mean(axis=0)
        self.x_std = features.std(axis=0)
        new_x = self.scale(features, self.x_mean, self.x_std)

        theta = self.initialize_theta(new_x)
        print(theta)
        for remaining in range(num_times, 0, -1):
            if remaining % 100 == 0:
                # Progress report; both values use the scaled features that
                # the optimisation itself runs on.
                print('Cost Function Value: ', self.reg_cost_function(new_x, theta, y, lamb))
                print('Gradient: ', self.reg_gradient(new_x, y, theta, lamb))
            theta = self.update_theta(theta, alpha, new_x, y, lamb)

        print(theta)

        self.theta = theta

    def test(self, x):
        """Return the linear scores ``theta . [x_scaled, 1]`` for each row of ``x``.

        The result is a ``1 x n_samples`` ``np.matrix``.  Rows are scaled
        with the statistics stored by ``train`` when available; otherwise
        with the statistics of ``x`` itself.
        """
        mean = getattr(self, 'x_mean', None)
        std = getattr(self, 'x_std', None)
        new_x = self.scale(x, mean, std)
        weights = np.asarray(self.theta, dtype=float).ravel()
        return np.matrix(np.dot(self._add_bias(new_x), weights))
                    
# Fit the classifier on the data stored in Test/ex2data1.txt: the last column
# is used as the label and every other column as a feature.
b = LogisticRegression()
data = pd.read_csv('Test/ex2data1.txt')
# DataFrame.as_matrix() was removed in pandas 1.0; .values is available in
# both old and new releases and returns the same ndarray.
dat = data.values.T
y = dat[-1]
x = dat[:-1].T
print(x)
print(y)
# Arguments: learning rate 1, 1000 iterations, no regularization.
b.train(x, y, 1, 1000, 0)
        

[[ 34.62365962  78.02469282]
 [ 30.28671077  43.89499752]
 [ 35.84740877  72.90219803]
 [ 60.18259939  86.3085521 ]
 [ 79.03273605  75.34437644]
 [ 45.08327748  56.31637178]
 [ 61.10666454  96.51142588]
 [ 75.02474557  46.55401354]
 [ 76.0987867   87.42056972]
 [ 84.43281996  43.53339331]
 [ 95.86155507  38.22527806]
 [ 75.01365839  30.60326323]
 [ 82.30705337  76.4819633 ]
 [ 69.36458876  97.71869196]
 [ 39.53833914  76.03681085]
 [ 53.97105215  89.20735014]
 [ 69.07014406  52.74046973]
 [ 67.94685548  46.67857411]
 [ 70.66150955  92.92713789]
 [ 76.97878373  47.57596365]
 [ 67.37202755  42.83843832]
 [ 89.67677575  65.79936593]
 [ 50.53478829  48.85581153]
 [ 34.21206098  44.2095286 ]
 [ 77.92409145  68.97235999]
 [ 62.27101367  69.95445795]
 [ 80.19018075  44.82162893]
 [ 93.1143888   38.80067034]
 [ 61.83020602  50.25610789]
 [ 38.7858038   64.99568096]
 [ 61.37928945  72.80788731]
 [ 85.40451939  57.05198398]
 [ 52.10797973  63.12762377]
 [ 52.04540477  69.43286012]
 [ 40.23689374