In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import string

def square(x):
    """Return ``x`` multiplied by itself."""
    product = x * x
    return product
    
class LogisticRegression:
    """Binary logistic regression fitted with batch gradient descent.

    The parameter vector ``theta`` holds one weight per feature followed by
    the intercept as its LAST entry; a constant column of ones is appended to
    the features to match. The L2 penalty never includes the intercept.
    """

    def __init__(self, regularized=True):
        # Stored only: no method of this class consults the flag. The penalty
        # strength is controlled by the ``lamb`` arguments instead.
        self.regularized = regularized

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        z = np.asanyarray(x, dtype=float)
        # exp(-z) overflows to inf for very negative z; 1 / (1 + inf) is still
        # the correct limit 0, so only the warning is silenced.
        with np.errstate(over='ignore'):
            return 1.0 / (1.0 + np.exp(-z))

    def logistic_function(self, vec, theta):
        """Return ``sigmoid(vec . theta)``; ``vec`` must already hold the bias term."""
        return self.sigmoid(np.dot(vec, theta))

    @staticmethod
    def _log_likelihood(z, y):
        """Return ``y*log(sigmoid(z)) + (1-y)*log(1-sigmoid(z))`` without overflow.

        Uses ``log(sigmoid(z)) == -logaddexp(0, -z)`` and
        ``log(1 - sigmoid(z)) == -logaddexp(0, z)`` so that saturated scores
        give large finite values instead of ``-inf`` / ``nan``.
        """
        return -(y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z))

    @staticmethod
    def _column_stats(x):
        """Return ``(x as float array, column means, column stds)``.

        A zero standard deviation (constant column) is replaced by 1 so that
        scaling maps the column to zeros instead of dividing by zero.
        """
        values = np.asarray(x, dtype=float)
        mean = values.mean(axis=0)
        std = values.std(axis=0)
        return values, mean, np.where(std == 0, 1.0, std)

    def single_value_cost(self, x_vector, theta, y_value):
        """Return the log-likelihood contribution of one sample.

        Args:
            x_vector: Feature values of the sample, WITHOUT the bias term
                (a trailing 1 is appended here).
            theta: Parameter vector, intercept last.
            y_value: Label of the sample (0 or 1).
        """
        score = np.dot(np.append(x_vector, 1), theta)
        return self._log_likelihood(score, y_value)

    def gradient(self, theta, X, y):
        """Return the gradient of the unregularized cost as a 1-D array.

        Args:
            theta: Parameter vector, intercept last.
            X: Feature rows without the bias column; a 1-D input is treated
                as a single sample.
            y: Labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` disagree on the number of samples.
        """
        weights = np.asarray(theta, dtype=float).ravel()
        features = np.atleast_2d(np.asarray(X, dtype=float))
        targets = np.asarray(y, dtype=float).ravel()
        n_samples = features.shape[0]
        if targets.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        design = np.hstack((features, np.ones((n_samples, 1))))
        error = self.sigmoid(design.dot(weights)) - targets
        # One matrix product replaces the per-parameter Python loop.
        return design.T.dot(error) / n_samples

    def reg_gradient(self, X, y, theta, lamb):
        """Return the gradient of the L2-regularized cost.

        The penalty term ``lamb / m * theta`` is added for every weight
        except the intercept (last entry), which gets 0.
        """
        weights = np.asarray(theta, dtype=float).ravel()
        penalty = np.append(weights[:-1], 0.0)
        return self.gradient(theta, X, y) + lamb / len(y) * penalty

    def cost_function(self, x, theta, y):
        """Return the mean negative log-likelihood over the ``len(y)`` samples.

        Args:
            x: Feature rows without the bias column. A 1-D input is read as
                one single-feature sample per entry. Only the first
                ``len(y)`` rows are used.
            theta: Parameter vector, intercept last.
            y: Labels (0 or 1).

        Raises:
            ValueError: If ``y`` is empty or ``x`` has fewer rows than ``y``.
        """
        m = len(y)
        if m == 0:
            raise ValueError("cost_function requires at least one sample")
        targets = np.asarray(y, dtype=float).ravel()
        features = np.asarray(x, dtype=float)[:m]
        if features.ndim == 1:
            features = features.reshape(-1, 1)
        if features.shape[0] != targets.shape[0]:
            raise ValueError("x and y must contain the same number of samples")

        design = np.hstack((features, np.ones((features.shape[0], 1))))
        scores = design.dot(np.asarray(theta, dtype=float).ravel())
        return -np.sum(self._log_likelihood(scores, targets)) / m

    def reg_cost_function(self, x, theta, y, lamb):
        """Return the cost plus ``lamb / (2m)`` times the squared weight norm.

        The intercept (last entry of ``theta``) is excluded from the penalty.
        """
        weights = np.asarray(theta, dtype=float).ravel()
        penalty = np.dot(weights[:-1], weights[:-1])
        return self.cost_function(x, theta, y) + lamb / (2 * len(y)) * penalty

    def grad_check(self, x, y, index, theta, lamb):
        """Return a forward-difference estimate of d(cost)/d(theta[index]).

        Works on a float copy, so the caller's ``theta`` is never modified
        and integer-typed parameter arrays do not truncate the tiny step.
        """
        step = 0.0000001
        base = np.array(theta, dtype=float).ravel()
        bumped = base.copy()
        bumped[index] += step
        func_before = self.reg_cost_function(x, base, y, lamb)
        func_after = self.reg_cost_function(x, bumped, y, lamb)
        return (func_after - func_before) / step

    def update_theta(self, theta, alpha, x, y, lamb):
        """Return ``theta`` after one gradient-descent step of size ``alpha``.

        ``grad_check`` can be used to verify the analytic gradient taken here.
        """
        step_direction = self.reg_gradient(x, y, theta, lamb)
        return np.asarray(theta, dtype=float) - alpha * step_direction

    def initialize_theta(self, x):
        """Return the start vector: -1 for every feature of ``x`` plus the intercept."""
        return [-1] * (len(x[0]) + 1)

    def scale(self, x):
        """Return ``x`` standardized column-wise to zero mean and unit variance.

        Constant columns are mapped to zeros rather than dividing by zero.
        """
        values, mean, std = self._column_stats(x)
        return (values - mean) / std

    def train(self, x, y, alpha, num_times, lamb):
        """Fit ``self.theta`` with ``num_times`` gradient-descent steps.

        The features are standardized first; the statistics used are kept in
        ``self.mean_`` / ``self.std_`` so ``test`` can apply the same
        transformation. Progress is printed whenever the remaining iteration
        count is a multiple of 100.

        Args:
            x: Feature rows (2-D), without the bias column.
            y: Labels (0 or 1), one per row.
            alpha: Learning rate.
            num_times: Number of descent steps.
            lamb: L2 regularization strength (0 disables the penalty).
        """
        raw, self.mean_, self.std_ = self._column_stats(x)
        new_x = (raw - self.mean_) / self.std_

        theta = self.initialize_theta(new_x)
        print(theta)

        remaining = num_times
        while remaining > 0:
            # The cost is only evaluated when it is actually reported.
            if remaining % 100 == 0:
                print('Cost Function Value: ', self.reg_cost_function(new_x, theta, y, lamb))
            theta = self.update_theta(theta, alpha, new_x, y, lamb)
            remaining -= 1

        print(theta)

        self.theta = theta

    def test(self, x):
        """Return the predicted probability of class 1 for each row of ``x``.

        The rows are standardized with the statistics stored by ``train``
        (skipped when ``theta`` was assigned without training), extended by
        the bias term and passed through the sigmoid.

        Args:
            x: Raw (unscaled) feature rows without the bias column. A 1-D
                input is reshaped to rows of ``len(theta) - 1`` features.

        Returns:
            A 1 x n ``np.matrix`` of probabilities.

        Raises:
            ValueError: If the rows do not have ``len(theta) - 1`` features.
        """
        weights = np.asarray(self.theta, dtype=float).ravel()
        n_features = weights.shape[0] - 1
        features = np.asarray(x, dtype=float)
        if features.ndim == 1:
            features = features.reshape(-1, n_features)
        if features.ndim != 2 or features.shape[1] != n_features:
            raise ValueError(
                "expected rows with %d feature(s) to match theta" % n_features
            )

        mean = getattr(self, 'mean_', None)
        if mean is not None:
            features = (features - mean) / self.std_

        design = np.hstack((features, np.ones((features.shape[0], 1))))
        return np.matrix(self.sigmoid(design.dot(weights)))
                    
# Load the data file, print two gradient sanity checks, then train the model.
b = LogisticRegression()
# NOTE(review): read_csv treats the first line as a header row by default --
# confirm the file really starts with one, otherwise a sample is dropped.
data = pd.read_csv('C:/Users/User/Documents/BerkeleyFall2017/Kaggle/Test/ex2data1.txt')
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# ndarray on both old and current pandas versions.
dat = data.values.T
y = dat[-1]      # last column: labels
x = dat[:-1].T   # remaining columns: features, one row per sample
thet = [-1, -1, -1]
# With lamb == 0 the regularized gradient must equal the plain gradient.
print(b.gradient(thet, x, y))
print(b.reg_gradient(x, y, thet, 0))
b.train(x, y, 1, 1000, 0)
        

[-44.83135362 -44.37384125  -0.6       ]
[-44.83135362 -44.37384125  -0.6       ]
[-1, -1, -1]
Cost Function Value:  1.62445532583
Cost Function Value:  0.210671668487
Cost Function Value:  0.204930185946
Cost Function Value:  0.203867195067
Cost Function Value:  0.203602729878
Cost Function Value:  0.203528948727
Cost Function Value:  0.20350721751
Cost Function Value:  0.203500635861
Cost Function Value:  0.203498612549
Cost Function Value:  0.203497985463
[ 3.98902015  3.72149087  1.71668508]
