## Logistic Regression

Using classes this time

In [1]:
import numpy as np
import pandas as pd
df = pd.read_csv("data/aps.csv")
df = df.reindex(np.random.permutation(df.index))
df = df.drop("ID", axis =1)
df.head()

Unnamed: 0,PLACE,PLACE3,AGE,RACE,GENDER,NEURO,EMOT,DANGER,ELOPE,LOS,BEHAV,CUSTD,VIOL
471,3,2,15.35934,0,0,1,0,2,1,13,7,0,1
299,1,0,14.82546,1,0,3,0,3,0,10,3,0,1
63,1,0,15.85489,1,1,0,0,3,0,5,7,1,1
53,2,1,12.41615,1,1,3,0,3,1,9,9,1,1
127,2,1,15.19507,1,1,0,0,2,1,11,5,1,1


In [2]:
class LogisticRegression():
    
    def __init__(self, data_train, target_train, iters, alpha=0.01):
        
        self.n = data_train.shape[1]
        self.X_train = data_train
        self.y_train = np.reshape(target_train, (target_train.shape[0], 1))
        self.theta = np.reshape(np.random.uniform(-1, 1, self.n), (self.n, 1))
        self.iters = iters
        self.alpha = alpha
    
    def sigmoid(self):
        
        mid = np.dot(self.X_train, self.theta)
        final = 1.0 / (1. + np.exp(-1. * mid))
        
        return final 
    
    def gradient(self):
        
        mid = (self.sigmoid() - self.y_train)
        pen = np.dot(mid.T, self.X_train)
        
        return ((self.alpha / self.n) * pen).T
    
    def update_weights(self):

        # print self.theta
        self.theta = np.subtract(self.theta, self.gradient())
        
    def train(self):
        
        for i in range(self.iters):
            self.update_weights()
            
    def predict(self, X_test):
        
        mid = np.dot(X_test, self.theta)
        score = 1.0 / (1. + np.exp(-1. * mid))
        
        binary = [1 if score[i] > 0.5 else 0 for i in range(len(score))]
        # print binary[:10]
        
        return binary
    
    def evaluate(self, predictions, target_test):
    
        return np.mean(predictions == target_test)

In [4]:

def main():
    
    data = df.iloc[:,:-1]
    target = df.iloc[:, -1]
    cutoff = int(len(data) * .8)
    X_train, X_test = data[:cutoff], data[cutoff:]
    y_train, y_test = target[:cutoff], target[cutoff:]

    lr = LogisticRegression(X_train, y_train, 10)
    lr.train()

    predictions = lr.predict(X_test)
    
    accuracy = lr.evaluate(predictions, y_test)
    
    print "accuracy: ", accuracy
    
main()

    

accuracy:  0.852941176471
