# Regressão Logística

#### Imports:

In [406]:
%matplotlib inline
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
from numpy import *
import sys
import csv

#### Preprocessing data

In [453]:
# Comma-separated data file, read without a header row.
input_filename = "iris.data.txt"
att = pd.read_csv(input_filename, sep=',', header=None)

# Every column except the last is an input; a leading column of ones is
# prepended to the inputs before training.
features = att.iloc[:, :-1]
H = np.c_[np.ones(len(features)), features]

# Binary target built from the last column: 1.0 for "Iris-setosa", 0.0 otherwise.
labels = att.iloc[:, -1]
y = np.where(labels == "Iris-setosa", 1.0, 0.0)

#### Compute the norm:

$\left| \left| \nabla l(w^{(t)}) \right| \right| = \sqrt{\displaystyle \sum_{j=1}^{m}  \left( \frac{\partial l(w^{(t)})}{\partial w_{j}} \right)^{2} }$

I implemented Logistic Regression based on my Multiple Linear Regression implementation.

In [439]:
def compute_norma(vector):
    """Return the Euclidean (L2) norm of ``vector``.

    The result is the square root of the sum of the squared entries.
    """
    squared_entries = vector ** 2
    return np.sqrt(np.sum(squared_entries))

#### Compute one gradient step to train Logistic Regression

$\frac{\partial l(w)}{\partial w_{j} } = \displaystyle \sum_{i=1}^{N}{ \left( y_{i} - \frac{1}{1+e^{-w^{T}x_{i}}} \right) x_{ij} }$

In [456]:
def step_gradient(H, w_current, y, learning_rate):
    """Perform one gradient-ascent update of the logistic-regression weights.

    Parameters
    ----------
    H : numpy.ndarray
        Input matrix; expected to be 2-D with one row per sample and one
        column per weight (it is multiplied by ``w_current``).
    w_current : numpy.ndarray
        Current weight vector.
    y : numpy.ndarray
        Target values; assumed to be 1-D with one entry per row of ``H``.
    learning_rate : float
        Step size applied to the gradient.

    Returns
    -------
    list
        ``[w, norma]`` where ``w`` is the updated weight vector and ``norma``
        is the norm of the gradient evaluated at ``w_current`` (i.e. before
        the update).
    """
    # Predicted probabilities: sigmoid of the linear scores H . w.
    # np.exp is referenced explicitly so the function does not depend on
    # names leaked by a wildcard import.
    predictions = 1 / (1 + np.exp(-np.dot(H, w_current)))
    diff = y - predictions

    # Gradient of the log-likelihood: sum_i diff_i * H[i, :], i.e. H^T . diff.
    partial = np.dot(H.transpose(), diff)

    norma = compute_norma(partial)

    # Ascent step: move in the direction of the gradient.
    w = w_current + (learning_rate * partial)

    return [w, norma]

#### Run the complete gradient ascent:

In [457]:
def gradient_ascendent(H, y, learning_rate, epsilon, max_iterations=None):
    """Train weights by repeating gradient-ascent steps until convergence.

    Starting from an all-zero weight vector, ``step_gradient`` is applied
    until the gradient norm it reports is no longer greater than ``epsilon``.

    Parameters
    ----------
    H : numpy.ndarray
        Input matrix; ``H.shape[1]`` determines the number of weights.
    y : numpy.ndarray
        Target values, forwarded to ``step_gradient``.
    learning_rate : float
        Step size, forwarded to ``step_gradient``.
    epsilon : float
        Convergence threshold on the gradient norm.
    max_iterations : int or None, optional
        Upper bound on the number of steps. ``None`` (the default) means no
        bound, so a run that never converges will not terminate.

    Returns
    -------
    list
        ``[w, num_iterations, gradient]``: the final weights, the number of
        steps taken, and the gradient norm reported by the last step. If
        ``max_iterations`` is 0 no step is taken and the norm is ``inf``.
    """
    w = np.zeros(H.shape[1])  # one weight per column of H
    num_iterations = 0
    # Start at infinity so at least one step is always taken and the returned
    # norm is a value computed from the data, never an arbitrary sentinel.
    gradient = np.inf

    # Note: a NaN gradient norm also ends the loop, since NaN > epsilon is False.
    while gradient > epsilon:
        if max_iterations is not None and num_iterations >= max_iterations:
            break
        w, gradient = step_gradient(H, w, y, learning_rate)
        num_iterations += 1

    return [w, num_iterations, gradient]

Running the logistic regression:

In [458]:
# Hyper-parameters: step size and the gradient-norm threshold that stops training.
learning_rate = 0.0053
epsilon = 0.001

w, num_iterations, norm_gradient = gradient_ascendent(H, y, learning_rate, epsilon)

# Report the final gradient norm, the learned weights and the step count.
report = "Norma: {0}\nw: {1}\nnum_iterations: {2}\n\n"
print(report.format(norm_gradient, w, num_iterations))

Norma: 0.000999996847564
w: [ 0.98421228  1.6321127   5.21835306 -8.47674735 -4.2136109 ]
num_iterations: 217613




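Computing the same values with Scikit-learn. Note that `LogisticRegression` applies L2 regularization by default (`C=1.0`), so its coefficients are not expected to match the unregularized gradient-ascent weights above exactly.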

In [435]:
# Fit scikit-learn's model on the same inputs. The first column of H is
# dropped because the estimator reports its intercept separately.
inputs_without_bias = H[:, 1:]
reg = LogisticRegression(tol=1e-5)
reg.fit(inputs_without_bias, y)

print("\nCoef with scikit-learn: {0}".format(reg.coef_))
print("\nIntercept with scikit-learn: {0}".format(reg.intercept_))


Coef with scikit-learn: [[ 0.41498833  1.46129739 -2.26214118 -1.0290951 ]]

Intercept with scikit-learn: [ 0.26560617]
