# Regressão Logística

Brunna de Sousa Pereira Amorim

In [1]:
%matplotlib inline
import numpy as np
from sklearn import linear_model
import pandas as pd
import time

### Calculando Função logística (Sigmoid)

$\sigma (x) = \displaystyle \frac{1}{1 + e^{-x}}$

In [5]:
def logistic(w, X):
    return 1 / (1 + np.exp(-np.dot(X, w)))

### Gradiente ascendente

In [9]:
def step_gradient(w_current, X, Y, learningRate):
    grad = np.zeros((len(w_current), 1))
    
    for var in range(len(grad)):
        diff = (Y - logistic(w_current, X))
        grad[var] += np.dot(X[:,var], diff)
    
    w_current += (grad * learningRate)
    return [w_current, grad]

In [10]:
def gradient_ascendant_runner(starting_w, X,Y, learning_rate, epsilon):
    w = starting_w
    grad = np.array([np.inf,np.inf, np.inf, np.inf, np.inf])
    while (np.linalg.norm(grad)>=epsilon):
        w,grad = step_gradient(w, X, Y, learning_rate)
    return w

### Execução do algoritmo

In [11]:
data = pd.read_csv("iris.csv")
X = np.c_[np.ones(len(data)), data[['sepal-length', 'sepal-width', 'petal-length', 'petal-width']]]

data['classe'] = np.where(data['classe'] == 'Iris-setosa', 0, 1)
Y = data[['classe']]

init_w = np.zeros((5,1))
learning_rate = 0.00001
epsilon = 3.0

tic = time.time()
w = gradient_ascendant_runner(init_w, X,Y, learning_rate, epsilon)
toc = time.time()
print("Gradiente ascendente convergiu com os coeficientes {0}".format(w))
print("Versão vetorizada rodou em: " + str(1000*(toc-tic)) + " ms")


Gradiente ascendente convergiu com os coeficientes [[-0.27064657]
 [-0.4268518 ]
 [-1.4302173 ]
 [ 2.25510328]
 [ 1.02355071]]
Versão vetorizada rodou em: 35770.9999084 ms


In [12]:
data = pd.read_csv("iris.csv")
X = np.c_[np.ones(len(data)), data[['sepal-length', 'sepal-width', 'petal-length', 'petal-width']]]

data['classe'] = np.where(data['classe'] == 'Iris-setosa', 0, 1)
Y = data[['classe']]

init_w = np.zeros((5,1))
learning_rate = 0.00001
epsilon = 3.0

tic = time.time()
w = gradient_ascendant_runner(init_w, X,Y, learning_rate, epsilon)

print("foi")

foi


### Coeficientes usando o scikit

In [13]:
regressao = linear_model.LogisticRegression()
regressao.fit(X, data.classe)
print("Coeficientes usando o scikit: {0}".format(regressao.coef_))

Coeficientes usando o scikit: [[-0.25478973 -0.38180457 -1.43873414  2.26192475  1.02592844]]


### Função de predição

In [14]:
def predict(w, X):
    class_name = list(["Virginica/Versicolour","Setosa"])
    prediction = logistic(w, X)
    return [np.where(np.round(prediction), class_name[1], class_name[0] ), np.greater_equal(prediction, 0.5).astype(int)]

[name, predictions] = np.c_[predict(w, X), Y]
for pred in predictions:
    result += "Classe: " + pred[0] + " - " + str(pred[1]) + ";"

print(result)

ValueError: all the input arrays must have same number of dimensions