In [1]:
import numpy as np
import math
import time
import matplotlib.pyplot as plt
from sklearn import linear_model
import scipy.stats as st

## Mean Squared Error
$MSE(\hat{\mathbf{w}})=\frac{1}{N}(\mathbf{y}-\mathbf{X}\hat{\mathbf{w}})^T(\mathbf{y}-\mathbf{X}\hat{\mathbf{w}})$

In [3]:
def compute_mse_vectorized(w, X, Y):
    '''Return the mean squared error of the linear prediction ``X . w`` against ``Y``.

    w: coefficient vector multiplied on the right of X.
    X: design matrix, one row per sample.
    Y: observed targets; ``len(Y)`` is used as the number of samples.

    The squared residuals are summed with a dot product, so for 2-D column
    inputs the result is a 1x1 array rather than a bare scalar.
    '''
    residuals = np.subtract(Y, np.dot(X, w))
    squared_error_sum = np.dot(residuals.T, residuals)
    n_samples = float(len(Y))
    return squared_error_sum / n_samples

## Regressão Linear Vetorizada

In [6]:
def step_gradient_vectorized(w_current,X,Y,alpha):
    '''Take one gradient-descent step on the residual sum of squares (RSS).

    w_current: current coefficient vector.
    X: design matrix, one row per sample (must support ``.T``).
    Y: observed targets, shaped so that ``Y - X . w_current`` is well defined.
    alpha: step size.

    Returns a two-element list ``[new_w, gradient_rss]`` where ``gradient_rss``
    is the RSS gradient evaluated at ``w_current`` (before the step) and
    ``new_w`` is the updated coefficient vector.
    '''
    Y_pred = np.dot(X, w_current)  # predictions with the current coefficients
    res = np.subtract(Y, Y_pred)  # residuals: observed minus predicted
    # d/dw of (Y - Xw)^T (Y - Xw) is -2 X^T (Y - Xw).
    gradient_rss = -2 * np.dot(X.T, res)
    # Move AGAINST the gradient: RSS is being minimised, so adding the gradient
    # (as the previous version did) increases the error on every step.
    new_w = np.subtract(w_current, alpha * gradient_rss)
    return [new_w, gradient_rss]

In [5]:
def gradient_ascent_runner_vectorized(starting_w, X,Y, learning_rate, epsilon, max_iterations=None):
    '''Iterate ``step_gradient_vectorized`` until the gradient norm drops below ``epsilon``.

    starting_w: initial coefficient vector.
    X, Y: data passed unchanged to ``step_gradient_vectorized`` on every step.
    learning_rate: step size forwarded to ``step_gradient_vectorized``.
    epsilon: stop as soon as the norm of the last returned gradient is
        strictly smaller than this value.
    max_iterations: optional cap on the number of steps. ``None`` (the
        default) keeps the original behaviour of looping until the norm
        criterion is met, which never returns if the iteration does not
        converge.

    Returns the coefficient vector after the last step taken (``starting_w``
    itself if no step was taken).
    '''
    w = starting_w
    # Start from an infinite norm so the first step is always attempted,
    # independent of how many coefficients the model has.
    grad_norm = np.inf
    steps_taken = 0
    # A NaN norm also ends the loop, because ``nan >= epsilon`` is False.
    while grad_norm >= epsilon:
        if max_iterations is not None and steps_taken >= max_iterations:
            break
        w, grad = step_gradient_vectorized(w, X, Y, learning_rate)
        grad_norm = np.linalg.norm(grad)
        steps_taken += 1
    return w

In [12]:
# Read every field as text (the last column holds species names), then drop
# row 0, which is treated as the header line.
# NOTE(review): the recorded output below starts at 4.9/3.0/1.4/0.2 and the
# encoded labels show only 49 class-0 rows - confirm the CSV really has a
# header line; if it does not, this slice discards the first sample.
points_raw = np.genfromtxt("../data/iris.csv", delimiter=",", dtype="str")
points = points_raw[1:]

In [13]:
# Display the loaded table; because it was read with dtype="str", every field
# (measurements included) is still text at this point.
points

array([['4.9', '3.0', '1.4', '0.2', 'setosa'],
       ['4.7', '3.2', '1.3', '0.2', 'setosa'],
       ['4.6', '3.1', '1.5', '0.2', 'setosa'],
       ['5.0', '3.6', '1.4', '0.2', 'setosa'],
       ['5.4', '3.9', '1.7', '0.4', 'setosa'],
       ['4.6', '3.4', '1.4', '0.3', 'setosa'],
       ['5.0', '3.4', '1.5', '0.2', 'setosa'],
       ['4.4', '2.9', '1.4', '0.2', 'setosa'],
       ['4.9', '3.1', '1.5', '0.1', 'setosa'],
       ['5.4', '3.7', '1.5', '0.2', 'setosa'],
       ['4.8', '3.4', '1.6', '0.2', 'setosa'],
       ['4.8', '3.0', '1.4', '0.1', 'setosa'],
       ['4.3', '3.0', '1.1', '0.1', 'setosa'],
       ['5.8', '4.0', '1.2', '0.2', 'setosa'],
       ['5.7', '4.4', '1.5', '0.4', 'setosa'],
       ['5.4', '3.9', '1.3', '0.4', 'setosa'],
       ['5.1', '3.5', '1.4', '0.3', 'setosa'],
       ['5.7', '3.8', '1.7', '0.3', 'setosa'],
       ['5.1', '3.8', '1.5', '0.3', 'setosa'],
       ['5.4', '3.4', '1.7', '0.2', 'setosa'],
       ['5.1', '3.7', '1.5', '0.4', 'setosa'],
       ['4.6'

In [20]:
def predict(Y):
    '''Encode class labels as 0/1 integers.

    The first entry of ``np.unique(Y)`` (the smallest label in sorted order)
    maps to 0; every other label maps to 1. The result has the same shape
    as ``Y``.
    '''
    reference_label = np.unique(Y)[0]
    return np.where(Y != reference_label, 1, 0)

In [21]:
# Feature matrix: the first four columns, parsed from text to floats.
feature_text = points[:, [0, 1, 2, 3]]
X = feature_text.astype("float")
# Labels: column 4 is selected with a list index so it stays 2-D (one column),
# then its class names are encoded as 0/1.
label_text = points[:, [4]]
Y = predict(label_text)

In [22]:
# Display the encoded labels: a single column of 0/1 integers, one row per sample.
Y

array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
    

In [23]:
# Optimiser settings: step size, and the gradient-norm threshold below which
# the runner stops iterating.
learning_rate = 1e-6
epsilon = 0.5

# Initial weights: an all-zero column vector with one entry per column of X.
num_coeficients = X.shape[1]
init_w = np.zeros((num_coeficients, 1))

In [None]:
# Time one complete run of the gradient runner, starting from the zero weights.
start_grad_asc_runner_time = time.time()
w = gradient_ascent_runner_vectorized(init_w, X, Y, learning_rate, epsilon)
end_grad_asc_runner_time = time.time()
grad_asc_runner_elapsed = end_grad_asc_runner_time - start_grad_asc_runner_time
print('time: {0}'.format(grad_asc_runner_elapsed))

In [None]:
# Weight vector returned by the gradient runner: one row per column of X,
# in the same column order.
print('Coefficients: \n', w)

## Regressão com sklearn

In [None]:
start_sklearn_reg = time.time()
# Ordinary least-squares reference fit from scikit-learn.
# fit_intercept=False: the gradient-based model above has no bias term (X has
# no column of ones and the weight vector has exactly X.shape[1] entries), so
# the default intercept fit would yield coefficients that cannot be compared
# with w.
regr = linear_model.LinearRegression(fit_intercept=False)
# Fit on the same X and Y that were given to the gradient runner.
regr.fit(X, Y)
end_sklearn_reg = time.time()
print('time: {0}'.format(end_sklearn_reg - start_sklearn_reg))
# With a 2-D Y, coef_ has shape (n_targets, n_features); transpose it so it is
# printed as a column, in the same layout as w.
print('Coefficients: \n', regr.coef_.T)