# Regressão Linear com NumPy

In [1]:
import numpy as np
import math
import time
from sklearn import linear_model

### Versão Vetorizada

$MSE(\hat{\mathbf{w}})=\frac{1}{N}(\mathbf{y}-\mathbf{X}\hat{\mathbf{w}})^T(\mathbf{y}-\mathbf{X}\hat{\mathbf{w}})$

In [2]:
def compute_mse_vectorized(w,X,Y):
    """Return the mean squared error of the linear model ``X . w`` against ``Y``.

    Args:
        w: Weight vector, one entry per column of ``X``.
        X: Design matrix, one row per sample.
        Y: Observed targets, one entry per sample.

    Returns:
        The squared residuals summed via a dot product and divided by
        ``len(Y)``.
    """
    n_samples = float(len(Y))
    residuals = Y - np.dot(X, w)
    # Inner product of the residual vector with itself = sum of squares.
    squared_error_sum = np.dot(residuals.T, residuals)
    return squared_error_sum / n_samples

In [3]:
def step_gradient_vectorized(w_current,X,Y,learningRate):
    """Perform one batch gradient-descent step for least-squares regression.

    The gradient used is that of the *sum* of squared errors,
    ``2 * X^T (X w - Y)``; it is not divided by the number of samples, so
    ``learningRate`` has to absorb that factor.

    Args:
        w_current: Current weight vector (1-D, one entry per column of ``X``).
            It is not modified.
        X: Design matrix, one row per sample.
        Y: 1-D vector of observed targets, one entry per sample.
        learningRate: Step size applied to the gradient.

    Returns:
        A new float array holding the updated weights.
    """
    # Work on a float copy: the caller's array must not be overwritten, and
    # integer weight arrays must not truncate the update.
    w = np.array(w_current, dtype=float)
    res = np.dot(X, w) - Y
    # res . X yields every partial derivative at once (equivalent to X^T res),
    # replacing the per-column Python loop.
    gradient = 2 * np.dot(res, X)
    return w - learningRate * gradient

In [4]:
def gradient_descent_runner_vectorized(starting_w, X,Y, learning_rate, num_iterations):
    """Run batch gradient descent for a fixed number of steps.

    Args:
        starting_w: Initial weight vector. It is copied and never modified.
        X: Design matrix, one row per sample.
        Y: Vector of observed targets.
        learning_rate: Step size forwarded to ``step_gradient_vectorized``.
        num_iterations: Number of update steps to perform; exactly this many
            steps are executed (0 returns a copy of ``starting_w``).

    Returns:
        The weight vector after ``num_iterations`` updates.
    """
    # Copy so the caller's initial weights survive even if the step function
    # updates its argument in place.
    w = np.array(starting_w, dtype=float)
    # range() performs exactly num_iterations steps; the former
    # `while i <= num_iterations` loop ran one step too many.
    for _ in range(int(num_iterations)):
        w = step_gradient_vectorized(w, X, Y, learning_rate)
    return w

In [5]:
def using_sklearn(X, Y):
    """Fit scikit-learn's ``LinearRegression`` on ``X`` and ``Y``.

    Returns:
        A tuple ``(intercept_, coef_)`` taken from the fitted estimator.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    fitted = linear_model.LinearRegression().fit(X, Y)
    return fitted.intercept_, fitted.coef_


In [6]:
def init_parameters(points):
    """Split a 2-D data array into design matrix, targets and initial weights.

    The first row of ``points`` is discarded. Of the remaining rows, the last
    column becomes the target vector and all other columns the features.

    Args:
        points: 2-D array with one sample per row and the target in the
            last column.

    Returns:
        A tuple ``(X_matrix, Y_cra, init_w)``:
        ``X_matrix`` is a float array of shape ``(rows - 1, cols - 1)``,
        ``Y_cra`` is the 1-D target vector of length ``rows - 1``, and
        ``init_w`` is a zero vector with ``cols - 1`` entries.
    """
    n_features = points.shape[1] - 1
    n_samples = points.shape[0] - 1

    # Everything below the first row.
    data_rows = points[1:]
    Y_cra = data_rows[:, -1]

    # Copy the feature columns into a freshly allocated float matrix.
    X_matrix = np.zeros((n_samples, n_features))
    X_matrix[...] = data_rows[:, :-1]

    init_w = np.zeros(n_features)
    return X_matrix, Y_cra, init_w


In [7]:
# Load the training data from the comma-separated file.
# NOTE(review): init_parameters discards the first row of `points`;
# presumably that row is the CSV header — TODO confirm.
points = np.genfromtxt("sample_treino.csv", delimiter=",")
# Prepend a column of ones so the first weight acts as the bias term.
points = np.c_[np.ones(len(points)),points]

# Last column -> targets, remaining columns -> features, weights start at zero.
X_matrix, Y_cra, init_w = init_parameters(points)

learning_rate = 0.00001
num_iterations = 500000 #stopping condition
print("Running...")
w = gradient_descent_runner_vectorized(init_w, X_matrix, Y_cra, learning_rate, num_iterations)
print ("Coeficientes encontrados...")
print (w)
print ("-------------------------------------")
print ("Usando o sklearn")
# NOTE(review): X_matrix already contains the column of ones; in the recorded
# output sklearn reports the bias in `intercept` and 0 as the first coefficient.
intercept, coef = using_sklearn(X_matrix,Y_cra) #results using sklearn
print ("Intercept: ", intercept)
print ("Coefs: ", coef)

Running...
Coeficientes encontrados...
[ 1.73398644  0.10310818  0.04660717  0.16408986  0.38136758  0.020324  ]
-------------------------------------
Usando o sklearn
Intercept:  1.73771151379
Coefs:  [ 0.          0.10304143  0.0464367   0.16409834  0.38117843  0.02027816]


In [8]:
#Comentários:
#Para estimar os coeficientes, foi utilizado um learning_rate de 0.00001 e, como condição de parada, o
#num_iterations de 500000. Com esses valores, o algoritmo retornou coeficientes bem semelhantes aos
#reportados pelo scikit-learn.