## Regressão Linear Multipla

In [77]:
from statistic import correlation,standart_deviantion, mean, de_mean, median
from vectors import dot, vector_subtract, scalar_multiply
from gradiente import minimize_stochastic
import numpy as np
import matplotlib.pyplot as plt
import random

In [78]:
np.random.seed(42)

num_friends_good = np.random.randint(10,100,50)
work_hours = np.random.randint(30,80,50)
has_phd = np.random.randint(0,2,50)
ruido = np.random.randint(-15,15,50) 
daily_minutes_good = 0.8 * num_friends_good + 10 + ruido - 0.45*work_hours + 0.12*has_phd

In [79]:
def predict(x_i, beta):
    return dot(x_i,beta)

def error(x_i,y_i,beta):
    return y_i - predict(x_i,beta)

def squared_error(x_i, y_i, beta):
    return error(x_i,y_i,beta)**2

def squared_error_gradient(x_i, y_i, beta):
    return [-2*x_ij*error(x_i, y_i, beta) for x_ij in x_i]

def estimate_beta(x,y):
    beta_initial = [np.random.random() for x_i in x[0]]
    return minimize_stochastic(squared_error, squared_error_gradient, x,y, beta_initial, 0.001)

np.random.seed(0)
x = list(zip(np.zeros(len(num_friends_good)),num_friends_good,work_hours,has_phd))

beta = estimate_beta(x,daily_minutes_good)
beta


[0.5488135039273248,
 0.7151893663724195,
 0.6027633760716439,
 0.5448831829968969]

In [80]:
def total_sum_of_squares(y):
    return sum(v**2 for v in de_mean(y))

def multiple_r_squared(x,y,beta):
    sum_of_squared_errors = sum(error(x_i,y_i,beta)**2 for x_i,y_i in zip(x,y))

    return 1 - (sum_of_squared_errors/total_sum_of_squares(y))


multiple_r_squared(x,daily_minutes_good,beta)

-2.241364497925962

In [81]:
def bootstrap_sample(data):
    return [random.choice(data) for _ in data]

def bootstrap_statistic(data, stats_fn, num_samples):
    return [stats_fn(bootstrap_sample(data)) for _ in range(num_samples)]

In [82]:
# 101 pontos todos muito próximos de 100
close_to_100 = [99.5 + np.random.random() for _ in range(101)]
# 101 pontos, 50 próximos de 0, 50 próximos de 200
far_from_100 = ([99.5 + np.random.random()] +
[np.random.random() for _ in range(50)] +
[200 + np.random.random() for _ in range(50)])

In [83]:
def estimate_sample_beta(sample):
    x_sample, y_sample = zip(*sample)
    return estimate_beta(x_sample, y_sample)

In [88]:
bootstrap_beta = bootstrap_statistic(list(zip(x,daily_minutes_good)),estimate_sample_beta,100)

bootstrap_standard_error = [standart_deviantion([beta[i] for beta in bootstrap_beta]) for i in range(4)]

bootstrap_standard_error

[0.30473634411065953,
 0.30394060197394385,
 0.2784904061206433,
 0.3011111265807051]