In [None]:
import numpy as np

In [None]:
# Parse the training CSV into a float matrix. np.genfromtxt cannot convert the
# textual header row to numbers, so row 0 may come back as NaN; it is left in
# place here and sliced away (data[1:, ...]) wherever columns are extracted.
data = np.genfromtxt(
    '../data/kc_house_train_data.csv',
    delimiter=',',
)

In [None]:
# Skip row 0 (the header row) and pick the two model columns: column 5 is the
# predictor and column 2 is the target. Judging by the comments on the
# prediction cells these are living area in sq ft and sale price -- TODO
# confirm against the CSV header.
input_feature, output_feature = data[1:, 5], data[1:, 2]

In [None]:
def simple_linear_regression(input_feature, output_feature):
    """Fit a one-feature least-squares line using the closed-form solution.

    Parameters
    ----------
    input_feature : array-like, 1-D
        Observed values of the single explanatory variable (x).
    output_feature : array-like, 1-D
        Observed responses (y); must have the same shape as ``input_feature``.

    Returns
    -------
    (intercept, slope) : tuple
        Coefficients w_0 and w_1 of the fitted line
        ``y = intercept + slope * x``.

    Raises
    ------
    AssertionError
        If the inputs are not non-empty 1-D sequences of identical shape, if
        the sum of either input is NaN, or if the slope denominator is exactly
        zero (e.g. a single observation or a constant ``input_feature``).
    """
    # Accept lists/tuples as well as arrays, and do all sums in floating point.
    x = np.asarray(input_feature, dtype=float)
    y = np.asarray(output_feature, dtype=float)

    # Compare shape tuples with each other. Comparing a scalar length against
    # a shape tuple would only work through NumPy broadcasting and fails with
    # an unrelated "ambiguous truth value" error for non-1-D input.
    assert x.ndim == 1 and x.shape == y.shape, (
        "input_feature and output_feature must be 1-D and of equal length")
    N = x.shape[0]
    assert N > 0, "at least one observation is required"

    sigma_xi = np.sum(x)
    sigma_yi = np.sum(y)
    # A NaN anywhere in the data propagates into the sums, so checking the
    # two totals is enough to ensure that no nan's are included.
    assert not (np.isnan(sigma_xi) or np.isnan(sigma_yi)), (
        "inputs must not contain NaN")

    sigma_xiyi = np.sum(np.multiply(x, y))
    sigma_xixi = np.sum(np.multiply(x, x))

    # Closed-form solutions for w_1 (slope) and w_0 (intercept).
    denominator = sigma_xixi - (sigma_xi) ** 2 / N
    # Without any spread in x the slope is undefined; fail loudly instead of
    # returning nan/inf from a division by zero.
    assert denominator != 0, (
        "input_feature has no spread; the slope is undefined")
    slope = (sigma_xiyi - sigma_xi / N * sigma_yi) / denominator
    intercept = sigma_yi / N - slope * sigma_xi / N

    return (intercept, slope)

In [None]:
def get_regression_predictions(input_feature, intercept, slope):
    """Evaluate the fitted line ``intercept + slope * x`` at ``input_feature``.

    ``input_feature`` may be a scalar or a NumPy array; the result has the
    shape produced by ordinary arithmetic on the arguments.
    """
    scaled_input = slope * input_feature
    predictions = intercept + scaled_input
    return predictions

In [None]:
# Fit the one-feature model on the training columns and show its coefficients.
intercept, slope = simple_linear_regression(
    input_feature,
    output_feature,
)
print(intercept, slope)

In [None]:
# Predicted price for a house with 2650 sq ft, using the fitted line.
get_regression_predictions(2650, intercept=intercept, slope=slope)

In [None]:
def get_residual_sum_of_squares(input_feature, output, intercept, slope):
    """Return the residual sum of squares of a one-feature linear model.

    The residual for each observation is ``output`` minus the line
    ``intercept + slope * input_feature``; the squared residuals are summed
    with ``np.sum``.
    """
    predicted = intercept + slope * input_feature
    residuals = output - predicted
    squared_residuals = np.multiply(residuals, residuals)
    return np.sum(squared_residuals)

In [None]:
# Residual sum of squares of the fitted line on the current
# input_feature / output_feature columns.
RSS = get_residual_sum_of_squares(
    input_feature,
    output_feature,
    intercept,
    slope,
)
print(RSS)

In [None]:
def inverse_regression_predictions(output, intercept, slope):
    """Invert the fitted line: return the input x for which
    ``intercept + slope * x`` equals ``output``.

    A zero slope makes the line non-invertible and trips the assertion.
    """
    assert slope != 0.
    offset_from_intercept = output - intercept
    return offset_from_intercept / slope

In [None]:
# Estimate the square footage of a house that sold for $800,000 by
# inverting the fitted line.
inverse_regression_predictions(800000, intercept=intercept, slope=slope)

In [None]:
# Repeat the fit with column 3 as the single predictor (the variable names
# suggest this is the bedroom count -- TODO confirm against the CSV header),
# then report its coefficients and its residual sum of squares.
in_f = data[1:, 3]
bedroom_intercept, bedroom_slope = simple_linear_regression(in_f, output_feature)
print(bedroom_intercept, bedroom_slope)
RSS_bedroom = get_residual_sum_of_squares(
    in_f,
    output_feature,
    bedroom_intercept,
    bedroom_slope,
)
print(RSS_bedroom)

# TEST DATA

In [None]:
# Load the test CSV the same way as the training file and take the same two
# columns (5 as predictor, 2 as target), again skipping row 0.
# NOTE: this rebinds input_feature / output_feature from the training columns
# to the test columns; re-running an earlier cell after this one will use the
# test data instead.
data_test = np.genfromtxt(
    '../data/kc_house_test_data.csv',
    delimiter=',',
)
input_feature, output_feature = data_test[1:, 5], data_test[1:, 2]

In [None]:
# Residual sum of squares of the existing intercept/slope evaluated on the
# current input_feature / output_feature columns (no refit happens here).
RSS = get_residual_sum_of_squares(
    input_feature,
    output_feature,
    intercept,
    slope,
)
print(RSS)

In [None]:
# Score the existing bedroom_intercept / bedroom_slope on column 3 of the
# test data (no refit happens here).
in_f = data_test[1:, 3]
RSS_bedroom = get_residual_sum_of_squares(
    in_f,
    output_feature,
    bedroom_intercept,
    bedroom_slope,
)
print(RSS_bedroom)