# House price predictions with linear regression using a toy dataset

In [None]:
%run Coding_linear_regression.ipynb 
# allows us to use the functions we wrote

import numpy
import matplotlib.pyplot as plt

### Defining and plotting our dataset

Let's see how various implementations of Linear Regression work on our toy dataset.

In [None]:
# Toy dataset: the number of rooms of each house and the price of that house.
num_rooms = numpy.array([1, 2, 3, 5, 6, 7])
price_rooms = numpy.array([155, 197, 244, 356, 407, 448])

# Bundle features and labels under the keys that every model below reads.
dict_data = {
    'features': num_rooms,
    'labels'  : price_rooms,
}

print(dict_data)

# The labels are whole-house prices (the fitted slope is what gets reported as
# the price per room), so the y-axis is labelled as the house price.
plot_scatter(dict_data['features'], dict_data['labels'], "Number of Rooms", "House Price")
plt.show()

### Linear Regression: Using our tricks

We'll call the `linear_regression` implementation we wrote in `Coding_linear_regression.ipynb`, passing it the trick functions and the error metric defined in that notebook.

In [None]:
# Fix the seed once so that every run of this cell reproduces the same results.
numpy.random.seed(0)

# One entry per training run: (trick function, learning rate, number of epochs).
training_runs = [
    (absolute_trick, 0.05, 1000),
    (square_trick, 0.01, 10000),
]

for trick, rate, num_epochs in training_runs:
    # Pin the y-range to start at 0 so the x-axis appears in the figure.
    plt.ylim(0, 500)
    slope, bias = linear_regression(
        dict_data['features'],
        dict_data['labels'],
        trick_function=trick,
        learning_rate=rate,
        error_metric=rmse,
        epochs=num_epochs)
    # The slope is the price added per room; the bias is the base price.
    print('Price per room:', slope)
    print('Base price:', bias)

### Linear Regression: Using Turi Create

Turi Create can be installed on Linux and macOS. On Windows, it works via WSL (Windows Subsystem for Linux).

In [None]:
# Imported here rather than at the top because Turi Create is only needed
# for this section of the notebook.
import turicreate as tc

# Wrap the toy dataset in an SFrame; the cells below refer to its columns by
# the dict keys 'features' and 'labels'.
data = tc.SFrame(dict_data)

In [None]:
# Display the SFrame built from dict_data to check its contents.
data

In [None]:
# Fit a Turi Create linear regression that predicts the 'labels' column.
# No feature list is passed, so feature selection and all other settings are
# left to the library's defaults (presumably the remaining 'features' column
# is used -- confirm against the Turi Create docs).
model = tc.linear_regression.create(data, target='labels')

In [None]:
# Inspect the coefficients of the fitted Turi Create model.
model.coefficients

In [None]:
# A single unseen example: a house with 4 rooms (4 is not among num_rooms).
new_point = tc.SFrame({'features': [4]})

In [None]:
# Predicted label (price) for the 4-room house held in new_point.
model.predict(new_point)

### Linear Regression: Using statsmodels

In [None]:
import statsmodels.api as sm

# OLS only fits the columns it is handed: without an explicit column of
# constants there would be no intercept (base price) term in the model,
# so we add that column to the features ourselves.
exog = sm.add_constant(dict_data['features'])

# Ordinary least squares: labels are the dependent variable (endog),
# the constant + number-of-rooms design matrix is the regressor (exog).
model_linear_regression = sm.OLS(dict_data['labels'], exog)
results_linear_regression = model_linear_regression.fit()

In [None]:
# Render the full statsmodels summary table for the fitted OLS results.
results_linear_regression.summary()

In [None]:
# Fitted parameters, in order: base price (the intercept, from the constant
# column that add_constant puts first by default), then price per room (slope).
results_linear_regression.params # base-price and price per room

In [None]:
# Residuals vs. fitted values: a quick visual check of how far each
# prediction is from its label across the range of fitted prices.
plot_scatter(
    results_linear_regression.fittedvalues,
    results_linear_regression.resid,
    x_label = "Fitted Values",
    y_label = "Residual Values")
plt.show()

# Q-Q plot of the Pearson residuals, with a reference line fit through the
# quartiles (line="q").
#
# sm.qqplot returns the Figure it draws on. If that call is left as the last
# expression of the cell, Jupyter renders the returned Figure as the cell's
# output value AND the inline backend renders the still-open figure, so the
# plot appears twice. Binding the Figure to a name and calling plt.show()
# renders it exactly once.
qq_figure = sm.qqplot(results_linear_regression.resid_pearson, line = "q")
plt.show()