## Make Prediction Using Linear Model

In [71]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

from sklearn.model_selection import cross_val_score

In [72]:
from jupyterthemes import jtplot
jtplot.style()

In [73]:
from preprocessing import PrepareData

In [74]:
ppd = PrepareData()
df, sales = ppd.get_train_data()
sale_price = df[['SalePrice']].values

In [75]:
# df.drop(ppd.sales_attrs, axis=1, inplace=True)
df.drop("SalePrice", axis=1, inplace=True)

In [76]:
values = df.values
y = sale_price.reshape(-1, 1)

In [77]:
# split into train and test sets
n_train_time = int(len(y)*0.9)
train_x = values[:n_train_time, :]
train_y = y[:n_train_time]
test_x = values[n_train_time:, :]
test_y = y[n_train_time:]

In [8]:
lr = linear_model.LinearRegression()
model = lr.fit(train_x, train_y)



In [None]:
predictions = model.predict(test_x)
actual_values = test_y

In [None]:
print "\nR^2 is:", model.score(test_x, test_y)
print 'RMSE is:', mean_squared_error(test_y , predictions)

plt.scatter(predictions, actual_values, alpha=.75,
            color='b') #alpha helps to show overlapping data
plt.xlabel('Predicted Price')
plt.ylabel('Actual Price')
plt.title('Linear Regression Model')
plt.show()

In [13]:
test_df, test_ids = ppd.get_test_data()

In [14]:
def convert_to_price(sales, predict_results):
    max_s = sales.SalePrice.max()
    min_s = sales.SalePrice.min()
    predicted_p = predict_results*(max_s - min_s) + min_s
    return predicted_p

In [9]:
lr = linear_model.LinearRegression()
print cross_val_score(lr, values, y, cv=5)
model = lr.fit(values, y)

[0.83006675 0.82452187 0.81233194 0.8540777  0.64740002]


In [101]:
params = {
    'alpha_1': range(-6, 6),
    'alpha_2': range(-6, 6),
    'lambda_1': range(-6, 6),
    'lambda_2': range(-6, 6),
}

In [102]:
params

{'alpha_1': [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
 'alpha_2': [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
 'lambda_1': [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
 'lambda_2': [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]}

In [103]:
from sklearn.model_selection import GridSearchCV

In [104]:
clf = GridSearchCV(linear_model.BayesianRidge(), params, cv=5)

In [105]:
clf.fit(values, y)

KeyboardInterrupt: 

In [None]:
clf.cv_results_['mean_test_score']

In [80]:
for i in range(-6, 6):
    for j in range(-6, 6): 
        alpha_1 = 10**i
        alpha_2 = 10**j
        lr = linear_model.BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2)
        print "alpha_1:",alpha_1, "alpha_2", alpha_2
        print cross_val_score(lr, values, y, cv=3)
#   model = lr.fit(values, y)

alpha_1: 1e-06 alpha_2 1e-06
[0.8797898  0.84138854 0.77069013]
alpha_1: 1e-06 alpha_2 1e-05
[0.87978979 0.8413886  0.77069072]
alpha_1: 1e-06 alpha_2 0.0001
[0.87978971 0.84138917 0.77069661]
alpha_1: 1e-06 alpha_2 0.001
[0.87978893 0.84139487 0.77075543]
alpha_1: 1e-06 alpha_2 0.01
[0.87978152 0.84144544 0.77133705]
alpha_1: 1e-06 alpha_2 0.1
[0.87974371 0.84149288 0.77653528]
alpha_1: 1e-06 alpha_2 1
[0.87840538 0.8341157  0.79684479]
alpha_1: 1e-06 alpha_2 10
[0.82088202 0.76298927 0.77013123]
alpha_1: 1e-06 alpha_2 100
[0.53064071 0.47627258 0.5122142 ]
alpha_1: 1e-06 alpha_2 1000
[0.01582657 0.01607532 0.01559836]
alpha_1: 1e-06 alpha_2 10000
[0.00173657 0.00492814 0.00324935]
alpha_1: 1e-06 alpha_2 100000
[0.0005106  0.00232149 0.00037301]
alpha_1: 1e-05 alpha_2 1e-06
[0.8797898  0.84138854 0.77069013]
alpha_1: 1e-05 alpha_2 1e-05
[0.87978979 0.8413886  0.77069072]
alpha_1: 1e-05 alpha_2 0.0001
[0.87978971 0.84138917 0.77069661]
alpha_1: 1e-05 alpha_2 0.001
[0.87978893 0.8413948

[0.86025646 0.83029414 0.72440098]
alpha_1: 100000 alpha_2 0.0001
[0.86025649 0.83029416 0.72440106]
alpha_1: 100000 alpha_2 0.001
[0.86025682 0.8302943  0.72440192]
alpha_1: 100000 alpha_2 0.01
[0.8602601  0.83029574 0.7244104 ]
alpha_1: 100000 alpha_2 0.1
[0.8602929  0.83031017 0.72449576]
alpha_1: 100000 alpha_2 1
[0.86061877 0.83045898 0.72530593]
alpha_1: 100000 alpha_2 10
[0.86368976 0.83190127 0.73154872]
alpha_1: 100000 alpha_2 100
[0.87941881 0.84072084 0.766537  ]
alpha_1: 100000 alpha_2 1000
[0.86507322 0.81036378 0.79714626]
alpha_1: 100000 alpha_2 10000
[0.66644341 0.60813287 0.63724399]
alpha_1: 100000 alpha_2 100000
[0.03819003 0.03678077 0.04150148]


In [91]:
lr = linear_model.BayesianRidge(alpha_2=1)
model = lr.fit(values, y)

In [100]:
model

BayesianRidge(alpha_1=1e-06, alpha_2=1, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [92]:
predict_results = model.predict(test_df)

In [94]:
price = convert_to_price(sales, predict_results)

In [95]:
price.transpose()

array([101027.35893636, 154891.94490296, 164181.61725677, ...,
       150786.74563215,  99599.48592623, 217707.66168986])

In [96]:
test_ids['SalePrice'] = price.transpose()

In [98]:
test_ids

Unnamed: 0,Id,SalePrice
0,1461,101027.358936
1,1462,154891.944903
2,1463,164181.617257
3,1464,174318.185323
4,1465,204388.705032
5,1466,157755.849285
6,1467,161915.609364
7,1468,152023.983171
8,1469,189954.570631
9,1470,115573.644404


In [99]:
test_ids.to_csv("submission_alpha2_1.csv", index=False)