# Linear model 
This notebook fits cross-validated ridge regression models (`RidgeCV`) on several preprocessed datasets and writes one submission CSV per dataset.

In [1]:
# Data handling, numerics, statistics and plotting libraries.
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats as st
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline 
plt.style.use('ggplot')
import warnings
import json
import pickle
# NOTE(review): this silences every warning for the whole session, including
# deprecation warnings raised by the libraries used in the cells below.
warnings.filterwarnings('ignore')

In [2]:
# Directory (relative to this notebook) holding the preprocessed pickle bundles.
modified = "../../data/modified/"

In [3]:
# Path of the first dataset bundle to model.
OneHot = modified + "OneHotDerived2_Lasso.pkl"

In [4]:
# Load the preprocessed bundle. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(OneHot, "rb") as fh:
    OneHotData = pickle.load(fh)

In [5]:
# Unpack the bundle: train/test feature matrices, then target and test ids.
df_train_X, df_test_X = OneHotData["train_X"], OneHotData["test_X"]
y, Id = OneHotData["y"], OneHotData["id"]

In [6]:
from sklearn.metrics import make_scorer, mean_squared_error

# Model-selection scorer based on mean squared error. scikit-learn maximizes
# scores, so greater_is_better=False makes the scorer return the negated MSE.
# The flag is passed by keyword: newer scikit-learn releases accept it only as
# a keyword argument and reject the positional form.
scorer = make_scorer(mean_squared_error, greater_is_better=False)

In [7]:
from sklearn import linear_model

# Ridge regression whose penalty is chosen by 10-fold cross-validation over
# the integer alphas 25..99, ranked with the negated-MSE scorer.
reg = linear_model.RidgeCV(
    alphas=range(25, 100),
    scoring=scorer,
    cv=10,
)

In [8]:
# Fit on the training split; RidgeCV picks alpha from its grid by cross-validation.
reg.fit(df_train_X, y)

RidgeCV(alphas=range(25, 100), cv=10, fit_intercept=True, gcv_mode=None,
    normalize=False,
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    store_cv_values=False)

In [9]:
# Display the alpha selected during fitting.
reg.alpha_

72

In [10]:
# expm1 maps the model output back to the price scale; presumably y was
# log1p-transformed upstream (TODO confirm against the preprocessing step).
p_pred = np.expm1(reg.predict(df_test_X))

# Two-column submission table with an explicit column order.
solution = pd.DataFrame(
    {"id": Id, "SalePrice": p_pred},
    columns=["id", "SalePrice"],
)
solution.to_csv(
    "../../data/submission/OneHotDerived2-Lasso-Ridge.csv",
    index=False,
)

In [11]:
# Switch to the next dataset bundle (rebinds the OneHot path used by the load cell).
OneHot = modified + "OneHotDerived2_RLasso0.pkl"

In [12]:
# Load the preprocessed bundle. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(OneHot, "rb") as fh:
    OneHotData = pickle.load(fh)

In [13]:
# Unpack the bundle: train/test feature matrices, then target and test ids.
df_train_X, df_test_X = OneHotData["train_X"], OneHotData["test_X"]
y, Id = OneHotData["y"], OneHotData["id"]

In [14]:
from sklearn.metrics import make_scorer, mean_squared_error

# Model-selection scorer based on mean squared error. scikit-learn maximizes
# scores, so greater_is_better=False makes the scorer return the negated MSE.
# The flag is passed by keyword: newer scikit-learn releases accept it only as
# a keyword argument and reject the positional form.
scorer = make_scorer(mean_squared_error, greater_is_better=False)

In [15]:
from sklearn import linear_model

# Ridge regression whose penalty is chosen by 10-fold cross-validation over
# the integer alphas 200..249, ranked with the negated-MSE scorer.
reg = linear_model.RidgeCV(
    alphas=range(200, 250),
    scoring=scorer,
    cv=10,
)

In [16]:
# Fit on the training split; RidgeCV picks alpha from its grid by cross-validation.
reg.fit(df_train_X, y)

RidgeCV(alphas=range(200, 250), cv=10, fit_intercept=True, gcv_mode=None,
    normalize=False,
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    store_cv_values=False)

In [17]:
# Display the alpha selected during fitting.
reg.alpha_

228

In [18]:
# expm1 maps the model output back to the price scale; presumably y was
# log1p-transformed upstream (TODO confirm against the preprocessing step).
p_pred = np.expm1(reg.predict(df_test_X))

# Two-column submission table with an explicit column order.
solution = pd.DataFrame(
    {"id": Id, "SalePrice": p_pred},
    columns=["id", "SalePrice"],
)
solution.to_csv(
    "../../data/submission/OneHotDerived2-RLasso0-Ridge.csv",
    index=False,
)

In [19]:
# Switch to the next dataset bundle (rebinds the OneHot path used by the load cell).
OneHot = modified + "OneHotDerived2_RLasso1.0.pkl"

In [20]:
# Load the preprocessed bundle. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(OneHot, "rb") as fh:
    OneHotData = pickle.load(fh)

In [21]:
# Unpack the bundle: train/test feature matrices, then target and test ids.
df_train_X, df_test_X = OneHotData["train_X"], OneHotData["test_X"]
y, Id = OneHotData["y"], OneHotData["id"]

In [22]:
# Ridge regression whose penalty is chosen by 10-fold cross-validation over
# the integer alphas 150..249, ranked with the negated-MSE scorer.
reg = linear_model.RidgeCV(
    alphas=range(150, 250),
    scoring=scorer,
    cv=10,
)

In [23]:
# Fit on the training split; RidgeCV picks alpha from its grid by cross-validation.
reg.fit(df_train_X, y)

RidgeCV(alphas=range(150, 250), cv=10, fit_intercept=True, gcv_mode=None,
    normalize=False,
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    store_cv_values=False)

In [24]:
# Display the alpha selected during fitting.
reg.alpha_

159

In [25]:
# expm1 maps the model output back to the price scale; presumably y was
# log1p-transformed upstream (TODO confirm against the preprocessing step).
p_pred = np.expm1(reg.predict(df_test_X))

# Two-column submission table with an explicit column order.
solution = pd.DataFrame(
    {"id": Id, "SalePrice": p_pred},
    columns=["id", "SalePrice"],
)
# NOTE(review): the input bundle is OneHotDerived2_RLasso1.0.pkl, yet the file
# name below says "RLasso0-Ridge1.0" -- confirm this naming is intended.
solution.to_csv(
    "../../data/submission/OneHotDerived2-RLasso0-Ridge1.0.csv",
    index=False,
)

In [26]:
# Point at the next dataset bundle and load it. The context manager closes the
# file handle deterministically instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
OneHot = modified + "OneHotDerived2_RLasso2.0.pkl"
with open(OneHot, "rb") as fh:
    OneHotData = pickle.load(fh)

In [27]:
# Unpack the bundle: train/test feature matrices, then target and test ids.
df_train_X, df_test_X = OneHotData["train_X"], OneHotData["test_X"]
y, Id = OneHotData["y"], OneHotData["id"]

In [28]:
# Ridge regression whose penalty is chosen by 10-fold cross-validation over
# the integer alphas 50..149, ranked with the negated-MSE scorer.
reg = linear_model.RidgeCV(
    alphas=range(50, 150),
    scoring=scorer,
    cv=10,
)

In [29]:
# Fit on the training split; RidgeCV picks alpha from its grid by cross-validation.
reg.fit(df_train_X, y)
# Last expression of the cell, so the notebook displays the selected alpha.
reg.alpha_

89

In [30]:
# expm1 maps the model output back to the price scale; presumably y was
# log1p-transformed upstream (TODO confirm against the preprocessing step).
p_pred = np.expm1(reg.predict(df_test_X))

# Two-column submission table with an explicit column order.
solution = pd.DataFrame(
    {"id": Id, "SalePrice": p_pred},
    columns=["id", "SalePrice"],
)
# NOTE(review): the input bundle is OneHotDerived2_RLasso2.0.pkl, yet the file
# name below says "RLasso0-Ridge2.0" -- confirm this naming is intended.
solution.to_csv(
    "../../data/submission/OneHotDerived2-RLasso0-Ridge2.0.csv",
    index=False,
)

In [31]:
# Point at the next dataset bundle and load it. The context manager closes the
# file handle deterministically instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
Bayesian = modified + "BayesianDerived2_Lasso.pkl"
with open(Bayesian, "rb") as fh:
    BayesianData = pickle.load(fh)

In [32]:
# Unpack the bundle: train/test feature matrices, then target and test ids.
df_train_X, df_test_X = BayesianData["train_X"], BayesianData["test_X"]
y, Id = BayesianData["y"], BayesianData["id"]

In [33]:
# Ridge regression whose penalty is chosen by 10-fold cross-validation over
# the integer alphas 50..99, ranked with the negated-MSE scorer.
reg = linear_model.RidgeCV(
    alphas=range(50, 100),
    scoring=scorer,
    cv=10,
)

In [34]:
# Fit on the training split; RidgeCV picks alpha from its grid by cross-validation.
reg.fit(df_train_X, y)
# Last expression of the cell, so the notebook displays the selected alpha.
reg.alpha_

64

In [35]:
# expm1 maps the model output back to the price scale; presumably y was
# log1p-transformed upstream (TODO confirm against the preprocessing step).
p_pred = np.expm1(reg.predict(df_test_X))

# Two-column submission table with an explicit column order.
solution = pd.DataFrame(
    {"id": Id, "SalePrice": p_pred},
    columns=["id", "SalePrice"],
)
solution.to_csv(
    "../../data/submission/BayesianDerived2_Lasso-Ridge.csv",
    index=False,
)

In [36]:
# Point at the next dataset bundle and load it. The context manager closes the
# file handle deterministically instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
Bayesian = modified + "BayesianDerived2_RLasso0.pkl"
with open(Bayesian, "rb") as fh:
    BayesianData = pickle.load(fh)

In [37]:
# Unpack the bundle: train/test feature matrices, then target and test ids.
df_train_X, df_test_X = BayesianData["train_X"], BayesianData["test_X"]
y, Id = BayesianData["y"], BayesianData["id"]

In [38]:
# Ridge regression whose penalty is chosen by 10-fold cross-validation over a
# fine grid of alphas from 0.34 to 0.39, ranked with the negated-MSE scorer.
reg = linear_model.RidgeCV(
    alphas=[0.34, 0.35, 0.36, 0.37, 0.38, 0.39],
    scoring=scorer,
    cv=10,
)

In [39]:
# Fit on the training split; RidgeCV picks alpha from its grid by cross-validation.
reg.fit(df_train_X, y)
# Last expression of the cell, so the notebook displays the selected alpha.
reg.alpha_

0.36

In [40]:
# expm1 maps the model output back to the price scale; presumably y was
# log1p-transformed upstream (TODO confirm against the preprocessing step).
p_pred = np.expm1(reg.predict(df_test_X))

# Two-column submission table with an explicit column order.
solution = pd.DataFrame(
    {"id": Id, "SalePrice": p_pred},
    columns=["id", "SalePrice"],
)
solution.to_csv(
    "../../data/submission/BayesianDerived2_RLasso0-Ridge.csv",
    index=False,
)

In [41]:
# Point at the next dataset bundle and load it. The context manager closes the
# file handle deterministically instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
Bayesian = modified + "BayesianDerived2_RLasso1.0.pkl"
with open(Bayesian, "rb") as fh:
    BayesianData = pickle.load(fh)

In [42]:
# Unpack the bundle: train/test feature matrices, then target and test ids.
df_train_X, df_test_X = BayesianData["train_X"], BayesianData["test_X"]
y, Id = BayesianData["y"], BayesianData["id"]

In [43]:
# Ridge regression whose penalty is chosen by 10-fold cross-validation over
# the integer alphas 20..49, ranked with the negated-MSE scorer.
reg = linear_model.RidgeCV(
    alphas=range(20, 50),
    scoring=scorer,
    cv=10,
)

In [44]:
# Fit on the training split; RidgeCV picks alpha from its grid by cross-validation.
reg.fit(df_train_X, y)
# Last expression of the cell, so the notebook displays the selected alpha.
reg.alpha_

37

In [45]:
# expm1 maps the model output back to the price scale; presumably y was
# log1p-transformed upstream (TODO confirm against the preprocessing step).
p_pred = np.expm1(reg.predict(df_test_X))

# Two-column submission table with an explicit column order.
solution = pd.DataFrame(
    {"id": Id, "SalePrice": p_pred},
    columns=["id", "SalePrice"],
)
solution.to_csv(
    "../../data/submission/BayesianDerived2_RLasso1.0-Ridge.csv",
    index=False,
)