In this notebook I fit several linear regression models (ordinary least squares, Ridge, and Lasso), compare them on a held-out validation set, and choose the best one.

In [48]:
%%capture
%run transformation.ipynb
# %run executes another notebook in this kernel, so its variables and imports become available here

In [49]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

### **Label encoded Data**

In [50]:
# Read the label-encoded train and test CSV files into DataFrames.
train_data, test_data = (
    pd.read_csv("./data/label_encoded_train.csv"),
    pd.read_csv("./data/label_encoded_test.csv"),
)

In [51]:
# Separate the regression target (SalePrice) from the remaining feature columns.
TARGET_COLUMN = 'SalePrice'
X_train_data = train_data.drop(columns=[TARGET_COLUMN])
Y = train_data[TARGET_COLUMN]

In [52]:
# Hold out 20% of the training rows for validation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_data,
    Y,
    test_size=0.2,
    random_state=42,
)

In [53]:
# Baseline comparison: fit each linear model with its default hyper-parameters
# on the training split and score it on the validation split.
lr_reg = LinearRegression()
ridge_reg = Ridge()
lasso_reg = Lasso()

# Scores are stored in the order: LinearRegression, Ridge, Lasso.
mse_1 = []
r_2 = []

for candidate in (lr_reg, ridge_reg, lasso_reg):
    candidate.fit(X_train, y_train)
    pred = candidate.predict(X_val)
    mse = mean_squared_error(y_val, pred)
    mse_1.append(mse)
    r_2.append(r2_score(y_val, pred))

print(mse_1)
print(r_2)

[481622563.58382934, 485064084.7750262, 481527751.71527195]
[0.8890470095142413, 0.8882541748406703, 0.8890688516394767]


In [54]:
# Tune the Ridge regularisation strength (alpha) with a 5-fold cross-validated
# grid search, ranking candidates by negative mean squared error.
# NOTE(review): the search is fit on the full training table (X_train_data, Y),
# which includes the rows held out as X_val -- confirm this is intended.
alpha_grid = {'alpha': [0.05, 0.1, 1, 5, 8, 10, 12, 15, 20]}

grid_model = GridSearchCV(
    estimator=ridge_reg,
    param_grid=alpha_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_model.fit(X_train_data, Y)

# Estimator configured with the best alpha found by the search.
best_ridge = grid_model.best_estimator_
print(best_ridge)

Ridge(alpha=1)


In [55]:
# Refit Ridge on the training split using the alpha selected by the grid search,
# then score it on the validation split. Reading the value from
# grid_model.best_params_ (rather than hard-coding it) keeps this cell in sync
# with the search if the data or the alpha grid changes.
ridge_reg = Ridge(alpha=grid_model.best_params_['alpha'])
ridge_reg.fit(X_train, y_train)
pred = ridge_reg.predict(X_val)
mse = mean_squared_error(y_val, pred)
print(mse)
print(r2_score(y_val, pred))

485064084.7750262
0.8882541748406703


In [56]:
# Predict sale prices for the test table with the Ridge model fitted on X_train.
# NOTE(review): test_data is passed whole, so it presumably has exactly the same
# feature columns (in the same order) as X_train -- confirm.
predictions = ridge_reg.predict(test_data)

In [57]:
# Assemble the submission table: one row per test Id with its predicted SalePrice.
submission = pd.DataFrame(
    data={
        "Id": test_data['Id'],
        "SalePrice": predictions,
    }
)

In [58]:
# Write the submission file without the DataFrame index column.
submission.to_csv(
    "./submissions/submission_ridge.csv",
    index=False,
)