### Import Libraries

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from xgboost.sklearn import XGBRegressor
from sklearn.svm import SVR
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer #transform different types
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score
from math import sqrt

### Data

In [None]:
# Load Boston housing dataset
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm this notebook runs against a release that still ships it.
from sklearn.datasets import load_boston
# `boston` is the dataset container; later cells read its `data`, `feature_names`,
# `target` and `DESCR` attributes.
boston = load_boston()

In [None]:
# Show the fields exposed by the dataset container, then its description text.
print(boston.keys(), boston.DESCR, sep='\n')
# Extra inspection, disabled by default:
#print(boston.feature_names)
#print(type(boston))

In [None]:
# Build one table: a column per feature, with the target appended as 'MEDV'.
df = pd.DataFrame(boston.data, columns=boston.feature_names).assign(MEDV=boston.target)
# Notebook display: (rows, columns) of the assembled frame.
df.shape

In [None]:
# Count missing values per column (notebook display).
df.isna().sum()

In [None]:
# Summary statistics for every column of the frame (notebook display).
df.describe()

In [None]:
#df.info()

### Split into Training and Test Datasets

In [None]:
# Predictor columns used by every model; 'MEDV' is the regression target.
features = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
y = df.loc[:, 'MEDV'].copy()
X = df.loc[:, features].copy()

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

### Pipelines: Pre-Processing Stage

In [None]:
# Every selected feature goes through the numeric preprocessing below.
numerical_features = features
# Numeric preprocessing: impute first, then standardize.
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer()),
    ('scaler', StandardScaler()),
])

In [None]:
# Route the numeric columns through the numeric preprocessing pipeline.
data_transformer = ColumnTransformer(
    [('numerical', numerical_transformer, numerical_features)]
)

In [None]:
# Search space shared by all grid searches. Nested pipeline parameters are
# addressed with '__'-separated names: <step>__<sub-step>__<parameter>.
# Only the imputation strategy is varied here.
param_grid = dict(
    data_transformer__numerical__imputer__strategy=['mean', 'median', 'most_frequent'],
)

### Pipeline with Linear Regression

In [None]:
# Linear regression on top of the shared preprocessing step.
pipe_lm = Pipeline([
    ('data_transformer', data_transformer),
    ('pipe_lm', LinearRegression()),
])

# Grid search over param_grid; default cv is 5-fold.
grid_lm = GridSearchCV(estimator=pipe_lm, param_grid=param_grid)
grid_lm.fit(X_train, y_train);

In [None]:
# Mean cross-validated score of the best estimator, then the parameters that produced it.
print(grid_lm.best_score_, grid_lm.best_params_, sep='\n')
#print(grid_lm.best_estimator_)

# Reference: https://scikit-learn.org/stable/modules/grid_search.html#specifying-an-objective-metric
# By default, parameter search evaluates each setting with the estimator's own
# score function: sklearn.metrics.accuracy_score for classification and
# sklearn.metrics.r2_score for regression.

In [None]:
#grid_lm.cv_results_

In [None]:
# Predict on the held-out rows with the refit best estimator.
y_predict_lm = grid_lm.predict(X_test)

# Predicted (x) versus actual (y) values with a fitted regression line.
sns.regplot(y=y_test, x=y_predict_lm, marker="^", color='red')
plt.title("Linear Regression Model")
plt.xlabel("Predicted Price")
plt.ylabel("Actual Price")
plt.show()

In [None]:
# Test-set error metrics for the linear regression model.
MSE = mean_squared_error(y_test, y_predict_lm)
RMSE = np.sqrt(MSE)  # root of the MSE computed on the line above
MAE = mean_absolute_error(y_test, y_predict_lm)
r2 = r2_score(y_test, y_predict_lm)

print('RMSE =', RMSE)
print('MSE =', MSE)
print('MAE =', MAE)
print('R2 =', r2)

### Pipeline with Random Forest Regressor

In [None]:
# Random forest on top of the shared preprocessing step; seeded for repeatability.
pipe_rf = Pipeline([
    ('data_transformer', data_transformer),
    ('pipe_rf', RandomForestRegressor(random_state=0)),
])

# Grid search over param_grid with GridSearchCV's default settings.
grid_rf = GridSearchCV(estimator=pipe_rf, param_grid=param_grid)
grid_rf.fit(X_train, y_train);

In [None]:
# Best cross-validated score, then the parameters that produced it.
print(grid_rf.best_score_, grid_rf.best_params_, sep='\n')
#print(grid_rf.best_estimator_)

In [None]:
# Predict on the held-out rows with the refit best estimator.
y_predict_rf = grid_rf.predict(X_test)

# Predicted (x) versus actual (y) values with a fitted regression line.
sns.regplot(y=y_test, x=y_predict_rf, marker="D", color='green')
plt.title("Random Forest Model")
plt.xlabel("Predicted Price")
plt.ylabel("Actual Price")
plt.show()

In [None]:
# Test-set error metrics for the random forest model.
MSE = mean_squared_error(y_test, y_predict_rf)
RMSE = np.sqrt(MSE)  # root of the MSE computed on the line above
MAE = mean_absolute_error(y_test, y_predict_rf)
r2 = r2_score(y_test, y_predict_rf)

print('RMSE =', RMSE)
print('MSE =', MSE)
print('MAE =', MAE)
print('R2 =', r2)

### Pipeline with XGBoost Regressor

In [None]:
# XGBoost regressor on top of the shared preprocessing step; seeded for repeatability.
pipe_xgb = Pipeline([
    ('data_transformer', data_transformer),
    ('pipe_xgb', XGBRegressor(random_state=0)),
])

# Grid search over param_grid; default cv is 5-fold.
grid_xgb = GridSearchCV(estimator=pipe_xgb, param_grid=param_grid)
grid_xgb.fit(X_train, y_train);

In [None]:
#XGBRegressor()

In [None]:
# Best cross-validated score, then the parameters that produced it.
print(grid_xgb.best_score_, grid_xgb.best_params_, sep='\n')
#print(grid_xgb.best_estimator_)

In [None]:
# Predict on the held-out rows with the refit best estimator.
y_predict_xgb = grid_xgb.predict(X_test)

# Predicted (x) versus actual (y) values with a fitted regression line.
sns.regplot(y=y_test, x=y_predict_xgb, marker="o", color='blue')
plt.title("XGBoost Model")
plt.xlabel("Predicted Price")
plt.ylabel("Actual Price")
plt.show()

In [None]:
# Test-set error metrics for the XGBoost model.
MSE = mean_squared_error(y_test, y_predict_xgb)
RMSE = np.sqrt(MSE)  # root of the MSE computed on the line above
MAE = mean_absolute_error(y_test, y_predict_xgb)
r2 = r2_score(y_test, y_predict_xgb)

print('RMSE =', RMSE)
print('MSE =', MSE)
print('MAE =', MAE)
print('R2 =', r2)

### Pipeline with Support Vector Regressor

In [None]:
# Support vector regressor (default settings) on top of the shared preprocessing step.
pipe_svr = Pipeline([
    ('data_transformer', data_transformer),
    ('pipe_svr', SVR()),
])

# Grid search over param_grid with GridSearchCV's default settings.
grid_svr = GridSearchCV(estimator=pipe_svr, param_grid=param_grid)
grid_svr.fit(X_train, y_train);

# Reference links:
# https://scikit-learn.org/stable/modules/svm.html
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
# https://medium.com/coinmonks/support-vector-regression-or-svr-8eb3acf6d0ff

In [None]:
# Best cross-validated score, then the parameters that produced it.
print(grid_svr.best_score_, grid_svr.best_params_, sep='\n')
#print(grid_svr.best_estimator_)

In [None]:
# Predict on the held-out rows with the refit best estimator.
y_predict_svr = grid_svr.predict(X_test)

# Predicted (x) versus actual (y) values with a fitted regression line.
sns.regplot(y=y_test, x=y_predict_svr, marker="*", color='black')
plt.title("Support Vector Regressor Model")
plt.xlabel("Predicted Price")
plt.ylabel("Actual Price")
plt.show()

In [None]:
# Test-set error metrics for the support vector regressor.
MSE = mean_squared_error(y_test, y_predict_svr)
RMSE = np.sqrt(MSE)  # root of the MSE computed on the line above
MAE = mean_absolute_error(y_test, y_predict_svr)
r2 = r2_score(y_test, y_predict_svr)

print('RMSE =', RMSE)
print('MSE =', MSE)
print('MAE =', MAE)
print('R2 =', r2)

### References

* https://amitg0161.medium.com/sklearn-linear-regression-tutorial-with-boston-house-dataset-cde74afd460a

* https://www.educative.io/blog/scikit-learn-cheat-sheet-classification-regression-methods

* https://www.kaggle.com/amar09/regression-algorithms-using-scikit-learn
