## Load and Preprocess the dataset

In [43]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Read the raw CSV. The last column is used as the regression target and
# every column before it as a feature.
dataset = pd.read_csv('../Datasets/Regression/Model_Selection_Regression/Data.csv')
# print(dataset)

X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Show the feature matrix followed by the target vector (one per line).
print(X, y, sep='\n')

# --- Optional step (disabled): impute missing values -----------------------
# from sklearn.impute import SimpleImputer
# imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# imputer.fit(X[:,:-1])
# X[:,:-1] =  imputer.transform(X[:,:-1])
# imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
# imputer.fit(X[:,-2:-1])
# X[:,-2:-1] =  imputer.transform(X[:,-2:-1])
# print(X)

# --- Optional step (disabled): one-hot encode categorical data -------------
# from sklearn.compose import ColumnTransformer
# from sklearn.preprocessing import OneHotEncoder
# ct = ColumnTransformer(transformers=[('encode',OneHotEncoder(),[-1])], remainder='passthrough')
# X = np.array(ct.fit_transform(X))
# # print(X)

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

[[  14.96   41.76 1024.07   73.17]
 [  25.18   62.96 1020.04   59.08]
 [   5.11   39.4  1012.16   92.14]
 ...
 [  31.32   74.33 1012.92   36.48]
 [  24.48   69.45 1013.86   62.39]
 [  21.6    62.52 1017.23   67.87]]
[463.26 444.37 488.56 ... 429.57 435.74 453.28]


## Training the Multiple Linear Regression Model on the training set and predicting on the test set

In [44]:
from sklearn.linear_model import LinearRegression

# Fit a linear model on the training split (fit() returns the estimator
# itself), then predict the targets of the held-out test split.
regressor = LinearRegression().fit(X_train, y_train)
y_pred_MLR = regressor.predict(X_test)
print(y_pred_MLR)

[457.25522108 466.71927366 440.36694911 ... 476.40502919 424.61609708
 463.91141143]


## Training the Polynomial Regression Model on the training set and predicting on the test set

In [45]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Polynomial regression: expand the features into polynomial terms up to
# degree 4, then fit an ordinary linear model on the expanded features.
poly_fet = PolynomialFeatures(degree=4)
poly_X = poly_fet.fit_transform(X_train)

poly_reg = LinearRegression()
poly_reg.fit(poly_X, y_train)

# Apply the transformer that was fitted on the training data to the test
# split with transform() only. Calling fit_transform() here would refit the
# transformer on test data, which a preprocessing step must never do.
y_pred_PR = poly_reg.predict(poly_fet.transform(X_test))
print(y_pred_PR)

[456.08178926 462.80300914 438.32636632 ... 476.31568008 432.6794335
 464.55561808]


## Training the Support Vector Regression Model on the training set and predicting on the test set

In [46]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Standardise the training features; sc_X keeps the fitted statistics so the
# test features can be scaled the same way later.
sc_X = StandardScaler()
X_sc = sc_X.fit_transform(X_train)

# StandardScaler expects a 2-D array, so turn the target into a single column
# before fitting a separate scaler for it.
sc_y = StandardScaler()
y_sc = sc_y.fit_transform(y_train.reshape(-1, 1))

# print(X_sc)
# print(y_sc)

# SVR wants a 1-D target, hence the ravel().
regressor = SVR(kernel='rbf').fit(X_sc, y_sc.ravel())

# Predictions come out in the scaled target space first ...
y_pred_SVR = regressor.predict(sc_X.transform(X_test))
print(y_pred_SVR)
# ... and are then mapped back to the original target units.
y_pred_SVR = sc_y.inverse_transform(y_pred_SVR.reshape(-1, 1)).ravel()
print(y_pred_SVR)

[ 0.15419743  0.5472489  -0.99023963 ...  1.29174246 -1.26142145
  0.5822108 ]
[456.98286871 463.66609133 437.52351516 ... 476.32503421 432.91249439
 464.26056348]


## Training the Decision Tree Regression Model on the training set and predicting on the test set

In [47]:
from sklearn.tree import DecisionTreeRegressor

# Fit a single decision tree on the training split (random_state fixed for
# reproducibility) and predict the held-out test split.
regressor = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
y_pred_DTR = regressor.predict(X_test)
print(y_pred_DTR)

[459.65 462.26 436.03 ... 477.18 432.78 468.23]


## Training the Random Forest Regression Model on the training set and predicting on the test set

In [48]:
from sklearn.ensemble import RandomForestRegressor

# Fit a forest of 10 trees on the training split (random_state fixed for
# reproducibility) and predict the held-out test split.
regressor = RandomForestRegressor(
    n_estimators=10,
    random_state=0,
).fit(X_train, y_train)
y_pred_RFR = regressor.predict(X_test)
print(y_pred_RFR)

[457.902 464.072 439.721 ... 476.177 432.897 466.106]


## R-Squared Comparison for All Models


In [49]:
from sklearn.metrics import r2_score

# R^2 of every model's test-set predictions against the true test targets,
# keyed by the model's display name (insertion order = print order).
R2S = {
    'Multiple Linear Regression': r2_score(y_test, y_pred_MLR),
    'Polynomial Regression': r2_score(y_test, y_pred_PR),
    'Support Vector Regression': r2_score(y_test, y_pred_SVR),
    'Decision Tree Regression': r2_score(y_test, y_pred_DTR),
    'Random Forest Regression': r2_score(y_test, y_pred_RFR),
}

for method, score in R2S.items():
    print(method, ":", score)

Multiple Linear Regression : 0.9321860060402447
Polynomial Regression : 0.9447340594673367
Support Vector Regression : 0.9479978713795594
Decision Tree Regression : 0.9342783714449767
Random Forest Regression : 0.9628673278135129
