Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Import Dataset

In [2]:
data = pd.read_csv('Data.csv')
print(data)

         AT      V       AP     RH      PE
0     14.96  41.76  1024.07  73.17  463.26
1     25.18  62.96  1020.04  59.08  444.37
2      5.11  39.40  1012.16  92.14  488.56
3     20.86  57.32  1010.24  76.64  446.48
4     10.82  37.50  1009.23  96.62  473.90
...     ...    ...      ...    ...     ...
9563  16.65  49.69  1014.01  91.00  460.03
9564  13.19  39.18  1023.67  66.78  469.62
9565  31.32  74.33  1012.92  36.48  429.57
9566  24.48  69.45  1013.86  62.39  435.74
9567  21.60  62.52  1017.23  67.87  453.28

[9568 rows x 5 columns]


In [3]:
x = data.iloc[:, :-1].values
print(x)

[[  14.96   41.76 1024.07   73.17]
 [  25.18   62.96 1020.04   59.08]
 [   5.11   39.4  1012.16   92.14]
 ...
 [  31.32   74.33 1012.92   36.48]
 [  24.48   69.45 1013.86   62.39]
 [  21.6    62.52 1017.23   67.87]]


In [4]:
y = data.iloc[:, -1].values
print(y)

[463.26 444.37 488.56 ... 429.57 435.74 453.28]


Split data into training and test set

In [5]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [6]:
print(x_train)

[[  11.22   43.13 1017.24   80.9 ]
 [  13.67   54.3  1015.92   75.42]
 [  32.84   77.95 1014.68   45.8 ]
 ...
 [  16.81   38.52 1018.26   75.21]
 [  12.8    41.16 1022.43   86.19]
 [  32.32   67.9  1006.08   37.93]]


In [7]:
print(x_test)

[[  28.66   77.95 1009.56   69.07]
 [  17.48   49.39 1021.51   84.53]
 [  14.86   43.14 1019.21   99.14]
 ...
 [  12.24   44.92 1023.74   88.21]
 [  27.28   47.93 1003.46   59.22]
 [  17.28   39.99 1007.09   74.25]]


In [8]:
print(y_train)

[473.93 467.87 431.97 ... 459.01 462.72 428.12]


In [9]:
print(y_test)

[431.23 460.01 461.14 ... 473.26 438.   463.28]


Multiple Linear Regression

In [10]:
from sklearn.linear_model import LinearRegression
mlr = LinearRegression()
mlr.fit(x_train, y_train)

In [11]:
y_pred = mlr.predict(x_test)
print(np.concatenate((y_test.reshape(len(y_test), 1), y_pred.reshape(len(y_test), 1)),1))

[[431.23       431.42761597]
 [460.01       458.56124622]
 [461.14       462.75264705]
 ...
 [473.26       469.51835895]
 [438.         442.41759454]
 [463.28       461.88279939]]


Evaluate the Model (R-Square)

In [12]:
from sklearn.metrics import r2_score
r_square_mlr = r2_score(y_test, y_pred)
print("R-Square For Multiple Linear Regression: ", r_square_mlr)

R-Square For Multiple Linear Regression:  0.9325315554761303


Polynomial Linear Regression

In [13]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree = 4)
x_poly = poly.fit_transform(x_train)
plr = LinearRegression()
plr.fit(x_poly, y_train)

In [14]:
y_poly = plr.predict(poly.fit_transform(x_test))
print(np.concatenate((y_test.reshape(len(y_test), 1), y_poly.reshape(len(y_poly), 1)), 1))

[[431.23       433.94378003]
 [460.01       457.90446834]
 [461.14       460.52453672]
 ...
 [473.26       469.52875127]
 [438.         438.26572548]
 [463.28       461.66503953]]


In [15]:
r_square_poly = r2_score(y_test, y_poly)
print("R-Square for Polynomial Regression: ", r_square_poly)

R-Square for Polynomial Regression:  0.9458192729884297


Support Vector Regression

In [16]:
from sklearn.preprocessing import StandardScaler
x_sc = StandardScaler()
y_sc = StandardScaler()
xt_scale = x_sc.fit_transform(x_train)
yt_scale = y_sc.fit_transform(y_train.reshape(len(y_train),1))

In [17]:
from sklearn.svm import SVR
svr = SVR(kernel = "rbf")
svr.fit(xt_scale, yt_scale)

  y = column_or_1d(y, warn=True)


In [18]:
y_svr = y_sc.inverse_transform(svr.predict(x_sc.fit_transform(x_test)).reshape(len(x_test), 1))
print(np.concatenate((y_test.reshape(len(y_test), 1), y_svr),1))

[[431.23       434.49984934]
 [460.01       457.70985097]
 [461.14       460.84909947]
 ...
 [473.26       470.22304261]
 [438.         439.28872637]
 [463.28       460.42910288]]


In [19]:
r_square_svr = r2_score(y_test, y_svr)
print("R-Square For Support Vector Regression: ", r_square_svr)

R-Square For Support Vector Regression:  0.9483693304317924


Decision Tree Regression

In [20]:
from sklearn.tree import DecisionTreeRegressor
dtr = DecisionTreeRegressor(random_state = 0)
dtr.fit(x_train, y_train)

In [21]:
y_dtr = dtr.predict(x_test)
print(np.concatenate((y_test.reshape(len(y_test), 1), y_dtr.reshape(len(y_test), 1)), 1))

[[431.23 431.28]
 [460.01 459.59]
 [461.14 460.06]
 ...
 [473.26 471.46]
 [438.   437.76]
 [463.28 462.74]]


In [22]:
r_square_dtr = r2_score(y_test, y_dtr)
print("R-Square for Decision Tree Regression: ", r_square_dtr)

R-Square for Decision Tree Regression:  0.922905874177941


Random Forest Regression

In [23]:
from sklearn.ensemble import RandomForestRegressor
rfr = RandomForestRegressor(n_estimators= 20, random_state = 0)
rfr.fit(x_train, y_train)

In [24]:
y_rfr = rfr.predict(x_test)
print(np.concatenate((y_test.reshape(len(y_test), 1), y_rfr.reshape(len(y_rfr), 1)), 1))

[[431.23   434.0805]
 [460.01   458.0535]
 [461.14   462.8255]
 ...
 [473.26   469.375 ]
 [438.     439.078 ]
 [463.28   460.6275]]


In [25]:
r_square_rfr = r2_score(y_test, y_rfr)
print("R-Square for Random Forest Regression: ", r_square_rfr)

R-Square for Random Forest Regression:  0.9639647253608774


Best Model

In [26]:
print("R-Square For Multiple Linear Regression: ", r_square_mlr, "\n"
"R-Square for Polynomial Regression: ", r_square_poly, "\n"
"R-Square For Support Vector Regression: ", r_square_svr, "\n"
"R-Square for Decision Tree Regression: ", r_square_dtr, "\n"
"R-Square for Random Forest Regression: ", r_square_rfr)

R-Square For Multiple Linear Regression:  0.9325315554761303 
R-Square for Polynomial Regression:  0.9458192729884297 
R-Square For Support Vector Regression:  0.9483693304317924 
R-Square for Decision Tree Regression:  0.922905874177941 
R-Square for Random Forest Regression:  0.9639647253608774
