## using different models and comparing r2 scores

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## getting data

In [76]:
# Read the dataset from disk. Every column except the last is treated as a
# feature; the last column is the regression target.
data = pd.read_csv('Data.csv')
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

In [77]:
# Inspect the feature matrix (NumPy abbreviates the middle rows in the repr).
X

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]])

In [78]:
# Inspect the target vector (NumPy abbreviates the middle values in the repr).
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

## plot

## performing train, test, split

In [115]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible so every model below is scored on the same test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## multiple linear regression

In [116]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on all feature columns; fit() returns the estimator,
# so construction and fitting are chained.
regressor_mlr = LinearRegression().fit(X_train, y_train)
# Last expression of the cell: shows the fitted estimator's repr.
regressor_mlr

LinearRegression()

In [117]:
# Predict the held-out rows with the linear model.
y_pred = regressor_mlr.predict(X_test)
# Two decimals are enough for an eyeball comparison.
np.set_printoptions(precision=2)
# Left column: prediction, right column: actual value.
print(np.column_stack((y_pred, y_test)))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


In [118]:
from sklearn.metrics import r2_score

# Coefficient of determination of the current y_pred against the test targets;
# kept under its own name for the final comparison table.
r2_mlr = r2_score(y_true=y_test, y_pred=y_pred)
print(r2_mlr)

0.9325315554761303


## polynomial regression

In [119]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Expand the training features into all polynomial terms up to degree 4,
# then fit a plain linear model on the expanded matrix.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)
regressor_pr = LinearRegression().fit(X_poly, y_train)
# Last expression of the cell: shows the fitted estimator's repr.
regressor_pr

LinearRegression()

In [120]:
# The test features must go through the same polynomial expansion that was
# fitted on the training set (transform only, no re-fit).
X_test_poly = poly_reg.transform(X_test)
y_pred = regressor_pr.predict(X_test_poly)
np.set_printoptions(precision=2)
# Left column: prediction, right column: actual value.
print(np.column_stack((y_pred, y_test)))

[[433.94 431.23]
 [457.9  460.01]
 [460.53 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.66 463.28]]


In [121]:
from sklearn.metrics import r2_score

# Score the polynomial model's predictions (the current y_pred) and keep the
# result under its own name for the final comparison table.
r2_pr = r2_score(y_true=y_test, y_pred=y_pred)
print(r2_pr)

0.9458193819604375


## decision tree

In [122]:
from sklearn.tree import DecisionTreeRegressor

# Single regression tree; random_state is fixed so repeated runs build the
# same tree.
regressor_dt = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
# Last expression of the cell: shows the fitted estimator's repr.
regressor_dt

DecisionTreeRegressor(random_state=0)

In [123]:
# Predict the held-out rows with the decision tree.
y_pred = regressor_dt.predict(X_test)
np.set_printoptions(precision=2)
# Left column: prediction, right column: actual value.
print(np.column_stack((y_pred, y_test)))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


In [124]:
from sklearn.metrics import r2_score

# Score the decision tree's predictions (the current y_pred) and keep the
# result under its own name for the final comparison table.
r2_dt = r2_score(y_true=y_test, y_pred=y_pred)
print(r2_dt)

0.922905874177941


## random forest

In [125]:
from sklearn.ensemble import RandomForestRegressor

# Ensemble of 10 trees; the fixed seed makes the fitted forest reproducible.
regressor_rf = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)
# Last expression of the cell: shows the fitted estimator's repr.
regressor_rf

RandomForestRegressor(n_estimators=10, random_state=0)

In [126]:
# Predict the held-out rows with the random forest.
y_pred = regressor_rf.predict(X_test)
np.set_printoptions(precision=2)
# Left column: prediction, right column: actual value.
print(np.column_stack((y_pred, y_test)))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


In [127]:
from sklearn.metrics import r2_score

# Score the random forest's predictions (the current y_pred) and keep the
# result under its own name for the final comparison table.
r2_rf = r2_score(y_true=y_test, y_pred=y_pred)
print(r2_rf)

0.9615908334363876


## support vector regression

* data preprocessing: `StandardScaler` expects a 2-D array, so `y` is reshaped into a single column

In [128]:
# Turn the target into a column: -1 lets NumPy infer the row count.
y = y.reshape(-1, 1)

* re-splitting the data so that the train/test targets use the reshaped y

In [130]:
from sklearn.model_selection import train_test_split

# Same test fraction and seed as the earlier split; this overwrites the
# previous X_train/X_test/y_train/y_test, now with y as a column vector.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

* feature scaling

In [131]:
from sklearn.preprocessing import StandardScaler

# Separate scalers for features and target, so predictions can later be mapped
# back to the original target scale with sc_y.
sc_X, sc_y = StandardScaler(), StandardScaler()

# NOTE: X_train and y_train are overwritten with their scaled versions, so
# re-running this cell on its own would fit the scalers on already-scaled
# data. Re-run the split cell first.
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

* regression

In [132]:
from sklearn.svm import SVR

# Support vector regressor with an RBF kernel, trained on the scaled
# features and scaled target.
regressor_svr = SVR(kernel = 'rbf')
# y_train is an (n, 1) column at this point (it was reshaped for
# StandardScaler), but SVR.fit expects a 1-D target. Flattening it fits on
# exactly the same values without triggering the DataConversionWarning
# ("y = column_or_1d(y, warn=True)") that the column vector caused.
regressor_svr.fit(X_train, y_train.ravel())

  y = column_or_1d(y, warn=True)


SVR()

In [133]:
# Scale the test features with the scaler fitted on the training set, predict
# in scaled-target space, then undo the target scaling. inverse_transform
# needs a 2-D array, hence the reshape to a single column.
X_test_scaled = sc_X.transform(X_test)
y_pred_scaled = regressor_svr.predict(X_test_scaled)
y_pred = sc_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
np.set_printoptions(precision=2)
# Left column: prediction, right column: actual value.
print(np.column_stack((y_pred, y_test)))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


In [134]:
from sklearn.metrics import r2_score

# Score the SVR predictions (already mapped back to the original target
# scale) and keep the result under its own name for the comparison table.
r2_svr = r2_score(y_true=y_test, y_pred=y_pred)
print(r2_svr)

0.948078404998626


## final results comparison

In [135]:
from tabulate import tabulate

# Test-set R² per model, in the order the models were fitted. Dicts preserve
# insertion order, so the rows come out in this order.
scores = {
    "multiple lr": r2_mlr,
    "polynomial lr": r2_pr,
    "descision tree": r2_dt,
    "random forest": r2_rf,
    "support vector reg": r2_svr,
}

# First row is the header row, as expected by headers='firstrow'.
table = [["model", "r2_score"]] + [[label, score] for label, score in scores.items()]
print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

╒════════════════════╤════════════╕
│ model              │   r2_score │
╞════════════════════╪════════════╡
│ multiple lr        │   0.932532 │
├────────────────────┼────────────┤
│ polynomial lr      │   0.945819 │
├────────────────────┼────────────┤
│ descision tree     │   0.922906 │
├────────────────────┼────────────┤
│ random forest      │   0.961591 │
├────────────────────┼────────────┤
│ support vector reg │   0.948078 │
╘════════════════════╧════════════╛
