# Importing the libraries

In [59]:
import pandas as pd
import numpy as np

# Importing & Splitting Data

## Importing the dataset

PE = Energy Output
AT = Engine Temperature
AP = Ambient Pressure
RH = Relative Humidity

In [60]:
# dataset = pd.read_csv('ENTER_THE_NAME_OF_YOUR_DATASET_HERE.csv')
# https://archive.ics.uci.edu/ml/index.php
dataset = pd.read_csv('data/data_regression.csv')
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [61]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [62]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Linear Regression

In [63]:
import statsmodels.api as sm

In [64]:
X_const = sm.add_constant(X) # adding a constant
model = sm.OLS(y, X).fit()
print(model.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   1.000
Model:                            OLS   Adj. R-squared (uncentered):              1.000
Method:                 Least Squares   F-statistic:                          1.939e+07
Date:                Sat, 25 Feb 2023   Prob (F-statistic):                        0.00
Time:                        17:17:44   Log-Likelihood:                         -29068.
No. Observations:                9568   AIC:                                  5.814e+04
Df Residuals:                    9564   BIC:                                  5.817e+04
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [65]:
ypred = model.predict(X)
ypred[:5]

array([471.1 , 447.55, 480.39, 449.65, 469.4 ])

# Reviewing RMSE

In [66]:
from sklearn import metrics
import math

In [67]:
mse = metrics.mean_squared_error(y, ypred)
mae = metrics.mean_absolute_error(y, ypred)
rmse = math.sqrt(mse)

print('MSE Score: {:.2f}'.format(mse))
print('MAE Score: {:.2f}'.format(mae))
print('RMSE Score: {:.2f}'.format(rmse))

MSE Score: 25.49
MAE Score: 4.02
RMSE Score: 5.05


### Training the Multiple Linear Regression model on the Training set

In [68]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

### Predicting the Test set results

In [69]:
y_pred = regressor.predict(X_test)
#np.set_printoptions(precision=2)

In [70]:
y_pred[:5]

array([431.43, 458.58, 462.77, 448.61, 457.88])

### Evaluating the Model Performance

In [71]:
dataset['PE'].describe()

count    9568.000000
mean      454.365009
std        17.066995
min       420.260000
25%       439.750000
50%       451.550000
75%       468.430000
max       495.760000
Name: PE, dtype: float64

In [72]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
print("R2 Score: {:.2f}%".format(r2_score(y_test, y_pred)*100))
print("MSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred)))
print("RMSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred, squared=False)))
print("RMSE Score: {:.2f}".format(np.sqrt(mean_squared_error(y_test, y_pred))))
print("MAE Score: {:.2f}".format(mean_absolute_error(y_test, y_pred)))


R2 Score: 93.24%
MSE Score: 19.83
RMSE Score: 4.45
RMSE Score: 4.45
MAE Score: 3.56


# Polynomial Regression

## Training the Polynomial Regression model on the Training set

In [73]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree = 4)
X_poly = poly_reg.fit_transform(X_train)
regressor = LinearRegression()
regressor.fit(X_poly, y_train)

LinearRegression()

## Predicting the Test set results

In [74]:
y_pred = regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[433.98 431.23]
 [457.97 460.01]
 [460.69 461.14]
 ...
 [470.31 468.9 ]
 [477.79 473.9 ]
 [440.03 437.07]]


## Evaluating the Model Performance

In [75]:
print("R2 Score: {:.2f}%".format(r2_score(y_test, y_pred)*100))
print("MSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred)))
print("RMSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred, squared=False)))
print("RMSE Score: {:.2f}".format(np.sqrt(mean_squared_error(y_test, y_pred))))
print("MAE Score: {:.2f}".format(mean_absolute_error(y_test, y_pred)))

R2 Score: 94.60%
MSE Score: 15.85
RMSE Score: 3.98
RMSE Score: 3.98
MAE Score: 3.12


# Decision Tree

## Training the Decision Tree Regression model on the Training set

In [76]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(X_train, y_train)

DecisionTreeRegressor(random_state=0)

## Predicting the Test set results

In [77]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[432.98 431.23]
 [454.67 460.01]
 [463.6  461.14]
 ...
 [475.89 468.9 ]
 [474.15 473.9 ]
 [439.99 437.07]]


## Evaluating the Model Performance

In [78]:
print("R2 Score: {:.2f}%".format(r2_score(y_test, y_pred)*100))
print("MSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred)))
print("RMSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred, squared=False)))
print("RMSE Score: {:.2f}".format(np.sqrt(mean_squared_error(y_test, y_pred))))
print("MAE Score: {:.2f}".format(mean_absolute_error(y_test, y_pred)))

R2 Score: 92.30%
MSE Score: 22.59
RMSE Score: 4.75
RMSE Score: 4.75
MAE Score: 3.19


# Random Forest

## Training the Random Forest Regression model on the whole dataset

In [79]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(X_train, y_train)

RandomForestRegressor(n_estimators=10, random_state=0)

## Predicting the Test set results

In [80]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[434.54 431.23]
 [458.79 460.01]
 [462.62 461.14]
 ...
 [470.44 468.9 ]
 [475.13 473.9 ]
 [439.56 437.07]]


## Evaluating the Model Performance

In [81]:
print("R2 Score: {:.2f}%".format(r2_score(y_test, y_pred)*100))
print("MSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred)))
print("RMSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred, squared=False)))
print("RMSE Score: {:.2f}".format(np.sqrt(mean_squared_error(y_test, y_pred))))
print("MAE Score: {:.2f}".format(mean_absolute_error(y_test, y_pred)))

R2 Score: 96.09%
MSE Score: 11.48
RMSE Score: 3.39
RMSE Score: 3.39
MAE Score: 2.50


# Support Vector Regression

## Feature Scaling

#### We must perform feature scaling for SVR. To perform feature scaling we have to reshape the data

In [82]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
y = y.reshape(len(y),1)

In [83]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [84]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

## Training the SVR model on the Training set

In [85]:
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X_train, y_train)

  return f(*args, **kwargs)


SVR()

## Predicting the Test set results

In [86]:
y_pred = sc_y.inverse_transform(regressor.predict(sc_X.transform(X_test)))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


## Evaluating the Model Performance

In [87]:
print("R2 Score: {:.2f}%".format(r2_score(y_test, y_pred)*100))
print("MSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred)))
print("RMSE Score: {:.2f}".format(mean_squared_error(y_test, y_pred, squared=False)))
print("RMSE Score: {:.2f}".format(np.sqrt(mean_squared_error(y_test, y_pred))))
print("MAE Score: {:.2f}".format(mean_absolute_error(y_test, y_pred)))

R2 Score: 94.81%
MSE Score: 15.19
RMSE Score: 3.90
RMSE Score: 3.90
MAE Score: 3.00
