# Harish Practice: Regression Model Selection for Engine Energy Prediction

# Import Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the dataset from 'dataset.csv' (path relative to the working directory) into a DataFrame.
df = pd.read_csv('dataset.csv')

In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(9568, 5)

In [4]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [9]:
# Feature matrix: every column except the last one, as a NumPy array.
X = df.iloc[:, :-1].to_numpy()
X

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]])

In [8]:
# Target vector: the last column, as a 1-D NumPy array.
y = df.iloc[:, -1].to_numpy()
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

In [10]:
# Column-vector (n_samples, 1) copy of the target, used for the SVR pipeline
# because StandardScaler works on 2-D input.
ysvm = y.reshape(-1, 1)

# Splitting the dataset into the Training set and Test set

In [12]:
from sklearn.model_selection import train_test_split
# 75% / 25% train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Same split settings applied to the column-vector target used for the SVR.
X_trainsvm, X_testsvm, y_trainsvm, y_testsvm = train_test_split(
    X, ysvm, test_size=0.25, random_state=0
)

# Importing Machine Learning Algorithms

In [14]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

# Initializing different Regression algorithms

In [16]:
from sklearn.preprocessing import StandardScaler

# Plain multiple linear regression.
modelLR = LinearRegression()

# Polynomial regression: a linear model fitted on degree-4 polynomial
# features of the training inputs.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)
modelPLR = LinearRegression()

# Tree-based models; seeds are fixed so results are reproducible.
modelRFR = RandomForestRegressor(n_estimators=10, random_state=0)
modelDTR = DecisionTreeRegressor(random_state=0)

# Support vector regression, trained on standardized features and target.
modelSVR = SVR(kernel='rbf')

sc_X = StandardScaler()
sc_y = StandardScaler()
# Fit the scalers on the untouched training split instead of overwriting
# X_trainsvm / y_trainsvm with scaled versions of themselves. X_train / y_train
# are the same split (same data, test_size and random_state), so the values
# are unchanged, but the cell is now idempotent: re-running it no longer
# re-fits the scalers on already-standardized data.
X_trainsvm = sc_X.fit_transform(X_train)
y_trainsvm = sc_y.fit_transform(y_train.reshape(-1, 1))  # StandardScaler needs 2-D input

# Training the Regression Algorithms

In [17]:
# Fit every model on the training split.
modelLR.fit(X_train, y_train)
modelPLR.fit(X_poly, y_train)  # X_poly holds the polynomial features of X_train
modelRFR.fit(X_train, y_train)
modelDTR.fit(X_train, y_train)
# y_trainsvm is a column vector (it went through StandardScaler); SVR expects
# a 1-D target, so flatten it to avoid the column_or_1d conversion warning.
modelSVR.fit(X_trainsvm, y_trainsvm.ravel())

  y = column_or_1d(y, warn=True)


SVR()

# Predicting the Test set for Validation

In [21]:
# Predict the held-out test set with each fitted model.
modelLRy_pred = modelLR.predict(X_test)
# The test features get the same polynomial expansion that was fitted on the training set.
modelPLRy_pred = modelPLR.predict(poly_reg.transform(X_test))
modelRFRy_pred = modelRFR.predict(X_test)
modelDTRy_pred = modelDTR.predict(X_test)

# The SVR was trained on standardized data: scale the test features with sc_X,
# then map the predictions back to the original target scale with sc_y.
# predict() returns a 1-D array while inverse_transform() needs 2-D input,
# hence the reshape; flatten() restores a 1-D result.
modelSVRy_pred = sc_y.inverse_transform(
    modelSVR.predict(sc_X.transform(X_test)).reshape(-1, 1)
).flatten()

#Note on the SVR prediction: StandardScaler.inverse_transform() expects a 2D array, but modelSVR.predict() returns a 1D array, so passing the predictions to inverse_transform() directly raises a ValueError stating that a 2D array was expected. This is a common issue with Scikit-learn's StandardScaler, which always requires 2D input, even for single-variable data. To correct it, the SVR predictions are reshaped with .reshape(-1, 1) into a 2D array that inverse_transform() accepts. After the inverse transformation, .flatten() converts the result back to a 1D array so that it has the same shape as y_test when it is evaluated with r2_score.

# Evaluating the Model Performance

In [22]:
from sklearn.metrics import r2_score
# Score every model on the test set with r2_score (coefficient of determination).
# NOTE: the printed label says "Accuracy" for continuity with earlier runs,
# but the value is R^2, not a classification accuracy.
# modelSVRy_pred is reused from the prediction cell; it is already on the
# original target scale, so it is not recomputed here.
for model_label, test_pred in (
    ("Linear Regression", modelLRy_pred),
    ("Polynomial Regression", modelPLRy_pred),
    ("Random Forest Regression", modelRFRy_pred),
    ("Decision Tree Regression", modelDTRy_pred),
    ("Support Vector Regression", modelSVRy_pred),
):
    print("{} Accuracy: {}".format(model_label, r2_score(y_test, test_pred)))

Linear Regression Accuracy: 0.9323789104734466
Polynomial Regression Accuracy: 0.9459732425800774
Random Forest Regression Accuracy: 0.960853142550123
Decision Treee Regression Accuracy: 0.9229733351311707
Support Vector Regression Accuracy: 0.9483940635883191
