In [1]:
# Import the libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Ignore harmful warnings
import warnings
warnings.filterwarnings('ignore')

import pandasql as psql

In [4]:
# Load the data from all work sheets (Target variable = PE)

power_plant_data = pd.concat(pd.read_excel(r"D:\iiit notes\Programming\AI\Internship practice\47 season 23-jul-2021\Power_Plant.xlsx", 
                                           sheet_name = None), ignore_index= True)
power_plant_data.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [5]:
# Display the size of the dataset 

power_plant_data.shape

(47840, 5)

In [6]:
# Display dataset information

power_plant_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47840 entries, 0 to 47839
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      47840 non-null  float64
 1   V       47840 non-null  float64
 2   AP      47840 non-null  float64
 3   RH      47840 non-null  float64
 4   PE      47840 non-null  float64
dtypes: float64(5)
memory usage: 1.8 MB


In [7]:
# Display the corr of data

power_plant_data.corr()

Unnamed: 0,AT,V,AP,RH,PE
AT,1.0,0.844107,-0.507549,-0.542535,-0.948128
V,0.844107,1.0,-0.413502,-0.312187,-0.86978
AP,-0.507549,-0.413502,1.0,0.099574,0.518429
RH,-0.542535,-0.312187,0.099574,1.0,0.389794
PE,-0.948128,-0.86978,0.518429,0.389794,1.0


In [8]:
# Identify the independent and Target (dependent) variable

IndepVar = []
for col in power_plant_data.columns:
    if col != 'PE':
        IndepVar.append(col)

TargetVar = 'PE'

x = power_plant_data[IndepVar]
y = power_plant_data[TargetVar]

In [34]:
# Split the data into train and test (random sampling)

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state =42)
x_test_F1 = x_test.copy()

In [35]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

x_train = mmscaler.fit_transform(x_train)
x_train = pd.DataFrame(x_train)

x_test = mmscaler.fit_transform(x_test)
x_test = pd.DataFrame(x_test)

# Multi-Regression Algorithm

In [36]:
# Train the algorithm and build the model with train dataset

from sklearn.linear_model import LinearRegression

modelMR = LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=None, positive=False)

# Fit the model with train dataset

modelMR.fit(x_train, y_train)

LinearRegression()

In [37]:
# Predit the model with test dataset

y1_pred = modelMR.predict(x_test)

# Evalution metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y1_pred), 3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y1_pred), 3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y1_pred)), 3))
print('Mean Absolute Percentage Error (MAPE)', round(metrics.mean_absolute_percentage_error(y_test, y1_pred), 3)*100, "%")
print('R2_score', round(metrics.r2_score(y_test, y1_pred), 3))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y1_pred), 6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1), 6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.611
Mean Squared Error (MSE): 20.28
Root Mean Squared Error (RMSE): 4.503
Mean Absolute Percentage Error (MAPE) 0.8 %
R2_score 0.931
Adj R Square:  0.930612


In [38]:
Results = pd.DataFrame({'PE':y_test, 'PE_pred':y1_pred})

# Merage the Dataframes on index of both the dataframes

ResultsFinal = x_test_F1.merge(Results, left_index=True, right_index=True)
ResultsFinal.sample(10)

Unnamed: 0,AT,V,AP,RH,PE,PE_pred
741,24.7,58.46,1015.58,68.64,439.7,444.319795
22248,29.31,68.67,1006.18,63.38,435.57,433.099101
40395,16.72,44.78,1007.75,56.79,453.39,464.741581
21340,30.15,72.51,1009.37,50.64,443.72,432.769671
25935,16.25,58.86,1015.94,86.52,456.49,458.099212
30310,19.02,44.85,1014.57,43.37,465.42,462.729029
25147,25.56,58.82,1010.44,70.23,441.0,441.975401
23129,24.65,64.63,1020.52,53.45,446.04,445.702383
10823,14.12,39.4,1008.02,76.45,468.63,468.00872
47256,24.33,46.93,1013.51,52.63,449.07,450.191061


# Gradient Boosting Regressor

In [39]:
# Build the model with Gradient Boosting Regressor

from sklearn.ensemble import GradientBoostingRegressor

modelGBR = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', 
                                     min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, 
                                     min_impurity_decrease=0.0, min_impurity_split=None, init=None, random_state=None, 
                                     max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, 
                                     validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)

modelGBR.fit(x_train, y_train)

# Predict the model with test dataset

y2_pred = modelGBR.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y2_pred), 3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y2_pred), 3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y2_pred)), 3))
print('Mean Absolute Percentage Error (MAPE)', round(metrics.mean_absolute_percentage_error(y_test, y2_pred), 3)*100, "%")
print('R2_score', round(metrics.r2_score(y_test, y2_pred), 3))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y2_pred), 6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1), 6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 2.854
Mean Squared Error (MSE): 13.732
Root Mean Squared Error (RMSE): 3.706
Mean Absolute Percentage Error (MAPE) 0.6 %
R2_score 0.953
Adj R Square:  0.953018


# Decision Tree Regressor

In [40]:
# Build the Decision Tree Regressor model 

from sklearn.tree import DecisionTreeRegressor

modelDT = DecisionTreeRegressor(criterion='mse', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                                min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, 
                                min_impurity_split=None, ccp_alpha=0.0)

modelDT.fit(x_train, y_train)

# Predict the model with test dataset

y3_pred = modelDT.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y3_pred), 3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y3_pred), 3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y3_pred)), 3))
print('Mean Absolute Percentage Error (MAPE)', round(metrics.mean_absolute_percentage_error(y_test, y3_pred), 3)*100, "%")
print('R2_score', round(metrics.r2_score(y_test, y3_pred), 3))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y3_pred), 6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1), 6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 0.266
Mean Squared Error (MSE): 1.633
Root Mean Squared Error (RMSE): 1.278
Mean Absolute Percentage Error (MAPE) 0.1 %
R2_score 0.994
Adj R Square:  0.994414


In [41]:
Results = pd.DataFrame({'PE':y_test, 'PE_pred':y3_pred})

# Merage the Dataframes on index of both the dataframes

ResultsFinal = x_test_F1.merge(Results, left_index=True, right_index=True)
ResultsFinal.sample(10)

Unnamed: 0,AT,V,AP,RH,PE,PE_pred
27805,22.19,48.78,1017.84,76.01,446.51,451.84
43391,16.37,54.3,1017.94,63.63,459.97,459.97
8897,28.23,64.05,1011.98,75.23,442.3,442.3
27816,11.75,35.76,1018.99,57.92,473.01,473.01
30265,8.72,40.72,1023.48,77.87,485.3,485.3
11260,14.38,44.84,1024.59,81.68,471.6,471.6
46556,26.56,52.3,1007.4,62.04,439.25,439.25
25043,13.79,45.08,1025.02,84.36,469.55,469.55
15153,32.35,77.95,1014.76,60.88,432.72,432.72
40755,11.86,40.27,1008.06,72.91,474.57,474.57


# Random Forest Regressor

In [42]:
# Build the Random Forest Regressor

from sklearn.ensemble import RandomForestRegressor

modelRFG = RandomForestRegressor(n_estimators=100, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                                 min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, 
                                 min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, 
                                 n_jobs=None, random_state=None, verbose=0, warm_start=False, ccp_alpha=0.0, max_samples=None)
modelRFG.fit(x_train, y_train)

# Predict the model with test data

y4_pred = modelRFG.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y4_pred), 3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y4_pred), 3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y4_pred)), 3))
print('Mean Absolute Percentage Error (MAPE)', round(metrics.mean_absolute_percentage_error(y_test, y4_pred), 3)*100, "%")
print('R2_score', round(metrics.r2_score(y_test, y4_pred), 3))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y4_pred), 6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1), 6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 0.409
Mean Squared Error (MSE): 0.539
Root Mean Squared Error (RMSE): 0.734
Mean Absolute Percentage Error (MAPE) 0.1 %
R2_score 0.998
Adj R Square:  0.998155


# KNN Regressor

In [46]:
# Build the KNN Regressor

from sklearn.neighbors import KNeighborsRegressor

modelKNN = KNeighborsRegressor(n_neighbors=5,weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                        metric='minkowski', metric_params=None, n_jobs=None)

modelKNN.fit(x_train, y_train)

# Predict the model with test data

y5_pred = modelKNN.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y5_pred), 3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y5_pred), 3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y5_pred)), 3))
print('Mean Absolute Percentage Error (MAPE)', round(metrics.mean_absolute_percentage_error(y_test, y5_pred), 3)*100, "%")
print('R2_score', round(metrics.r2_score(y_test, y5_pred), 3))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y5_pred), 6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1), 6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 1.231
Mean Squared Error (MSE): 4.052
Root Mean Squared Error (RMSE): 2.013
Mean Absolute Percentage Error (MAPE) 0.3 %
R2_score 0.986
Adj R Square:  0.986137


# SVR algorithm

In [48]:
# Build the SVM Regressor model 

from sklearn.svm import SVR 

modelSVR = SVR(kernel='rbf', degree=3, gamma='scale', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, 
               cache_size=200, verbose=False, max_iter=- 1) 
modelSVR.fit(x_train, y_train) 

# Predict the model with test dataset 

y6_pred = modelSVR.predict(x_test) 

# Evaluation metrics for Regression analysis 

from sklearn import metrics 

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y6_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y6_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y6_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y6_pred), 3) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y6_pred),6)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y6_pred),6) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),6) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.054
Mean Squared Error (MSE): 16.114
Root Mean Squared Error (RMSE): 4.014
Mean Absolute Percentage Error (MAPE): 0.7000000000000001 %
R2_score: 0.94487
Adj R Square:  0.944865
