In [1]:
# Importing the libraries 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Ignore harmless warnings

import warnings 
warnings.filterwarnings("ignore")

# Set to display all columns in dataset

pd.set_option("display.max_columns", 50)

# import psql to run queries

import pandasql as psql

In [2]:
# Load the data from all the work sheets 
    
power_plant_T0 = pd.read_excel(r"D:\iiit notes\Programming\AI\Internship practice\61 season 23- Aug - 2021\Power_Plant.xlsx", sheet_name=0) 
power_plant_T0.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [3]:
# Display the dataset information

power_plant_T0.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9568 entries, 0 to 9567
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      9568 non-null   float64
 1   V       9568 non-null   float64
 2   AP      9568 non-null   float64
 3   RH      9568 non-null   float64
 4   PE      9568 non-null   float64
dtypes: float64(5)
memory usage: 373.9 KB


In [4]:
# Display the corr of data

power_plant_T0.corr()

Unnamed: 0,AT,V,AP,RH,PE
AT,1.0,0.844107,-0.507549,-0.542535,-0.948128
V,0.844107,1.0,-0.413502,-0.312187,-0.86978
AP,-0.507549,-0.413502,1.0,0.099574,0.518429
RH,-0.542535,-0.312187,0.099574,1.0,0.389794
PE,-0.948128,-0.86978,0.518429,0.389794,1.0


In [5]:
# Identify the independent and Target (dependent) variables 

IndepVar = [] 
for col in power_plant_T0.columns: 
    if col != 'PE': 
        IndepVar.append(col) 
        TargetVar = 'PE' 

x = power_plant_T0[IndepVar] 
y = power_plant_T0[TargetVar]

In [6]:
# Split the data into train and test (random sampling) 

from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42) 
x_test_F1 = x_test.copy()

In [7]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0,1))

x_train = mmscaler.fit_transform(x_train)
x_train = pd.DataFrame(x_train)

x_test = mmscaler.fit_transform(x_test)
x_test = pd.DataFrame(x_test)

# Multi Regression

In [8]:
# Train the Multi Regression algorithm and build the model with train dataset

from sklearn.linear_model import LinearRegression

modelMR = LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=None, positive=False)

# Fit the model with train dataset

modelMR.fit(x_train, y_train)

LinearRegression()

In [9]:
# Predict the model with test dataset

y1_pred = modelMR.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print("Mean Absolute Error (MAE):", round(metrics.mean_absolute_error(y_test, y1_pred), 3))
print("Mean Squared Error (MSE):", round(metrics.mean_squared_error(y_test, y1_pred), 3))
print("Root Mean Squared Error (RMSE):", round(np.sqrt(metrics.mean_squared_error(y_test, y1_pred)), 3))
print("Mean Absolute Percentage Error (MAPE):", round(metrics.mean_absolute_percentage_error(y_test, y1_pred)*100, 3), '%')
print("R2_Score:", round(metrics.r2_score(y_test, y1_pred), 3))

# Calculate Adjusted R squared value

r_squared = round(metrics.r2_score(y_test, y1_pred), 3)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 

print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.929
Mean Squared Error (MSE): 24.233
Root Mean Squared Error (RMSE): 4.923
Mean Absolute Percentage Error (MAPE): 0.87 %
R2_Score: 0.917
Adj R Square:  0.917


In [10]:
Results = pd.DataFrame({'PE': y_test, 'PE_Pred':y1_pred})

# Merge two DataFrames on index of both the dataframes

ResultsFinal = x_test_F1.merge(Results, left_index=True, right_index=True)
ResultsFinal.sample(10)

Unnamed: 0,AT,V,AP,RH,PE,PE_Pred
1416,25.21,60.37,1006.01,73.14,436.23,443.585763
8514,14.92,46.18,1014.21,98.82,465.63,463.27602
26,14.6,39.31,1011.11,72.52,464.16,469.523466
2485,22.56,70.79,1005.85,93.09,435.14,443.127593
8930,14.8,43.99,1022.89,85.25,461.97,466.603954
3656,13.2,41.78,1010.49,64.96,468.58,472.834337
5418,14.32,71.14,1019.87,79.15,460.02,461.981357
4827,19.08,46.93,1010.6,75.21,454.45,458.567048
4718,15.67,38.62,1015.76,66.29,463.17,468.821292
5503,23.97,68.67,1006.63,76.83,441.53,443.496324


# XGBoost Regressor

In [11]:
# Train the XGBoost Regression algorithm and build the model with train dataset

import xgboost as xgb 

modelXGR = xgb.XGBRegressor(objective ='reg:linear', n_estimators = 10, seed = 123) 

modelXGR.fit(x_train, y_train) 

# Predict the model with test dataset 

y2_pred = modelXGR.predict(x_test) 

# Evaluation metrics for Regression analysis 

from sklearn import metrics 

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y2_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y2_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y2_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y2_pred), 3) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y2_pred),3)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y2_pred),3) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 11.664
Mean Squared Error (MSE): 154.616
Root Mean Squared Error (RMSE): 12.434
Mean Absolute Percentage Error (MAPE): 2.6 %
R2_score: 0.473
Adj R Square:  0.473


# Gradient Boosting Regressor

In [12]:
# Train the Gradient Boosting Regression algorithm and build the model with train dataset

from sklearn.ensemble import GradientBoostingRegressor

modelGBR = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', 
                                     min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, 
                                     min_impurity_decrease=0.0, min_impurity_split=None, init=None, random_state=None, 
                                     max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, 
                                     validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)

modelGBR.fit(x_train, y_train)

# Predict the model with test dataset

y3_pred = modelGBR.predict(x_test)

# Evaluation metrics for Regression analysis 

from sklearn import metrics 

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y3_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y3_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y3_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y3_pred), 4) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y3_pred),3)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y3_pred),3) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.365
Mean Squared Error (MSE): 18.898
Root Mean Squared Error (RMSE): 4.347
Mean Absolute Percentage Error (MAPE): 0.74 %
R2_score: 0.936
Adj R Square:  0.936


# Random Forest Regressor

In [13]:
# Train the Random Forest Regression algorithm and build the model with train dataset

from sklearn.ensemble import RandomForestRegressor

modelRFR = RandomForestRegressor(n_estimators=100,criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                                 min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, 
                                 min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, 
                                 n_jobs=None, random_state=None, verbose=0, warm_start=False, ccp_alpha=0.0, max_samples=None)

modelRFR.fit(x_train, y_train)

# Predict the model with test dataset

y4_pred = modelRFR.predict(x_test)

# Evaluation metrics for Regression analysis 

from sklearn import metrics 

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y4_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y4_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y4_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y4_pred), 4) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y4_pred),3)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y4_pred),3) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.3
Mean Squared Error (MSE): 18.985
Root Mean Squared Error (RMSE): 4.357
Mean Absolute Percentage Error (MAPE): 0.73 %
R2_score: 0.935
Adj R Square:  0.935


# Decision Tree Regressor

In [14]:
# Train the Decision Tree Regression algorithm and build the model with train dataset

from sklearn.tree import DecisionTreeRegressor

modelDTR = DecisionTreeRegressor(criterion='mse', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                                 min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, 
                                 min_impurity_decrease=0.0, min_impurity_split=None, ccp_alpha=0.0)

modelDTR.fit(x_train, y_train)

# Predict the model with test dataset

y5_pred = modelDTR.predict(x_test)

# Evaluation metrics for Regression analysis 

from sklearn import metrics 

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y5_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y5_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y5_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y5_pred), 4) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y5_pred),3)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y5_pred),3) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 4.354
Mean Squared Error (MSE): 33.661
Root Mean Squared Error (RMSE): 5.802
Mean Absolute Percentage Error (MAPE): 0.96 %
R2_score: 0.885
Adj R Square:  0.885


# KNN Regressor

In [15]:
# Train the KNN Regression algorithm and build the model with train dataset

from sklearn.neighbors import KNeighborsRegressor

modelKNN = KNeighborsRegressor(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', 
                               metric_params=None, n_jobs=None) 

modelKNN.fit(x_train, y_train) 

# Predict the model with test dataset 

y6_pred = modelKNN.predict(x_test) 

# Evaluation metrics for Regression analysis 

from sklearn import metrics 
print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y6_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y6_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y6_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y6_pred), 4) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y6_pred),3)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y6_pred),3) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.125
Mean Squared Error (MSE): 18.047
Root Mean Squared Error (RMSE): 4.248
Mean Absolute Percentage Error (MAPE): 0.69 %
R2_score: 0.938
Adj R Square:  0.938


# Extra Trees Regressor

In [16]:
# Train the Extra Trees Regression algorithm and build the model with train dataset

from sklearn.ensemble import ExtraTreesRegressor

modelETR = ExtraTreesRegressor(n_estimators=100,criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                               min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, 
                               min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=False, oob_score=False, 
                               n_jobs=None, random_state=None, verbose=0, warm_start=False, ccp_alpha=0.0, max_samples=None)

modelETR.fit(x_train, y_train)

# Predict the model with test dataset

y7_pred = modelETR.predict(x_test)

# Evaluation metrics for Regression analysis 

from sklearn import metrics 

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y7_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y7_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y7_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y7_pred), 4) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y7_pred),3)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y7_pred),3) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.249
Mean Squared Error (MSE): 18.425
Root Mean Squared Error (RMSE): 4.292
Mean Absolute Percentage Error (MAPE): 0.72 %
R2_score: 0.937
Adj R Square:  0.937


# Support Vector Regressor

In [17]:
# Train the Support Vector Regression algorithm and build the model with train dataset

from sklearn.svm import SVR

modelSVR = SVR(kernel='rbf', degree=3, gamma='scale', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, 
               cache_size=200, verbose=False, max_iter=- 1)

modelSVR.fit(x_train, y_train)

# Predict the model with test dataset

y8_pred = modelSVR.predict(x_test)

# Evaluation metrics for Regression analysis 

from sklearn import metrics 

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y8_pred),3)) 
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y8_pred),3)) 
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y8_pred)),3)) 
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y8_pred), 4) * 100, '%') 
print('R2_score:', round(metrics.r2_score(y_test, y8_pred),3)) 

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y8_pred),3) 
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3) 
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 3.395
Mean Squared Error (MSE): 20.141
Root Mean Squared Error (RMSE): 4.488
Mean Absolute Percentage Error (MAPE): 0.75 %
R2_score: 0.931
Adj R Square:  0.931
