In [1]:
# Importing the libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Ignore harmless warnings

import warnings
warnings.filterwarnings("ignore")

# Set to display all the columns in dataset

pd.set_option("display.max_columns", None)

# Import psql to run queries

import pandasql as psql

In [2]:
# Load the data from all the work sheets

power_plant_all = pd.concat(pd.read_excel(r"C:\Users\Anil\Desktop\data_science\60 Session 20-Aug-2021-20210820\Power_Plant.xlsx",
sheet_name=None), ignore_index=True)
power_plant_all.head()



Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [3]:
power_plant_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47840 entries, 0 to 47839
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      47840 non-null  float64
 1   V       47840 non-null  float64
 2   AP      47840 non-null  float64
 3   RH      47840 non-null  float64
 4   PE      47840 non-null  float64
dtypes: float64(5)
memory usage: 1.8 MB


In [5]:
# Identify the independent and Target (dependent) variables

IndepVar = []
for col in power_plant_all.columns:
    if col != 'PE':
        IndepVar.append(col)

TargetVar = 'PE'

x = power_plant_all[IndepVar]
y = power_plant_all[TargetVar]

In [10]:
# Split the data into train and test (random sampling)

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=6)
x_test_F1 = x_test.copy()

In [11]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

x_train = mmscaler.fit_transform(x_train)
x_train = pd.DataFrame(x_train)

x_test = mmscaler.fit_transform(x_test)
x_test = pd.DataFrame(x_test)

## AdaBoost Regressor

In [12]:
# Create adaboost regressor object

from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

# Train Adaboost regressor

modelDT=DecisionTreeRegressor(max_depth=1)

modelABR = AdaBoostRegressor(n_estimators=50, base_estimator=modelDT ,learning_rate=1)

# Fit the model with train data

modelABR.fit(x_train,y_train)

AdaBoostRegressor(base_estimator=DecisionTreeRegressor(max_depth=1),
                  learning_rate=1)

In [13]:
#Predict the response for test dataset

y_pred = modelABR.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y_pred),3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y_pred),3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y_pred)),3))
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y_pred), 2) * 100, '%')
print('R2_score:', round(metrics.r2_score(y_test, y_pred),3))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y_pred),3)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 6.162
Mean Squared Error (MSE): 59.093
Root Mean Squared Error (RMSE): 7.687
Mean Absolute Percentage Error (MAPE): 1.0 %
R2_score: 0.794
Adj R Square:  0.794


## ADA boost regressor with SVR as a Base Estimator

In [None]:
# Create adaboost classifer object

from sklearn.ensemble import AdaBoostRegressor
from sklearn.svm import SVR

# Train Adaboost regressor

modelSVR = SVR(kernel='rbf', degree=3, gamma='scale', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1,
shrinking=True, cache_size=200, verbose=False, max_iter=- 1)

modelABR = AdaBoostRegressor(n_estimators=50, base_estimator=modelSVR ,learning_rate=1)

# Fit the model with train data

modelABR.fit(x_train,y_train)

#Predict the response for test dataset

y1_pred = modelABR.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y1_pred),3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y1_pred),3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y1_pred)),3))
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y1_pred), 2) * 100, '%')
print('R2_score:', round(metrics.r2_score(y_test, y1_pred),3))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y1_pred),3)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),3)
print('Adj R Square: ', adjusted_r_squared)

## gradient boosting regressor

In [16]:
# Build the model with Gradient Boosting Regressor

from sklearn.ensemble import GradientBoostingRegressor

modelGBR = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0,
                                     criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1,
                                     min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0,
                                     min_impurity_split=None, init=None, random_state=None, max_features=None,
                                     alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False,
                                     validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)
modelGBR.fit(x_train, y_train)

# Predict the model with test dataset

y2_pred = modelGBR.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y2_pred),3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y2_pred),3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y2_pred)),3))
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y2_pred), 3) * 100, '%')
print('R2_score:', round(metrics.r2_score(y_test, y2_pred),6))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y2_pred),6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),6)
print('Adj R Square: ', adjusted_r_squared)                                     

Mean Absolute Error (MAE): 3.04
Mean Squared Error (MSE): 15.782
Root Mean Squared Error (RMSE): 3.973
Mean Absolute Percentage Error (MAPE): 0.7000000000000001 %
R2_score: 0.945057
Adj R Square:  0.945052


## DTR

In [17]:
# Build the Decision Tree Regressor model

from sklearn.tree import DecisionTreeRegressor

modelDT = DecisionTreeRegressor(criterion='mse', splitter='best', max_depth=None, min_samples_split=2,
min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None,
random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
min_impurity_split=None, ccp_alpha=0.0)
modelDT.fit(x_train,y_train)

# Predict the model with test dataset

y3_pred = modelDT.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y3_pred),3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y3_pred),3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y3_pred)),3))
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y3_pred), 3) * 100, '%')
print('R2_score:', round(metrics.r2_score(y_test, y3_pred),6))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y3_pred),6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 2.074
Mean Squared Error (MSE): 17.17
Root Mean Squared Error (RMSE): 4.144
Mean Absolute Percentage Error (MAPE): 0.5 %
R2_score: 0.940225
Adj R Square:  0.94022


## RFR

In [18]:
# Build the Random Forest Regressor model

from sklearn.ensemble import RandomForestRegressor

modelRF = RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse', max_depth=None,
max_features='auto', max_leaf_nodes=None, max_samples=None,
min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100,
n_jobs=None, oob_score=False, random_state=42, verbose=0, warm_start=False)

modelRF.fit(x_train, y_train.values.ravel())

# Predict the model with test dataset

y4_pred = modelRF.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y4_pred),3))
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y4_pred),3))
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y4_pred)),3))
print('Mean Absolute Percentage Error (MAPE):', round(metrics.mean_absolute_percentage_error(y_test, y4_pred), 3) * 100, '%')
print('R2_score:', round(metrics.r2_score(y_test, y3_pred),6))

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y4_pred),6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 1.67
Mean Squared Error (MSE): 7.013
Root Mean Squared Error (RMSE): 2.648
Mean Absolute Percentage Error (MAPE): 0.4 %
R2_score: 0.940225
Adj R Square:  0.975582
