##### Regression Using Keras
Features Col:  AT-Atmospheric Temp, V-Vacuum Pressure, AP-Atmospheric Pressure, RH-Relative Humidity

Target Col: PE - Power O/P

In [50]:
from warnings import filterwarnings; filterwarnings('ignore')

# Step-1: Data Ingestion
#-----------------------

import pandas as pd; df = pd.read_csv('PowerPlant.csv'); df.head(2)

# Step-2: Data Sanity Checks - Duplicate Removal , Null checks, High Categorical Non Unique Removal etc...
#---------------------------------------------------------------------------------------------------------

duplicate_count = df.duplicated().sum()
if duplicate_count > 0:
    df = df.drop_duplicates(keep='first').reset_index(drop=True)
    print('Duplicates removed count:', duplicate_count)
else:
    print('No Duplicates found..')

m = df.isna().sum()
mm = m[m > 0]
if not mm.empty:
    print('Null Values Found')
else:
    print('Null Values does not exist')

# Step-3: Separate X and Y
#-------------------------

X = df.drop(columns=['PE']); Y = df[['PE']]

# Step-4: Apply Train Test Split
#------------------------------

from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=42)

# Step-5: Apply Preprocessing on X
#---------------------------------

from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

num_cols = X.select_dtypes(include='number').columns
num_pipe = make_pipeline(SimpleImputer(strategy='median'), StandardScaler())
pre = ColumnTransformer([('num', num_pipe, num_cols)]).set_output(transform='pandas')
pre.fit(xtrain)

xtrain_pre = pre.transform(xtrain); xtest_pre = pre.transform(xtest)

# Step-6: Build Network Model
#----------------------------

from keras.models import Sequential
from keras.layers import Input, Dense

model = Sequential(
    [
        Input(shape=(xtrain_pre.shape[1], )),
        Dense(8, activation='relu'),
        Dense(4, activation='relu'),
        Dense(1, activation='relu')
    ]
)

model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.fit(xtrain_pre, ytrain, validation_split=0.2, epochs=60, verbose=False)

# Step-7: Evaluate Model
#-----------------------

train_mse, train_mae = model.evaluate(xtrain_pre, ytrain, verbose=False)
print('Train MSE:', round(train_mse, 2), ', Train MAE:', round(train_mae, 2))

test_mse, test_mae = model.evaluate(xtest_pre, ytest, verbose=False)
print('Test MSE:', round(test_mse, 2), ', Test MAE:', round(test_mae, 2))

# Evaluate Model
from sklearn.metrics import (
    root_mean_squared_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
    r2_score
)

def evaluate_model(model, x, y):
    ypred = model.predict(x, verbose=False)
    rmse = root_mean_squared_error(y, ypred)
    mae = mean_absolute_error(y, ypred)
    mape = mean_absolute_percentage_error(y, ypred),
    r2 = r2_score(y, ypred)
    return f'[RMSE : {rmse:.2f}, MAE: {mae:.2f}, MAPE: {mape[0]:.2%}, r2_score: {r2:.2%}]'

print('Train Model Metrics:', evaluate_model(model, xtrain_pre, ytrain))
print('Test Model Metrics:', evaluate_model(model, xtest_pre, ytest))

# Step-8: Out of Sample Prediction
xnew = pd.read_csv('test_PowerPlant.csv')
xnew_pre = pre.transform(xnew)

pe_preds = model.predict(xnew_pre, verbose=False)
xnew['PE_Preds'] = pe_preds.round(2)
xnew.to_csv('test_PowerPlant_results.csv')



Duplicates removed count: 41
Null Values does not exist
Train MSE: 20.25 , Train MAE: 3.55
Test MSE: 20.57 , Test MAE: 3.58
Train Model Metrics: [RMSE : 4.50, MAE: 3.55, MAPE: 0.78%, r2_score: 92.96%]
Test Model Metrics: [RMSE : 4.54, MAE: 3.58, MAPE: 0.79%, r2_score: 93.18%]
