In [20]:
import pandas as pd

## Model Training

In [21]:
# Use a forward slash as the separator: a backslash here would form the
# unrecognised escape '\E' inside a normal string literal and would only work
# as a path separator on Windows. A forward slash is accepted on every platform.
file_path = 'data/ENB2012_data.csv'
df = pd.read_csv(file_path)

In [22]:
# Preview the first rows to sanity-check the load and the column names.
df.head()

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,orientation,glazing_area,glazing_area_distribution,heating_load_(HL),cooling_load_(CL)
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


In [23]:
## Split the frame into predictors (X) and the two regression targets (Y)
target_cols = ['heating_load_(HL)', 'cooling_load_(CL)']
X = df.drop(columns=target_cols)
Y = df[target_cols]

In [24]:
# Inspect the two-column target frame (heating load and cooling load).
Y

Unnamed: 0,heating_load_(HL),cooling_load_(CL)
0,15.55,21.33
1,15.55,21.33
2,15.55,21.33
3,15.55,21.33
4,20.84,28.28
...,...,...
763,17.88,21.40
764,16.54,16.88
765,16.44,17.11
766,16.48,16.61


In [25]:
# Segregation: collect the names of all non-object columns of X; these are the
# columns that will be treated as numerical features.
numerical_cols = X.select_dtypes(exclude=['object']).columns

In [26]:
# Inspect the predictor frame (everything except the two target columns).
X

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,orientation,glazing_area,glazing_area_distribution
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0
4,0.90,563.5,318.5,122.50,7.0,2,0.0,0
...,...,...,...,...,...,...,...,...
763,0.64,784.0,343.0,220.50,3.5,5,0.4,5
764,0.62,808.5,367.5,220.50,3.5,2,0.4,5
765,0.62,808.5,367.5,220.50,3.5,3,0.4,5
766,0.62,808.5,367.5,220.50,3.5,4,0.4,5


In [27]:
from sklearn.impute import SimpleImputer ## Handling missing value
from sklearn.preprocessing import StandardScaler #Handling feature Scalling
from sklearn.preprocessing import OrdinalEncoder #Ordenal encoding
## pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


In [28]:
# Numerical features: fill missing values with the column median, then
# standardise each column.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
num_pipeline = Pipeline(steps=numeric_steps)

# Build the preprocessor: the numerical columns are the only ones registered,
# and they are routed through the pipeline defined above.
preprocessor = ColumnTransformer(
    transformers=[('num_pipeline', num_pipeline, numerical_cols)]
)


In [29]:
## train test split

from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.30,
    random_state=30,
)

In [30]:
# Fit the preprocessor on the training split only, then apply that same fitted
# transform to the test split, so no test-set statistics influence the
# imputer or scaler.
#
# The row index of each split is passed through to the new frame: y_train and
# y_test keep their original index after the split, and without `index=` the
# features would be relabelled 0..n-1 and stop lining up with the targets.
#
# NOTE: X_train / X_test are overwritten here, so re-run the split cell before
# re-running this one.
X_train = pd.DataFrame(
    preprocessor.fit_transform(X_train),
    columns=preprocessor.get_feature_names_out(),
    index=X_train.index,
)
X_test = pd.DataFrame(
    preprocessor.transform(X_test),
    columns=preprocessor.get_feature_names_out(),
    index=X_test.index,
)

In [31]:
# Check the transformed training features and their prefixed column names.
X_train.head()

Unnamed: 0,num_pipeline__relative_compactness,num_pipeline__surface_area,num_pipeline__wall_area,num_pipeline__roof_area,num_pipeline__overall_height,num_pipeline__orientation,num_pipeline__glazing_area,num_pipeline__glazing_area_distribution
0,0.253635,-0.403766,0.586525,-0.677968,1.016903,1.310827,1.236682,1.41701
1,-0.500424,0.428033,-1.115982,0.955849,-0.983378,1.310827,1.236682,-1.125795
2,-0.217652,0.150767,-1.683485,0.955849,-0.983378,1.310827,1.236682,0.145608
3,-0.688939,0.705299,-0.54848,0.955849,-0.983378,-1.335467,0.123046,-0.490094
4,-1.34874,1.537098,1.154027,0.955849,-0.983378,1.310827,-0.99059,0.145608


In [32]:
# model training
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [33]:
# Baseline: a plain linear regression fitted on the preprocessed training
# features. y_train has two columns, so one estimator models both targets.
regression=LinearRegression()
regression.fit(X_train,y_train)

In [34]:
# Learned coefficients: one row per target, one column per input feature.
regression.coef_

array([[-7.73556408e+00, -4.35376361e+00,  6.10810758e-01,
        -4.56890780e+00,  6.96924911e+00,  6.12297444e-03,
         2.66461219e+00,  2.08601847e-01],
       [-8.15405455e+00, -4.33567532e+00, -4.81497421e-02,
        -4.23495649e+00,  7.45635388e+00,  1.89634643e-01,
         1.98399639e+00, -3.09886900e-02]])

In [35]:
# Learned intercepts, one per target.
regression.intercept_

array([22.04687151, 24.42271881])

In [36]:
import numpy as np
def evaluate_model(true,predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Model predictions for the same samples as ``true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_square)``: mean absolute error, root mean squared
        error and R^2 score, each computed with the scikit-learn metric
        function's default settings.
    """
    mae = mean_absolute_error(true, predicted)
    # Compute the MSE once and derive the RMSE from it, rather than calling
    # the metric a second time.
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [37]:
## train multiple models

# Candidate linear models, each constructed with its default hyper-parameters.
models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'Elasticnet':ElasticNet()
}
# NOTE(review): trained_model_list is initialised but never filled in this cell.
trained_model_list=[]
model_list=[]
r2_list=[]

# Walk the name/estimator pairs directly instead of rebuilding the key and
# value lists on every index lookup.
for model_name, model in models.items():
    model.fit(X_train,y_train)

    # Predict on the held-out split and score those predictions.
    y_pred=model.predict(X_test)
    mae, rmse, r2_square=evaluate_model(y_test,y_pred)

    print(model_name)
    model_list.append(model_name)

    # NOTE(review): the heading says "Training", but the metrics below are
    # computed from X_test / y_test.
    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE",mae)
    print("R2_score",r2_square*100)

    r2_list.append(r2_square)

    print('='*35)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 2.940429725822968
MAE 2.0530316000422935
R2_score 90.91851440872401


Lasso
Model Training Performance
RMSE: 3.421678623373283
MAE 2.523741173467519
R2_score 87.74041196597899


Ridge
Model Training Performance
RMSE: 2.9306692355025694
MAE 2.039515598544376
R2_score 90.97797834516925


Elasticnet
Model Training Performance
RMSE: 4.0144311309786245
MAE 3.019889953752175
R2_score 83.18980130923683


