In [66]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
import numpy as np
import pandas as pd

In [68]:
data = pd.read_csv('/content/Building Energy Efficiency.csv')   # loading the dataset

In [69]:
data.head()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load,Cooling Load
0,0.7638,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


In [70]:
data.tail()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load,Cooling Load
763,0.64,784.0,343.0,220.5,3.5,5,0.4,5,17.88,21.4
764,0.62,808.5,367.5,220.5,3.5,2,0.4,5,16.54,16.88
765,0.62,808.5,367.5,220.5,3.5,3,0.4,5,16.44,17.11
766,0.62,808.5,367.5,220.5,3.5,4,0.4,5,16.48,16.61
767,0.62,808.5,367.5,220.5,3.5,5,0.4,5,16.64,16.03


In [71]:
data.shape

(768, 10)

In [72]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Relative Compactness       768 non-null    float64
 1   Surface Area               768 non-null    float64
 2   Wall Area                  768 non-null    float64
 3   Roof Area                  768 non-null    float64
 4   Overall Height             768 non-null    float64
 5   Orientation                768 non-null    int64  
 6   Glazing Area               768 non-null    float64
 7   Glazing Area Distribution  768 non-null    int64  
 8   Heating Load               768 non-null    float64
 9   Cooling Load               768 non-null    float64
dtypes: float64(8), int64(2)
memory usage: 60.1 KB


In [73]:
data.describe()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load,Cooling Load
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,0.763885,671.708333,318.5,176.604167,5.25,3.5,0.234375,2.8125,22.307201,24.58776
std,0.10549,88.086116,43.626481,45.16595,1.75114,1.118763,0.133221,1.55096,10.090196,9.513306
min,0.62,514.5,245.0,110.25,3.5,2.0,0.0,0.0,6.01,10.9
25%,0.6825,606.375,294.0,140.875,3.5,2.75,0.1,1.75,12.9925,15.62
50%,0.75,673.75,318.5,183.75,5.25,3.5,0.25,3.0,18.95,22.08
75%,0.82,741.125,343.0,220.5,7.0,4.25,0.4,4.0,31.6675,33.1325
max,0.98,808.5,416.5,220.5,7.0,5.0,0.4,5.0,43.1,48.03


In [74]:
data.isnull().sum()

Relative Compactness         0
Surface Area                 0
Wall Area                    0
Roof Area                    0
Overall Height               0
Orientation                  0
Glazing Area                 0
Glazing Area Distribution    0
Heating Load                 0
Cooling Load                 0
dtype: int64

In [75]:
# Feature Engineering

# Create a new feature for the ratio of Glazing Area to Floor Area
data['Glazing_Area_Ratio'] = data['Glazing Area'] / 100

# Create a new feature for the ratio of Wall Area to Roof Area
data['Wall_to_Roof_Ratio'] = data['Wall Area'] / data['Roof Area']

# Normalize numerical features using StandardScaler
numerical_features = ['Surface Area', 'Wall Area', 'Roof Area', 'Overall Height', 'Glazing Area', 'Heating Load', 'Cooling Load']
scaler = StandardScaler()
data[numerical_features] = scaler.fit_transform(data[numerical_features])

In [76]:
# Splitting the data into training and testing sets
X = data.drop(['Heating Load', 'Cooling Load'], axis=1)  # Features
y_heat = data['Heating Load']  # Target for heating load prediction
y_cool = data['Cooling Load']  # Target for cooling load prediction

In [77]:
X_train, X_test, y_heat_train, y_heat_test, y_cool_train, y_cool_test = train_test_split(
    X, y_heat, y_cool, test_size=0.2, random_state=42
)

In [78]:
# Cross-Validation

rf_regressor = RandomForestRegressor(random_state=42)   # Using Random Forest Regressor ML Model here
n_folds = 5
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)  # KFold Cross-Validation

accuracies=cross_val_score(estimator=rf_regressor,X=X,y=y_heat,cv=25)
accuracies

array([0.09764976, 0.93415533, 0.98726434, 0.99877265, 0.9971796 ,
       0.99796463, 0.99419828, 0.99780769, 0.99459183, 0.99624379,
       0.99811625, 0.9970113 , 0.99618402, 0.9976607 , 0.99676857,
       0.99721593, 0.99830034, 0.99824353, 0.99711678, 0.99922415,
       0.98922926, 0.99593289, 0.99518456, 0.99584199, 0.99007405])

In [92]:
accuracies.mean()

0.9575172889036315

In [79]:
# Shuffle Split Cross-Validation

cv=ShuffleSplit(n_splits=15,test_size=0.3,random_state=0)
cross_val_score(rf_regressor,X,y_cool,cv=cv)

array([0.96820845, 0.95647167, 0.96591127, 0.97304531, 0.97409938,
       0.95397946, 0.95800003, 0.95628783, 0.96746512, 0.96987477,
       0.96262017, 0.96582819, 0.96848005, 0.96031029, 0.97255859])

In [93]:
accuracies.mean()

0.9575172889036315

In [86]:
# Using the SVR ML Model for predictions here

svm_regressor = SVR(kernel='linear')  # 'linear' kernel is used
svm_regressor.fit(X_train, y_heat_train)   # fitting the model on 'y_heat' test data
svm_regressor.fit(X_train, y_cool_train)  # fitting the model on 'y_cool' test data

In [87]:
y_heat_train_pred = svm_regressor.predict(X_train)
y_cool_train_pred = svm_regressor.predict(X_train)

In [89]:
y_heat_pred = svm_regressor.predict(X_test)
print(y_heat_pred)

[-0.53047454 -0.86992943  0.8599501   1.14393364 -0.80989226  0.56472562
  0.40012451  0.62724474 -0.58492658  0.51549237 -0.56888937  1.06247777
  0.5308114  -1.26169249 -0.58205385  1.29302858  1.30005008 -1.03042349
 -0.74748969  1.06734472  1.02798179  1.1320452  -1.09218271  0.67022539
 -1.04116125  0.67509234  0.98361549  1.04258264 -0.81691376 -0.74786087
 -1.26871399 -1.02212596 -0.81061044  0.79455825  0.68267169  0.63697864
  0.87383276  0.67780474  0.98848244 -1.20244833  0.63913319 -1.03529044
 -1.35782463  0.9328486  -1.3096759  -1.35295768 -1.24269206 -1.20731528
  0.69942505  0.76351676  0.98433367 -0.8670567  -0.8546104   0.91681342
 -0.96272147 -1.03701249 -1.03185986  0.80716488  1.52645212  1.29374676
 -0.68258557 -0.52903818 -0.85245585  0.5751777   0.64400014  0.68482624
  0.67995929 -1.10191661 -0.97912986 -1.03114168  0.5237899   0.86138647
 -0.92430664  0.5681562   0.93213041 -1.02699291  1.03915205  0.79599461
  0.85851373  0.30719463  0.39669393  0.90221257 -1

In [90]:
y_cool_pred = svm_regressor.predict(X_test)
print(y_cool_pred)

[-0.53047454 -0.86992943  0.8599501   1.14393364 -0.80989226  0.56472562
  0.40012451  0.62724474 -0.58492658  0.51549237 -0.56888937  1.06247777
  0.5308114  -1.26169249 -0.58205385  1.29302858  1.30005008 -1.03042349
 -0.74748969  1.06734472  1.02798179  1.1320452  -1.09218271  0.67022539
 -1.04116125  0.67509234  0.98361549  1.04258264 -0.81691376 -0.74786087
 -1.26871399 -1.02212596 -0.81061044  0.79455825  0.68267169  0.63697864
  0.87383276  0.67780474  0.98848244 -1.20244833  0.63913319 -1.03529044
 -1.35782463  0.9328486  -1.3096759  -1.35295768 -1.24269206 -1.20731528
  0.69942505  0.76351676  0.98433367 -0.8670567  -0.8546104   0.91681342
 -0.96272147 -1.03701249 -1.03185986  0.80716488  1.52645212  1.29374676
 -0.68258557 -0.52903818 -0.85245585  0.5751777   0.64400014  0.68482624
  0.67995929 -1.10191661 -0.97912986 -1.03114168  0.5237899   0.86138647
 -0.92430664  0.5681562   0.93213041 -1.02699291  1.03915205  0.79599461
  0.85851373  0.30719463  0.39669393  0.90221257 -1

In [91]:
# Analysis of Bias & Variance
# on 'y_heat' test data
mse_train = mean_squared_error(y_heat_train, y_heat_train_pred)
mse_test = mean_squared_error(y_heat_test, y_heat_pred)
bias = np.mean((y_heat_train - np.mean(y_heat_train_pred))**2)
variance = np.mean([((y - np.mean(y_heat_train_pred)) ** 2) for y in y_heat_train_pred])
print(f"Mean Squared Error (Training): {mse_train}")
print(f"Mean Squared Error (Testing): {mse_test}")
print(f"Bias: {bias}")
print(f"Variance: {variance}")

Mean Squared Error (Training): 0.09405033883824925
Mean Squared Error (Testing): 0.10255039182232786
Bias: 0.9942371216468903
Variance: 0.8260583810780187


In [95]:
# on 'y_cool' test data
from sklearn.metrics import accuracy_score

mse_train = mean_squared_error(y_cool_train, y_cool_train_pred)
mse_test = mean_squared_error(y_cool_test, y_cool_pred)
bias = np.mean((y_cool_train - np.mean(y_cool_train_pred))**2)
variance = np.mean([((y - np.mean(y_cool_train_pred)) ** 2) for y in y_cool_train_pred])
print(f"Mean Squared Error (Training): {mse_train}")
print(f"Mean Squared Error (Testing): {mse_test}")
print(f"Bias: {bias}")
print(f"Variance: {variance}")


Mean Squared Error (Training): 0.11902266575307623
Mean Squared Error (Testing): 0.1223227046965235
Bias: 0.9933589605142413
Variance: 0.8260583810780187
