### Energy Efficiency of Buildings
#### 1.Loading Libraries, Data
#### 2.Data Summary 
#### 3.Duplicate, Missing and Outlier Data Detection
#### 4.Multiple Linear Regression
#### 5.Polynomial Regression
#### 6.Support Vector Machine Regression
#### 7.Decision Tree Regression
#### 8.Random Forest Regression 
#### 9.Conclusion


#### 1.Loading Libraries, Data

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.svm import SVR
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Load the dataset from the CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="ENB2012_data.csv")

#### 2.Data Summary

In [None]:
# Preview the first five rows of the dataset.
df.head(n=5)

In [None]:
# Summary statistics, transposed so that each described column becomes a row.
df.describe().transpose()

In [None]:
# Print pandas' concise summary of the DataFrame (DataFrame.info).
df.info()

#### 3.Duplicate, Missing and Outlier Data Detection

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Count the rows that pandas flags as duplicates.
int(df.duplicated().sum())

In [None]:
def Outlier_Detect_SDM(dataframe):
    """Find outliers with the standard-deviation (three-sigma) method.

    For every numeric column, a value counts as an outlier when it lies more
    than three standard deviations above or below that column's mean.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to scan. Non-numeric columns are skipped because a mean and a
        standard deviation cannot be computed for them.

    Returns
    -------
    list of list
        One entry per outlying value found (columns are scanned in order).
        Each entry is the list of index labels of the rows whose value in
        that column equals the outlying value. The list is empty when no
        outliers exist.
    """
    Outlier_data_index = []
    for a in dataframe.select_dtypes(include="number").columns:
        column = dataframe[a]
        std = column.std()
        mean = column.mean()

        lower_limit = mean - std * 3
        upper_limit = mean + std * 3

        # Boolean mask instead of a Python-level comparison per value.
        # NaN compares False on both sides, so missing values are never flagged.
        outside = (column > upper_limit) | (column < lower_limit)
        for outlier in column[outside].values:
            # The original appended to an undefined name (`anomalies`), which
            # raised NameError as soon as the first outlier was found.
            Outlier_data_index.append(
                dataframe.index[column == outlier].tolist()
            )
    return Outlier_data_index

In [None]:
# Run the three-standard-deviation outlier check defined above on the full dataset.
Outlier_Detect_SDM(df)

#### 4.Multiple Linear Regression

In [None]:
# Targets: the heating and cooling loads, modelled jointly.
y = df[["Heating_Load", "Cooling_Load"]]
# Features: every column except the last two.
X = df.iloc[:, :-2]

In [None]:
# Hold out 32% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.32,
    random_state=25,
)

In [None]:
# Fit a linear model on the training split, then predict the held-out rows.
mlr = LinearRegression().fit(x_train, y_train)
y_pred = mlr.predict(x_test)

In [None]:
# Score the predictions against the held-out targets.
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# One line per metric, each rounded to three decimals.
print(
    "Results of Multiple Linear Regression sklearn.metrics",
    *(
        f"{label} {round(score, 3)!s}"
        for label, score in (
            ("MAE:      ", mae),
            ("MSE:      ", mse),
            ("RMSE:     ", rmse),
            ("R-Squared:", r2),
        )
    ),
    sep="\n",
)

In [None]:
# statsmodels' OLS does not add an intercept on its own, so add the "const" column.
X_ols = sm.add_constant(X)
# Regress heating load on the constant plus the eight listed predictors.
model = sm.OLS(
    endog=y[["Heating_Load"]],
    exog=X_ols[
        [
            "const",
            "Relative_Compactness",
            "Surface_Area",
            "Wall_Area",
            "Roof_Area",
            "Overall_Height",
            "Orientation",
            "Glazing_Area",
            "Glazing_Area_Distribution",
        ]
    ],
).fit()

print(model.summary())

##### The p-value of Orientation is higher than 0.05. Let's drop the Orientation column using the Backward Elimination method.

In [None]:
# statsmodels' OLS does not add an intercept on its own, so add the "const" column.
X_ols = sm.add_constant(X)
# Refit the heating-load regression with Orientation left out of the predictors.
model = sm.OLS(
    endog=y[["Heating_Load"]],
    exog=X_ols[
        [
            "const",
            "Relative_Compactness",
            "Surface_Area",
            "Wall_Area",
            "Roof_Area",
            "Overall_Height",
            "Glazing_Area",
            "Glazing_Area_Distribution",
        ]
    ],
).fit()

print(model.summary())

#### 5.Polynomial Regression


In [None]:
# Expand the features into polynomial terms up to degree 2, without a bias column.
poly_reg = PolynomialFeatures(degree=2, include_bias=False)
x_poly = poly_reg.fit(X).transform(X)

In [None]:
# Hold out 32% of the polynomial-feature rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x_poly,
    y,
    test_size=0.32,
    random_state=61,
)

In [None]:
# Fit a linear model on the training split, then predict the held-out rows.
poly_linreg = LinearRegression().fit(x_train, y_train)
y_pred = poly_linreg.predict(x_test)

In [None]:
# Score the predictions against the held-out targets.
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# One line per metric, each rounded to three decimals.
print(
    "Results of Polynomial Regression sklearn.metrics",
    *(
        f"{label} {round(score, 3)!s}"
        for label, score in (
            ("MAE:      ", mae),
            ("MSE:      ", mse),
            ("RMSE:     ", rmse),
            ("R-Squared:", r2),
        )
    ),
    sep="\n",
)

#### 6.Support Vector Machine Regression

In [None]:
# Features: every column except the last two, as a NumPy array.
X = df.iloc[:, :-2].to_numpy()
# Target: heating load only, kept as a single-column 2-D array.
y = df[["Heating_Load"]].to_numpy()

# Hold out 28% of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.28,
    random_state=0,
)

In [None]:
# Both scalers are fitted on the training split only.
X_sc = StandardScaler()
x_train = X_sc.fit_transform(x_train)

# The scaled target is flattened to 1-D before it is passed to the regressor.
y_sc = StandardScaler()
y_train = y_sc.fit_transform(y_train).ravel()

In [None]:
# Support vector regressor with an RBF kernel, trained on the scaled training data.
regressor = SVR(kernel="rbf")
regressor.fit(x_train, y_train)

In [None]:
# Scale the test features with the training-fitted scaler and predict.
y_pred = regressor.predict(X_sc.transform(x_test))
# inverse_transform needs a 2-D column; it maps predictions back to target units.
y_pred = y_sc.inverse_transform(y_pred.reshape(-1, 1))

In [None]:
# Score the predictions against the held-out targets.
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# One line per metric, each rounded to three decimals.
print(
    "Results of Support Vector Machine Regression sklearn.metrics",
    *(
        f"{label} {round(score, 3)!s}"
        for label, score in (
            ("MAE:      ", mae),
            ("MSE:      ", mse),
            ("RMSE:     ", rmse),
            ("R-Squared:", r2),
        )
    ),
    sep="\n",
)

#### 7.Decision Tree Regression

In [None]:
# Features: every column except the last two, as a NumPy array.
X = df.iloc[:, :-2].to_numpy()
# Targets: heating and cooling loads, as a two-column array.
y = df[["Heating_Load", "Cooling_Load"]].to_numpy()

# Hold out 25% of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Regression tree limited to depth 4, fitted on the training split.
# random_state is pinned because scikit-learn permutes the features at every
# split; without a seed, ties between equally good splits can be resolved
# differently on each run, which would make the reported metrics unrepeatable
# even though the train/test split itself is seeded.
dt_reg = DecisionTreeRegressor(max_depth=4, random_state=0)
dt_reg.fit(x_train, y_train)
y_pred = dt_reg.predict(x_test)

In [None]:
# Score the predictions against the held-out targets.
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# One line per metric, each rounded to three decimals.
print(
    "Results of Decision Tree Regression sklearn.metrics",
    *(
        f"{label} {round(score, 3)!s}"
        for label, score in (
            ("MAE:      ", mae),
            ("MSE:      ", mse),
            ("RMSE:     ", rmse),
            ("R-Squared:", r2),
        )
    ),
    sep="\n",
)

#### 8.Random Forest Regression 

In [None]:
# Features: every column except the last two, as a NumPy array.
X = df.iloc[:, :-2].to_numpy()
# Targets: heating and cooling loads, as a two-column array.
y = df[["Heating_Load", "Cooling_Load"]].to_numpy()

# Hold out 25% of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Small random forest (8 trees, depth 3) fitted on the training split.
# random_state is pinned because each tree is grown on a bootstrap resample of
# the training rows; without a seed the forest, and therefore the metrics
# reported for it, would change on every run even though the train/test split
# itself is seeded.
rfr_reg = RandomForestRegressor(
    n_estimators=8,
    max_depth=3,
    bootstrap=True,
    random_state=0,
)
rfr_reg.fit(x_train, y_train)
y_pred = rfr_reg.predict(x_test)

In [None]:
# Score the predictions against the held-out targets.
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = metrics.r2_score(y_true=y_test, y_pred=y_pred)

# One line per metric, each rounded to three decimals.
print(
    "Results of Random Forest Regression  sklearn.metrics",
    *(
        f"{label} {round(score, 3)!s}"
        for label, score in (
            ("MAE:      ", mae),
            ("MSE:      ", mse),
            ("RMSE:     ", rmse),
            ("R-Squared:", r2),
        )
    ),
    sep="\n",
)