# Sensitivity Analysis


# 1. Import and Load

In [None]:
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
# import shap

from scipy import stats
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# parameters search
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# models
from sklearn.ensemble import RandomForestRegressor

# explain
from sklearn.tree import export_graphviz
import lime
import lime.lime_tabular

from datetime import datetime
import os
import pathlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [None]:
# Read the combined building CSV and discard the columns listed below in one step.
dropped_columns = ['Unnamed: 0', 'CHWTON', 'Minute', 'DSW Top',
                   'DSW North', 'DSW East', 'DSW South', 'DSW West']
Bldg = (
    pd.read_csv("../Data/microclimate_model/Combined/all_buildings_9pm.csv")
    .drop(columns=dropped_columns)
)

In [None]:
# Keep only the three buildings under study. The per-building slices are
# stacked in a fixed order (Psychology North, Psychology, ISTB 4), so rows end
# up grouped by building in that order, then the index is renumbered from 0.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
selected_buildings = ['Psychology North', 'Psychology', 'ISTB 4']
Bldg = pd.concat(
    [Bldg.loc[Bldg['bldgname'] == name] for name in selected_buildings]
).reset_index(drop=True)

# 2. Feature Engineering Temperature and Humidity

In [None]:
# Derive the interaction term and the two squared terms from 'Air Temp' and
# 'Abs Hum'. 'Rel Hum' is dropped up front and 'Abs Hum' once the derived
# columns exist.
Bldg = (
    Bldg
    .drop(columns=['Rel Hum'])
    .assign(
        AirTempXAbsHum=lambda d: d['Air Temp'] * d['Abs Hum'],
        AirTempSq=lambda d: d['Air Temp'] * d['Air Temp'],
        AbsHumSq=lambda d: d['Abs Hum'] * d['Abs Hum'],
    )
    .drop(columns=['Abs Hum'])
)

# Remove August and September rows (Month 8 and 9), then renumber the index.
not_aug_or_sep = (Bldg['Month'] != 8) & (Bldg['Month'] != 9)
Bldg = Bldg[not_aug_or_sep].reset_index(drop=True)

# 3. EDA

## 3.1 Scatter Plots

In [None]:
# One scatter panel per building: 'Air Temp' on x, 'CHWTON/SQFT' on y.
# FacetGrid.map and FacetGrid.add_legend both return the grid, so the calls chain.
g = (
    sns.FacetGrid(Bldg, col="bldgname")
    .map(sns.scatterplot, "Air Temp", "CHWTON/SQFT", alpha=0.8)
    .add_legend()
)
g

## 3.2 Normalisation and Pearson's r

In [None]:
# apply the maximum absolute scaling in Pandas using the .abs() and .max() methods
def max_absolute_scaling(df):
    """Scale every column of ``df`` by that column's maximum absolute value.

    Each column is divided by ``column.abs().max()``, so the scaled values lie
    in [-1, 1]. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are all numeric.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns, holding the scaled values.
    """
    max_abs = df.abs().max()
    # A column whose largest magnitude is 0 contains only zeros; dividing it by
    # 0 would turn every entry into NaN. Use a divisor of 1 there so the column
    # stays at 0.
    max_abs = max_abs.where(max_abs != 0, 1)
    # DataFrame / Series aligns the Series on the column labels, so each column
    # is divided by its own maximum.
    return df / max_abs
    


In [None]:
# Keep just the two columns whose relationship is examined in this section.
pair_columns = ['Air Temp', 'CHWTON/SQFT']
Bldg_airtemp_chwton = Bldg[pair_columns]

In [None]:
# Max-abs scale both columns; the helper returns a new frame.
Bldg_airtemp_chwton_scaled = max_absolute_scaling(df=Bldg_airtemp_chwton)

In [None]:
# Re-attach the building label (aligned on the row index) and put it first.
Bldg_airtemp_chwton_scaled['bldgname'] = Bldg['bldgname']
ordered_columns = ['bldgname', 'Air Temp', 'CHWTON/SQFT']
Bldg_scaled = Bldg_airtemp_chwton_scaled[ordered_columns]

In [None]:
# Split the scaled frame into one frame per building.
psych, psych_north, istb4 = (
    Bldg_scaled.loc[Bldg_scaled['bldgname'].eq(building)]
    for building in ('Psychology', 'Psychology North', 'ISTB 4')
)

### 3.2.1 using Scipy stats.pearsonr

In [None]:
# Report scipy's Pearson result for 'Air Temp' vs 'CHWTON/SQFT', one building at a time.
for heading, frame in (
    ('Psych: \n', psych),
    ('Psych North: \n', psych_north),
    ('ISTB 4: \n', istb4),
):
    print(heading, stats.pearsonr(frame['Air Temp'], frame['CHWTON/SQFT']))


### 3.2.2 using Numpy's np.corrcoef(x,y)

In [None]:
# Drop the building label from each per-building frame, leaving only the
# 'Air Temp' and 'CHWTON/SQFT' columns.
psych_num, psychN_num, istb4_num = (
    frame.drop('bldgname', axis='columns')
    for frame in (psych, psych_north, istb4)
)


In [None]:
# Extract each building's two columns as NumPy arrays, in the order
# (air temperature, CHWTON/SQFT).
value_columns = ('Air Temp', 'CHWTON/SQFT')

# psych
psych_airtemp, psych_chwton = (psych_num[c].to_numpy() for c in value_columns)

# psych_north
psychN_airtemp, psychN_chwton = (psychN_num[c].to_numpy() for c in value_columns)

# istb4
istb4_airtemp, istb4_chwton = (istb4_num[c].to_numpy() for c in value_columns)

In [None]:
# Print NumPy's 2x2 correlation matrix for each building's array pair.
for heading, (airtemp, chwton) in (
    ('Psych: \n', (psych_airtemp, psych_chwton)),
    ('Psych North: \n', (psychN_airtemp, psychN_chwton)),
    ('ISTB4: \n', (istb4_airtemp, istb4_chwton)),
):
    print(heading, np.corrcoef(airtemp, chwton))


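Observation: `stats.pearsonr` returns two values:
1. Pearson's r, the linear correlation coefficient, which ranges from -1 to 1.
2. A p-value. This is the probability of observing a correlation at least as extreme as r if the true correlation were zero (no correlation); it is not the probability that the true value of r is zero.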

We conclude that there is a strong linear relationship between air temperature and CHWTON/SQFT

# 4. Training

## 4.1 Data preparation

In [None]:
# Hold out June 9th as the test set: flag its rows once, then split on the flag.
is_test_day = (Bldg['Month'] == 6) & (Bldg['Day'] == 9)

# Test data (June 9th), with a fresh 0-based index
Test_df = Bldg[is_test_day].reset_index(drop=True)

# Everything else stays in Bldg, also re-indexed
Bldg = Bldg[~is_test_day].reset_index(drop=True)

# Check if the day is still there (the displayed frame should have no rows)
Bldg[(Bldg['Month'] == 6) & (Bldg['Day'] == 9)]

In [None]:
# Pearson correlation matrix of Bldg, drawn as an annotated heatmap.
# Only numeric/boolean columns are correlated: Bldg still carries the text
# column 'bldgname', and pandas >= 2.0 raises on non-numeric columns instead of
# silently dropping them as older versions did. Selecting the dtypes explicitly
# behaves the same on old and new pandas.
corrMatrix = Bldg.select_dtypes(include=['number', 'bool']).corr(method='pearson')
plt.figure(figsize=(15, 15))
sns.heatmap(corrMatrix, cmap='RdYlGn', annot=True, linewidths=1)
plt.show()

In [1]:
# Shell command: install the JupyterLab widget-manager extension.
# NOTE(review): presumably required for the WitWidget imported at the top of
# the notebook to render in JupyterLab — confirm, and consider moving this
# setup step ahead of the imports so a fresh Run All works.
!jupyter labextension install @jupyter-widgets/jupyterlab-manager





Building jupyterlab assets (production, minimized)
