In [1]:
import pandas as pd
import numpy as np
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

# Notebooks
import nbimporter
import os
import sys

# Make the parent directory importable (presumably where Functions.ipynb
# lives -- verify against the repository layout).
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

# Functions Notebook
from Functions import holisticsTrainTest, calculateCost
from Functions import comfPMV, ppv_value, trainTest_tunedModel

Importing Jupyter notebook from Functions.ipynb


# Load Data

In [2]:
# Read the 65-participant CSV and collect every distinct 'Participant_No'.
df_aux_65 = pd.read_csv(
    "../datasets/TCS_65_participants_outsideData.csv",
    delimiter=",",
)

list_complete_participants = list(df_aux_65['Participant_No'].unique())
num_complete_participants = len(list_complete_participants)

# Disabled experiment, kept for reference: optionally extend the list with
# extra participant numbers.
# use_heuristics_participants = True
# if use_heuristics_participants:
#     list_complete_participants.append(10)
#     list_complete_participants.append(26)
#     list_complete_participants.append(28)
#     list_complete_participants.append(36)
#     list_complete_participants.append(37)
#     list_complete_participants.append(39)
#     list_complete_participants.append(47)
#     list_complete_participants.append(48)
#     list_complete_participants.append(53)

In [3]:
# Load the two pre-computed feature sets.
# NOTE(review): pickle files can execute arbitrary code when loaded; only
# read pickles produced by this project.
df_feature1 = pd.read_pickle("df_feature1.pkl")
df_feature3 = pd.read_pickle("df_feature3.pkl")

print("Total number of instances: {}".format(df_feature1.shape[0]))

# Split feature set 1 with the project helper. It returns four objects, bound
# here as train / test / train_binary / test_binary.
(df_feature1_train,
 df_feature1_test,
 df_feature1_train_binary,
 df_feature1_test_binary) = holisticsTrainTest(df_feature1, list_complete_participants)

# Split feature set 3 the same way.
# Fix: this call previously received df_feature1, so every df_feature3_*
# frame was just a copy of the feature-set-1 split.
(df_feature3_train,
 df_feature3_test,
 df_feature3_train_binary,
 df_feature3_test_binary) = holisticsTrainTest(df_feature3, list_complete_participants)

print("Number of training instances: {}".format(df_feature1_train.shape[0]))
print("Number of testing (validation) instances: {}".format(df_feature1_test.shape[0]))


Total number of instances: 2067
22
Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
22
Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
Number of training instances: 1467
Number of testing (validation) instances: 600


# Baseline 1: CMU temperature setpoint for everyone (72-74 °F)

In [4]:
# Lower / upper bound of the baseline 1 setpoint band, in degrees Fahrenheit.
baseline1_low, baseline1_high = 72.0, 74.0

# Baseline 2: SPOT/SPOT* static setpoint (25 °C = 77 °F, evaluated as the 76-78 °F band)

In [5]:
# Lower / upper bound of the baseline 2 setpoint band, in degrees Fahrenheit
# (25 C is 77 F, so this is 77 +/- 1 F).
baseline2_low, baseline2_high = 76.0, 78.0

# Baseline 3: Choose a setpoint where ppv is in [-0.5, 0.5] 

In [6]:
# Compute a PMV value for every test instance, clip it to [-2, 2], and turn
# the result into PPV values; the next cell counts how many fall within
# [-0.5, 0.5].

# technically any df_featureX_test can be used since the only difference between them is the amount of columns they have
# Air temperature converted from Fahrenheit to Celsius, re-indexed 0..n-1 so
# it can be addressed by position in the loop below.
ta_1 = ((df_feature1_test['Temperature (Fahrenheit)'] - 32) * 5 / 9).reset_index(drop=True)

# Per-instance clothing insulation (standard is 0.8), re-indexed the same way.
clo_1 = df_feature1_test['ClothingInsulation'].reset_index(drop=True)

# Fixed inputs shared by every comfPMV call (marked "standard" by the author).
# NOTE(review): units expected by comfPMV are not visible here -- confirm in
# particular whether rh should be a fraction (0.60) or a percentage (60).
tr = 25     # standard
vel = 0.2   # standard
rh = 0.60   # standard
met = 1.1   # standard
wme = 0

holistic_pmv_list = []
for i in range(len(df_feature1_test)):
    # comfPMV returns a (pmv, ppd) pair; only the PMV is kept.
    pmv, ppd = comfPMV(ta_1[i], tr, vel, rh, met, clo_1[i], wme)
    holistic_pmv_list.append(pmv)

# clip to -2,+2
holistic_pmv_array = np.clip(np.array(holistic_pmv_list), a_min=-2, a_max=2)

baseline3_ppv = ppv_value(
    holistic_pmv_array,
    df_feature1_train,
    df_feature1_test,
    ta_1,
    tr,
    vel,
    rh,
    met,
    clo_1,
)


In [7]:
# Baseline 3 band: the coldest and warmest test-set temperatures whose PPV
# lies within [-0.5, 0.5].
df_feature1_test = df_feature1_test.reset_index(drop=True)

baseline3_temps = []
for index, row in df_feature1_test.iterrows():
    # Skip instances whose PPV falls outside the comfort band.
    if not (-0.5 <= baseline3_ppv[index] <= 0.5):
        continue
    baseline3_temps.append(row['Temperature (Fahrenheit)'])

# Both bounds are in degrees Fahrenheit.
baseline3_low, baseline3_high = min(baseline3_temps), max(baseline3_temps)

print(baseline3_low)
print(baseline3_high)

68.6999969482
81.0


# Baseline 4: Average/median temperature from test set

In [8]:
# Baseline 4: a +/- 2.0 F band centred on the mean, and another on the median,
# of the test-set temperatures.
test_temps = df_feature1_test['Temperature (Fahrenheit)']
baseline4_average, baseline4_median = test_temps.mean(), test_temps.median()

baseline4_average_low, baseline4_average_high = baseline4_average - 2.0, baseline4_average + 2.0
baseline4_median_low, baseline4_median_high = baseline4_median - 2.0, baseline4_median + 2.0

print(baseline4_average)
print(baseline4_median)

71.57351673122606
70.1999969482


# Baseline 5: Body Shape Model


In [9]:
# Load the tuned models, one per feature set (1 and 3).
# NOTE(review): unpickling executes code embedded in the file; only load
# model files produced by this project.
rf_optimal_1, rf_optimal_3 = (
    pd.read_pickle(model_file)
    for model_file in ("rf_optimal_1.pkl", "rf_optimal_3.pkl")
)

# Baseline Evaluation

Find the distribution of comfort votes in the test set under each baseline.


In [10]:
# Cost of baseline 1 (72-74 F band). calculateCost prints a total RMSE across
# all participants and an accumulated per-participant RMSE, and returns two
# values bound here in that order.
total_rmse_1, accumulative_rmse_1 = calculateCost(
    df_feature1,
    list_complete_participants,
    baseline1_high,
    baseline1_low,
)

Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
1.0
1.0
Num Participants responses: 27
Num Participants responses: 31
0.0
4.0
Num Participants responses: 31
0.0
Num Participants responses: 30
0.0
Num Participants responses: 30
Num Participants responses: 23
Num Participants responses: 18
0.0
0.0
0.0
Num Participants responses: 30
1.0
Num Participants responses: 21
Num Participants responses: 23
Num Participants responses: 35
Num Participants responses: 25
0.0
0.0
1.0
Num Participants responses: 31
1.0
Num Participants responses: 31
Num Participants responses: 26
Num Participants responses: 24
Num Participants responses: 30
4.0
Num Participants responses: 28
Num Participants responses: 28
Num Participants responses: 27
Num Participants responses: 31
Num Participants responses: 20
Total RMSE across all participants: 0.9309493362512627
Accumulative of RMSE of each participant: 6.99156383156272


In [11]:
# Cost of baseline 2 (76-78 F band); the upper bound is passed before the
# lower bound, as in the other baseline evaluations.
total_rmse_2, accumulative_rmse_2 = calculateCost(
    df_feature1,
    list_complete_participants,
    baseline2_high,
    baseline2_low,
)


Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 27
0.0
1.0
0.0
1.0
Num Participants responses: 31
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 31
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 30
0.0
0.0
Num Participants responses: 30
0.0
Num Participants responses: 23
0.0
0.0
Num Participants responses: 18
0.0
0.0
0.0
Num Participants responses: 30
0.0
Num Participants responses: 21
1.0
0.0
1.0
Num Participants responses: 23
0.0
0.0
0.0
1.0
0.0
0.0
Num Participants responses: 35
1.0
0.0
0.0
1.0
0.0
Num Participants responses: 25
1.0
0.0
0.0
0.0
Num Participants responses: 31
1.0
1.0
1.0
1.0
Num Participants responses: 31
0.0
0.0
0.0
Num Participants responses: 26
0.0
1.0
0.0
0.0
Num Participants responses: 24
0.0
0.0
0.0
0.0
Num Participants responses: 30
1.0
1.0
1.0
Num Participants responses: 28
0.0
0.0
1.0
0.0
N

In [12]:
# Cost of baseline 3 (band derived from PPV within [-0.5, 0.5]); the upper
# bound is passed before the lower bound.
total_rmse_3, accumulative_rmse_3 = calculateCost(
    df_feature1,
    list_complete_participants,
    baseline3_high,
    baseline3_low,
)


Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
1.0
1.0
0.0
0.0
0.0
Num Participants responses: 27
0.0
1.0
0.0
0.0
0.0
1.0
4.0
4.0
1.0
1.0
1.0
0.0
1.0
4.0
1.0
1.0
1.0
1.0
Num Participants responses: 31
0.0
1.0
0.0
0.0
0.0
1.0
0.0
0.0
1.0
0.0
4.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 31
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 30
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
1.0
0.0
1.0
0.0
0.0
0.0
Num Participants responses: 30
0.0
0.0
0.0
1.0
1.0
4.0
4.0
4.0
4.0
4.0
0.0
4.0
Num Participants responses: 23
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 18
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 30
1.0
0.0
0.0
1.0
0.0
0.0
0.0
1.0
0.0
1.0
0.0
1.0
Num Participants responses: 21
1.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
4.0
0.0
1.0
0.0
Num Participants responses: 23
0.0
0

In [13]:
# Cost of baseline 4, evaluated twice: first for the band around the mean
# test temperature, then for the band around the median.
total_rmse_4_average, accumulative_rmse_4_average = calculateCost(
    df_feature1,
    list_complete_participants,
    baseline4_average_high,
    baseline4_average_low,
)
total_rmse_4_median, accumulative_rmse_4_median = calculateCost(
    df_feature1,
    list_complete_participants,
    baseline4_median_high,
    baseline4_median_low,
)

Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
1.0
1.0
Num Participants responses: 27
1.0
Num Participants responses: 31
4.0
0.0
Num Participants responses: 31
0.0
0.0
Num Participants responses: 30
1.0
0.0
0.0
Num Participants responses: 30
0.0
Num Participants responses: 23
0.0
Num Participants responses: 18
0.0
Num Participants responses: 30
1.0
0.0
1.0
Num Participants responses: 21
1.0
4.0
Num Participants responses: 23
0.0
0.0
0.0
Num Participants responses: 35
0.0
Num Participants responses: 25
Num Participants responses: 31
0.0
Num Participants responses: 31
Num Participants responses: 26
0.0
0.0
Num Participants responses: 24
0.0
Num Participants responses: 30
0.0
Num Participants responses: 28
0.0
1.0
Num Participants responses: 28
0.0
Num Participants responses: 27
1.0
Num Participants responses: 31
0.0
Num Participants responses: 20
Total RMSE across all participants: 0.7288689868556626
Accumulative of RMSE of each participant: 

In [14]:
# Cost of baseline 5 on feature set 1, using the tuned model.
# NOTE(review): both bounds are passed as 0 together with occuTherm=True;
# presumably the bounds then come from the model -- confirm in calculateCost.
total_rmse_5_1, accumulative_rmse_5_1 = calculateCost(
    df_feature1,
    list_complete_participants,
    0,
    0,
    rf_optimal_1,
    occuTherm=True,
)


Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
61.33000183
76.91999817
1.0
4.0
1.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
4.0
4.0
4.0
4.0
4.0
1.0
1.0
1.0
0.0
0.0
0.0
Num Participants responses: 27
64.40000153
81.5
1.0
1.0
1.0
1.0
4.0
4.0
4.0
0.0
1.0
0.0
0.0
0.0
1.0
4.0
4.0
4.0
4.0
1.0
1.0
1.0
4.0
4.0
4.0
4.0
0.0
1.0
4.0
1.0
1.0
1.0
1.0
Num Participants responses: 31
63.0999984741
81.0
0.0
0.0
1.0
1.0
1.0
4.0
4.0
0.0
0.0
1.0
0.0
0.0
0.0
1.0
0.0
0.0
1.0
0.0
4.0
4.0
4.0
4.0
4.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 31
66.4000015259
78.3000030518
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
1.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 30
62.9000015259
79.3000030518
1.0
0.0
1.0
4.0
0.0
1.0
1.0
4.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
1.0
0.0
1.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
0.0
0.0
Num Participants responses: 30
70.1999969482
80.0999984741
0.0
0.0
0.0
1.0
1.0
4.0
4.0
4.0
4.0
4.0


In [15]:
# mlp_1 = pd.read_pickle("mlp_1.pkl")

# total_rmse_5_1_mlp, accumulative_rmse_5_1_mlp = calculateCost(df_feature1, list_complete_participants, 0, 0, 
#                                                      mlp_1, occuTherm=True)


In [16]:
# Cost of baseline 5 on feature set 3, using the model tuned for that set.
# NOTE(review): both bounds are passed as 0 together with occuTherm=True;
# presumably the bounds then come from the model -- confirm in calculateCost.
total_rmse_5_3, accumulative_rmse_5_3 = calculateCost(
    df_feature3,
    list_complete_participants,
    0,
    0,
    rf_optimal_3,
    occuTherm=True,
)


Testing on participants:
[ 7 14 16 21 26 30 35 36 43 45 47 49 51 61 63 64 66 67 68 72 78 79]
61.33000183
76.91999817
1.0
4.0
1.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
4.0
4.0
4.0
4.0
4.0
1.0
1.0
1.0
0.0
0.0
0.0
Num Participants responses: 27
64.40000153
81.5
1.0
1.0
1.0
1.0
4.0
4.0
4.0
0.0
1.0
0.0
0.0
0.0
1.0
4.0
4.0
4.0
4.0
1.0
1.0
1.0
4.0
4.0
4.0
4.0
0.0
1.0
4.0
1.0
1.0
1.0
1.0
Num Participants responses: 31
63.0999984741
81.0
0.0
0.0
1.0
1.0
1.0
4.0
4.0
0.0
0.0
1.0
0.0
0.0
0.0
1.0
0.0
0.0
1.0
0.0
4.0
4.0
4.0
4.0
4.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 31
63.7999992371
78.3000030518
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
1.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
Num Participants responses: 30
70.0
79.3000030518
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
1.0
0.0
1.0
0.0
0.0
0.0
Num Participants responses: 30
75.4000015259
80.0999984741
0.0
0.0
1.0
1.0
4.0
4.0
4.0
4.0
4.0
Num Participants responses: 23
64.9000015259
80.0999984741
0.0
0.