In [2]:
# Notebooks
import nbimporter
import os
import sys

# Make the parent directory importable so project modules can be found
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
# Defined Functions
from utils import *

# Pandas, matplotlib, pickle, seaborn
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from statistics import mean 
from collections import Counter
from imblearn.over_sampling import SMOTE, SMOTENC, ADASYN


In [3]:
# Notebook-wide settings shared by the cells below
fixed_depth: int = 10               # passed as rdf_depth to the training/evaluation helpers
test_size_percentage: float = 0.2   # forwarded unchanged as test_size_percentage to the helpers
num_trials: int = 30                # number of repetitions used by the baseline loops


# Load TCS Dataset

In [4]:
# Short dataset tag used as a prefix for every file written by this notebook
dataset_string = "occutherm"

# Full dataset; the stored frame carries a participant number column,
# which is removed immediately after loading.
df_tcs = pd.read_pickle("data/occutherm/df_feature1.pkl").drop(columns=['Participant_No'])

# Pre-made train / test splits stored next to the full dataset
df_tcs_test = pd.read_pickle("data/occutherm/df_feature1_test.pkl")
df_tcs_train = pd.read_pickle("data/occutherm/df_feature1_train.pkl")

# Summary statistics of the training split.
# Original note: total count for instances per class is 818
# (NOTE(review): not verifiable from this cell — confirm against the data).
print(df_tcs_train.describe())


       Temperature (Fahrenheit)  SkinTemperature  ClothingInsulation  \
count               1508.000000      1508.000000         1508.000000   
mean                  71.453707        85.207610            0.558176   
std                    6.221285         5.362427            0.198067   
min                   60.070000        62.986781            0.000000   
25%                   65.599998        81.927500            0.410000   
50%                   70.199997        85.376000            0.490000   
75%                   77.634998        88.598001            0.680000   
max                   85.000000       110.235782            1.070000   

        Height(cm)  Shoulder Circumference(cm)  Weight(lbs)       Gender  \
count  1508.000000                 1508.000000  1508.000000  1508.000000   
mean    169.909218                  109.055637   152.835411     0.443634   
std       9.215815                   10.985466    30.818397     0.496978   
min     151.000000                   89.500000 

# Classification models on train data (imbalanced)

In [4]:
# Train on the (imbalanced) training split using the notebook-wide depth and
# test-size settings; the helper returns an accuracy value and the fitted model.
# NOTE(review): "rdf" presumably stands for random forest — confirm in utils.
acc_rdf, rdf_real_model = train_rdf(
    df_tcs_train,
    rdf_depth=fixed_depth,
    test_size_percentage=test_size_percentage,
)


Features: ['Temperature (Fahrenheit)' 'SkinTemperature' 'ClothingInsulation'
 'Height(cm)' 'Shoulder Circumference(cm)' 'Weight(lbs)' 'Gender'
 'Temperature_outside' 'Humidity_outside']
Expected accuracy (f1 micro) based on Cross-Validation:  0.4958677685950413
GaussianNB(priors=None, var_smoothing=1e-09)
Number of folds: 10
Best parameters set found on development set:
{'algorithm': 'brute', 'metric': 'seuclidean', 'n_neighbors': 14, 'weights': 'distance'}
Number of folds: 10
Best parameters set found on development set:
{'C': 1000, 'class_weight': 'balanced', 'gamma': 0.1, 'kernel': 'rbf'}
Number of folds: 10
Best parameters set found on development set:
{'class_weight': 'balanced', 'criterion': 'gini', 'min_samples_split': 2, 'n_estimators': 100}


In [5]:
# Report the accuracy value returned by train_rdf
print(f"rdf acc CV: {acc_rdf}")


nb acc CV: 0.5463576158940397
knn acc CV: 0.6821192052980133
svm acc CV: 0.6688741721854304
rdf acc CV: 0.704251655629139


In [6]:
# Persist the fitted model and its accuracy under the dataset prefix.
# The "reall" spelling in the file names is kept as-is because other code
# may load these exact paths.
save_pickle(rdf_real_model, f"models/{dataset_string}_rdf_reall_full.pkl")
save_pickle(acc_rdf, f"metrics/{dataset_string}_rdf_reall_full_acc.pkl")


# Variability baseline

In [7]:
# Run the variability evaluation on the training split num_trials times
# and keep every per-trial result.
variability_baseline_list = [
    evaluation_variability(df_tcs_train) for _ in range(num_trials)
]

# The baseline is the arithmetic mean over all trials; print it and store it.
mean_var_baseline = mean(variability_baseline_list)
print(mean_var_baseline)
save_pickle(mean_var_baseline, f"metrics/{dataset_string}_variability_baseline.pkl")


Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Comfort: -2
Thermal Comfort: 1
Thermal Comfort: -1
Thermal Comfort: 2
Thermal Comfort: 0
Thermal Com

# Diversity baseline

In [8]:
# Run the diversity evaluation num_trials times. The training split is passed
# as both data arguments and the helper is told this is a baseline run.
diversity_baseline_list = [
    evaluation_diversity(df_tcs_train, df_tcs_train, baseline=True)
    for _ in range(num_trials)
]

# The baseline is the arithmetic mean over all trials; print it and store it.
mean_diversity_baseline = mean(diversity_baseline_list)
print(mean_diversity_baseline)
save_pickle(mean_diversity_baseline, f"metrics/{dataset_string}_diversity_baseline.pkl")


1.8966552371208951


# Quality of the final classification

In [8]:
# Evaluate classification using the train and test splits with the
# notebook-wide depth and test-size settings. The helper returns four values:
# test accuracies, train accuracies, the models, and a report object.
(
    class_acc_test,
    class_acc_train,
    class_models,
    class_report_rdf,
) = evaluation_classification(
    df_tcs_train,
    df_tcs_test,
    rdf_depth=fixed_depth,
    depth_file_name='default',
    test_size_percentage=test_size_percentage,
)


In [9]:
# Show the test accuracies returned by evaluation_classification; the entry
# at index 3 is the one stored later as the RDF classification baseline.
print(class_acc_test)


[0.5974955277280859, 0.46869409660107336, 0.007155635062611807, 0.6046511627906976]


In [10]:
# Index 3 of the test accuracies is taken as the RDF result.
# NOTE(review): the position depends on the ordering used inside
# evaluation_classification — confirm in utils.
final_classification_rdf = class_acc_test[3]

# Store the RDF test accuracy and the accompanying report under the dataset prefix.
save_pickle(final_classification_rdf, f"metrics/{dataset_string}_rdf_classification_baseline.pkl")
save_pickle(class_report_rdf, f"label-metrics/{dataset_string}_class_report_baseline_trials.pkl")
