# Calculate How Additional Users Improve Results

In [14]:
import numpy as np
import pandas as pd
from datetime import datetime
import os

# Preperation
from sklearn.model_selection import train_test_split

# Modeling
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans

# Visualisations
from sklearn.tree import export_graphviz # Note that you need to brew install graphviz on your local machine
import pydot 
import seaborn as sns
import matplotlib.pyplot as plt

# Evaluation
from sklearn import metrics

# User Defined Functions
import cozie_functions

In [15]:
# Add Data Folder to Path
data_path = os.path.abspath(os.path.join(os.path.dirname( "__file__" ), '..', 'data-processed'))

In [16]:
# The following participants took part in the experiment:
participant_ids = ['cresh' + str(id).zfill(2) for id in range(1,31)]
print(participant_ids)

['cresh01', 'cresh02', 'cresh03', 'cresh04', 'cresh05', 'cresh06', 'cresh07', 'cresh08', 'cresh09', 'cresh10', 'cresh11', 'cresh12', 'cresh13', 'cresh14', 'cresh15', 'cresh16', 'cresh17', 'cresh18', 'cresh19', 'cresh20', 'cresh21', 'cresh22', 'cresh23', 'cresh24', 'cresh25', 'cresh26', 'cresh27', 'cresh28', 'cresh29', 'cresh30']


In [17]:

# Function to convert my dataframe into a numpy array
def create_training_data(dataframe, preference, drop_features):
    clean_dataframe = dataframe.copy(deep=True)
    
    labels = np.array(clean_dataframe[preference + '_cozie'])
    #print(clean_dataframe.columns.values)
    features_df = clean_dataframe.drop(drop_features, axis=1)
    #print(features_df.columns)
    feature_set = np.array(features_df)
    
    return (feature_set, labels)


# Do Everything within a for loop 
- read data
- convert to features and labels
- run the rf model
- evalaute

In [24]:

# Define Feature Set the 
feature_set = 'fs5'

# Define the participant test set to use. TODO: Cycle through all participants and average
participant = 'cresh03'

# Read Test Set Data
test_set_df = pd.read_csv(os.path.join(data_path, 
            '2019-11-15_' + feature_set + '_val_' + participant + '.csv'))

# labels to drop. TODO: Eventually have this dynamic
# TODO Don't drop rooms
thermal_drop_features = ['light_cozie', 'noise_cozie', 'user_id', 'thermal_cozie',
                        'prefer_dimmer', 'prefer_brighter', 'prefer_quieter', 'prefer_louder', 'room']

# Convert test set data to labels
test_features, test_labels = create_training_data(test_set_df, 'thermal', thermal_drop_features)



print("test shape", test_set_df.shape)


# Empty dataframe to append results
train_set_df = pd.DataFrame()

for participant in participant_ids:
    
    new_train_set_df = pd.read_csv(os.path.join(data_path, 
            '2019-11-15_' + feature_set + '_train_' + participant + '.csv'))
    
    train_set_df = pd.concat([train_set_df, new_train_set_df])
    
    print('train shape', train_set_df.shape)

    train_features, train_labels = create_training_data(train_set_df, 'thermal', thermal_drop_features)
    print('Training Features Shape:', train_features.shape)
    print('Training Labels Shape:', train_labels.shape)
    

    # Instantiate model with 1000 decision trees
    thermal_rf = RandomForestClassifier(n_estimators = 1000, random_state = 42 )
    # Train the model on training data
    thermal_rf.fit(train_features, train_labels);

    # Use the forest's predict method on the test data
    predictions = thermal_rf.predict(test_features)
    
    print("Summary of Thermal Prediction")
    print(metrics.accuracy_score(predictions, test_labels))
    print(metrics.f1_score(predictions, test_labels, average="micro")) # Micro averages all the results
    print(metrics.f1_score(predictions, test_labels, average="macro")) #Macro treats each class equally 
    print(metrics.confusion_matrix(predictions, test_labels))
    print(metrics.classification_report(predictions,test_labels))
    
    
    
    



test shape (18, 16)
train shape (32, 16)
Training Features Shape: (32, 7)
Training Labels Shape: (32,)
Summary of Thermal Prediction
1.0
1.0
1.0
[[18]]
              precision    recall  f1-score   support

        10.0       1.00      1.00      1.00        18

   micro avg       1.00      1.00      1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

train shape (67, 16)
Training Features Shape: (67, 7)
Training Labels Shape: (67,)
Summary of Thermal Prediction
0.0
0.0
0.0
[[ 0  0]
 [18  0]]
              precision    recall  f1-score   support

        10.0       0.00      0.00      0.00         0
        11.0       0.00      0.00      0.00        18

   micro avg       0.00      0.00      0.00        18
   macro avg       0.00      0.00      0.00        18
weighted avg       0.00      0.00      0.00        18

train shape (94, 16)
Training Features Shape: (94, 7)
Training Labels Shape: (94,)


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Summary of Thermal Prediction
0.7777777777777778
0.7777777777777778
0.2916666666666667
[[ 0  1  0]
 [ 0 14  0]
 [ 0  3  0]]
              precision    recall  f1-score   support

         9.0       0.00      0.00      0.00         1
        10.0       0.78      1.00      0.88        14
        11.0       0.00      0.00      0.00         3

   micro avg       0.78      0.78      0.78        18
   macro avg       0.26      0.33      0.29        18
weighted avg       0.60      0.78      0.68        18

train shape (122, 16)
Training Features Shape: (122, 7)
Training Labels Shape: (122,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.9444444444444444
0.9444444444444444
0.4857142857142857
[[ 0  1]
 [ 0 17]]
              precision    recall  f1-score   support

         9.0       0.00      0.00      0.00         1
        10.0       0.94      1.00      0.97        17

   micro avg       0.94      0.94      0.94        18
   macro avg       0.47      0.50      0.49        18
weighted avg       0.89      0.94      0.92        18

train shape (153, 16)
Training Features Shape: (153, 7)
Training Labels Shape: (153,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
1.0
1.0
1.0
[[18]]
              precision    recall  f1-score   support

        10.0       1.00      1.00      1.00        18

   micro avg       1.00      1.00      1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

train shape (226, 16)
Training Features Shape: (226, 7)
Training Labels Shape: (226,)
Summary of Thermal Prediction
1.0
1.0
1.0
[[18]]
              precision    recall  f1-score   support

        10.0       1.00      1.00      1.00        18

   micro avg       1.00      1.00      1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

train shape (254, 16)
Training Features Shape: (254, 7)
Training Labels Shape: (254,)
Summary of Thermal Prediction
0.9444444444444444
0.9444444444444444
0.4857142857142857
[[17  0]
 [ 1  0]]
              precision    recall  f1-score   support

        10.0       0.94

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
1.0
1.0
1.0
[[18]]
              precision    recall  f1-score   support

        10.0       1.00      1.00      1.00        18

   micro avg       1.00      1.00      1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

train shape (309, 16)
Training Features Shape: (309, 7)
Training Labels Shape: (309,)
Summary of Thermal Prediction
0.9444444444444444
0.9444444444444444
0.4857142857142857
[[17  0]
 [ 1  0]]
              precision    recall  f1-score   support

        10.0       0.94      1.00      0.97        17
        11.0       0.00      0.00      0.00         1

   micro avg       0.94      0.94      0.94        18
   macro avg       0.47      0.50      0.49        18
weighted avg       0.89      0.94      0.92        18

train shape (337, 16)
Training Features Shape: (337, 7)
Training Labels Shape: (337,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (362, 16)
Training Features Shape: (362, 7)
Training Labels Shape: (362,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (392, 16)
Training Features Shape: (392, 7)
Training Labels Shape: (392,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (423, 16)
Training Features Shape: (423, 7)
Training Labels Shape: (423,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (464, 16)
Training Features Shape: (464, 7)
Training Labels Shape: (464,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (486, 16)
Training Features Shape: (486, 7)
Training Labels Shape: (486,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (528, 16)
Training Features Shape: (528, 7)
Training Labels Shape: (528,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (553, 16)
Training Features Shape: (553, 7)
Training Labels Shape: (553,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (590, 16)
Training Features Shape: (590, 7)
Training Labels Shape: (590,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (615, 16)
Training Features Shape: (615, 7)
Training Labels Shape: (615,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (642, 16)
Training Features Shape: (642, 7)
Training Labels Shape: (642,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (673, 16)
Training Features Shape: (673, 7)
Training Labels Shape: (673,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8333333333333334
0.8333333333333334
0.45454545454545453
[[15  0]
 [ 3  0]]
              precision    recall  f1-score   support

        10.0       0.83      1.00      0.91        15
        11.0       0.00      0.00      0.00         3

   micro avg       0.83      0.83      0.83        18
   macro avg       0.42      0.50      0.45        18
weighted avg       0.69      0.83      0.76        18

train shape (696, 16)
Training Features Shape: (696, 7)
Training Labels Shape: (696,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (723, 16)
Training Features Shape: (723, 7)
Training Labels Shape: (723,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8333333333333334
0.8333333333333334
0.45454545454545453
[[15  0]
 [ 3  0]]
              precision    recall  f1-score   support

        10.0       0.83      1.00      0.91        15
        11.0       0.00      0.00      0.00         3

   micro avg       0.83      0.83      0.83        18
   macro avg       0.42      0.50      0.45        18
weighted avg       0.69      0.83      0.76        18

train shape (751, 16)
Training Features Shape: (751, 7)
Training Labels Shape: (751,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8333333333333334
0.8333333333333334
0.45454545454545453
[[15  0]
 [ 3  0]]
              precision    recall  f1-score   support

        10.0       0.83      1.00      0.91        15
        11.0       0.00      0.00      0.00         3

   micro avg       0.83      0.83      0.83        18
   macro avg       0.42      0.50      0.45        18
weighted avg       0.69      0.83      0.76        18

train shape (774, 16)
Training Features Shape: (774, 7)
Training Labels Shape: (774,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8333333333333334
0.8333333333333334
0.45454545454545453
[[15  0]
 [ 3  0]]
              precision    recall  f1-score   support

        10.0       0.83      1.00      0.91        15
        11.0       0.00      0.00      0.00         3

   micro avg       0.83      0.83      0.83        18
   macro avg       0.42      0.50      0.45        18
weighted avg       0.69      0.83      0.76        18

train shape (800, 16)
Training Features Shape: (800, 7)
Training Labels Shape: (800,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (818, 16)
Training Features Shape: (818, 7)
Training Labels Shape: (818,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (836, 16)
Training Features Shape: (836, 7)
Training Labels Shape: (836,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (866, 16)
Training Features Shape: (866, 7)
Training Labels Shape: (866,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18

train shape (885, 16)
Training Features Shape: (885, 7)
Training Labels Shape: (885,)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Summary of Thermal Prediction
0.8888888888888888
0.8888888888888888
0.47058823529411764
[[16  0]
 [ 2  0]]
              precision    recall  f1-score   support

        10.0       0.89      1.00      0.94        16
        11.0       0.00      0.00      0.00         2

   micro avg       0.89      0.89      0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [7]:
# Convert Rooms and user Id to binary inputs (Hot Encoding)
## Modified in V3, and V5

 ## TODO: Get the hot encoding working from the test data
#train_set_df = pd.get_dummies(train_set_df, columns=['room'])
#test_set_df = pd.get_dummies(test_set_df, columns=['room'])
#print(list(train_set_df.columns))

## Convert into numpy arrays for classification

In [8]:
print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)

Training Features Shape: (885, 8)
Training Labels Shape: (885,)
Testing Features Shape: (21, 8)
Testing Labels Shape: (21,)


# Model Usign RF