In [1]:
import pandas as pd
from sklearn import tree
import pydotplus
from sklearn.tree import DecisionTreeClassifier, export_text, _tree
from sklearn.metrics import classification_report


from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.image as pltimg
import numpy as np
from sklearn import metrics
from joblib import dump, load

In [2]:
# Load the labelled sensor log (first CSV row is the header) and fill missing cells with 0.
df = pd.read_csv("log_labeled_trans.csv", header=0)
df = df.fillna(0)

# Replace spaces with underscores in the column names and in the activity labels.
df.columns = df.columns.str.replace(' ', '_')
df['Label'] = df['Label'].str.replace(' ', '_')

In [3]:
# Keep the column index and a plain-list copy of its names, then show the names.
data_top = df.columns
columns = [*data_top]
print(columns)

['Start_time', 'End_time', 'Hall-Bathroom_door', 'ToiletFlush', 'Plates_cupboard', 'Fridge', 'Microwave', 'Groceries_Cupboard', 'Hall-Toilet_door', 'Frontdoor', 'Hall-Bedroom_door', 'Pans_Cupboard', 'Freezer', 'Cups_cupboard', 'Dishwasher', 'Washingmachine', 'Label', 'Label_ID']


In [4]:
# Build the feature list as a NEW list instead of aliasing `columns` and removing
# entries in place: the in-place version also stripped these names from `columns`
# and raised ValueError when the cell was executed a second time.
features = [
    name for name in columns
    if name not in ("Label", "Label_ID", "Start_time", "End_time")  # targets and timestamps are not model inputs
]
features

['Hall-Bathroom_door',
 'ToiletFlush',
 'Plates_cupboard',
 'Fridge',
 'Microwave',
 'Groceries_Cupboard',
 'Hall-Toilet_door',
 'Frontdoor',
 'Hall-Bedroom_door',
 'Pans_Cupboard',
 'Freezer',
 'Cups_cupboard',
 'Dishwasher',
 'Washingmachine']

In [5]:
# Feature matrix: the columns named in `features`.
X = df.loc[:, features]
# Target: the `Label` column (the `Label_ID` column is the alternative encoding, not used here).
y = df["Label"]

In [6]:
def create_rules_recursive(clf,features,node,expression,rules):
    """Walk the fitted tree from ``node`` and record one rule string per class.

    Parameters
    ----------
    clf : object exposing ``tree_`` (with ``feature``, ``threshold``,
        ``children_left``, ``children_right`` and ``value``) and ``classes_``.
    features : sequence of feature names, indexed by ``clf.tree_.feature``.
    node : index of the node to start from (0 is used for the root by the caller).
    expression : conditions accumulated on the path so far; ``''`` at the start.
    rules : dict that is updated in place. Each key is a class label and each
        value is a string of ``(cond AND cond ...)`` groups joined by ``' OR '``,
        one group per leaf whose highest-valued class is that label.

    Returns
    -------
    None
    """
    fitted = clf.tree_
    split_feature = fitted.feature[node]

    # Leaf: store the path that led here under the class with the largest value.
    if split_feature == _tree.TREE_UNDEFINED:
        label = clf.classes_[np.argmax(fitted.value[node])]
        clause = '(' + expression + ')'
        rules[label] = (rules[label] + ' OR ' + clause) if label in rules else clause
        return

    # Internal node: extend the path with this split and descend into both children.
    prefix = expression if expression == '' else expression + ' AND '
    condition_stub = prefix + str(features[split_feature])
    cutoff = str(fitted.threshold[node])
    create_rules_recursive(clf, features, fitted.children_left[node], condition_stub + '<=' + cutoff, rules)
    create_rules_recursive(clf, features, fitted.children_right[node], condition_stub + '>' + cutoff, rules)

In [7]:
def print_decision_rules(rf,features):
    """Print one description line per node of a fitted single-output tree.

    Parameters
    ----------
    rf : object exposing ``tree_`` with ``children_left``, ``children_right``,
        ``feature``, ``threshold`` and ``value`` arrays.
    features : sequence of feature names, indexed by the tree's feature indices.

    Output
    ------
    * Leaf (both children are -1): ``<idx> LEAF: return class=<class index>``,
      where the class index is the position of the largest entry in the node value.
    * Internal node: two lines with the same test, first using the feature
      name and then using the raw feature index.

    Raises
    ------
    AssertionError
        If the tree has more than one output.
    """
    # Named `fitted` rather than `tree` so the imported sklearn `tree` module is not shadowed.
    fitted = rf.tree_
    assert fitted.value.shape[1] == 1 # no support for multi-output

    node_fields = zip(fitted.children_left, fitted.children_right, fitted.feature, fitted.threshold, fitted.value)
    for node_idx, (left, right, feature, th, value) in enumerate(node_fields):
        if left == -1 and right == -1:
            # for a classifier the returned class is the one with the largest value
            class_idx = np.argmax(value[0])
            print('{} LEAF: return class={}'.format(node_idx, class_idx))
        else:
            # scikit-learn sends samples with feature <= threshold to the left child,
            # so the printed test must be "<=" (it was previously printed as "<").
            print('{} NODE: if {} <= {} then next={} else next={}'.format(node_idx, features[feature], th, left, right))
            print('{} NODE: if {} <= {} then next={} else next={}'.format(node_idx, feature, th, left, right))
            



In [8]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X.values, y, test_size=0.3, random_state=1)

# Fit the decision tree. random_state is fixed here too: without it the fitted tree
# (and therefore the rules, the picture and the scores below) can differ between runs.
clf = DecisionTreeClassifier(random_state=1)
clf = clf.fit(X_train, y_train)
print(clf.classes_)

# One boolean rule string per predicted class, collected by walking the tree from the root.
rules = dict()
create_rules_recursive(clf,features,0,'',rules)
print(rules)

# The model is fitted on a bare array (X.values), so the column names are passed
# explicitly; otherwise the text dump only shows anonymous feature_N names.
print(export_text(clf, feature_names=features))

# Persist the fitted model.
dump(clf, 'test.joblib')

# Render the tree to a PNG via graphviz; class names are passed as a plain list of strings.
data = tree.export_graphviz(clf, out_file=None, feature_names=features, class_names=clf.classes_.tolist(), filled=True)
graph = pydotplus.graph_from_dot_data(data)
graph.write_png('mydecisiontree_classifier_test.png')

# Predict the held-out rows and report how often the classifier is correct.
y_pred = clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))


['get_drink' 'go_to_bed' 'leave_house' 'prepare_Breakfast' 'use_toilet']
{'use_toilet': '(Plates_cupboard<=0.5 AND Frontdoor<=0.5 AND Fridge<=0.5 AND Pans_Cupboard<=0.5 AND Hall-Bedroom_door<=0.5 AND Cups_cupboard<=0.5 AND Hall-Bathroom_door<=1.5 AND Hall-Toilet_door<=1.5 AND ToiletFlush<=2.5 AND ToiletFlush<=1.5 AND Hall-Bathroom_door<=0.5 AND Hall-Toilet_door<=0.5) OR (Plates_cupboard<=0.5 AND Frontdoor<=0.5 AND Fridge<=0.5 AND Pans_Cupboard<=0.5 AND Hall-Bedroom_door<=0.5 AND Cups_cupboard<=0.5 AND Hall-Bathroom_door<=1.5 AND Hall-Toilet_door<=1.5 AND ToiletFlush<=2.5 AND ToiletFlush<=1.5 AND Hall-Bathroom_door<=0.5 AND Hall-Toilet_door>0.5 AND ToiletFlush<=0.5) OR (Plates_cupboard<=0.5 AND Frontdoor<=0.5 AND Fridge<=0.5 AND Pans_Cupboard<=0.5 AND Hall-Bedroom_door<=0.5 AND Cups_cupboard<=0.5 AND Hall-Bathroom_door<=1.5 AND Hall-Toilet_door<=1.5 AND ToiletFlush<=2.5 AND ToiletFlush<=1.5 AND Hall-Bathroom_door<=0.5 AND Hall-Toilet_door>0.5 AND ToiletFlush>0.5) OR (Plates_cupboard<=0.

In [9]:
# Per-class precision, recall, F1 and support on the held-out rows.
print(classification_report(y_true=y_test, y_pred=y_pred))

                   precision    recall  f1-score   support

        get_drink       0.50      0.75      0.60         4
        go_to_bed       0.00      0.00      0.00         3
      leave_house       1.00      1.00      1.00        17
prepare_Breakfast       0.97      0.81      0.88        36
       use_toilet       0.90      0.97      0.93        64

         accuracy                           0.90       124
        macro avg       0.67      0.70      0.68       124
     weighted avg       0.90      0.90      0.89       124



In [10]:
# The classifier is trained on the string column `Label`, so clf.predict returns
# activity names. Comparing the prediction with the integer 5 could never succeed;
# instead the label name(s) carrying Label_ID 5 are looked up in the data.
# NOTE(review): assumes Label_ID is stored as a number in the CSV - confirm.
print("Trying to predict the take shower activity, with ID 5, \nwith the following combination 1,0,0,0,0,0,0,3,0,0,0,0,0,0")
pred = clf.predict([[1,0,0,0,0,0,0,3,0,0,0,0,0,0]])
print("Value predicted:", pred)

expected_labels = set(df.loc[df['Label_ID'] == 5, 'Label'])
if not expected_labels.intersection(clf.classes_):
    # Makes it visible when the model cannot possibly output the expected activity.
    print("Note: no label with ID 5 is among the classes the model was trained on:", clf.classes_)

if pred[0] in expected_labels:
    print("Correct prediction")
else:
    print("Wrong prediction")

Trying to predict the take shower activity, with ID 5, 
with the following combination 1,0,0,0,0,0,0,3,0,0,0,0,0,0
Value predicted: ['leave_house']
Wrong predicition
