In [1]:
import pandas as pd
from sklearn import tree
import pydotplus
from sklearn.tree import DecisionTreeClassifier, export_text, _tree
from sklearn.metrics import classification_report


from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.image as pltimg
import numpy as np
from sklearn import metrics
from joblib import dump, load

In [2]:
# Load the labelled training data. The first CSV row supplies the column
# names, and every missing cell (in any column) is replaced with 0.
df = (
    pd.read_csv("Ltrain_labeled_trans.csv", header=0)
    .fillna(0)
)

In [3]:
# Keep the pandas Index of column labels and a plain-list copy of it,
# then show the list so the available columns are visible.
data_top = df.columns
columns = data_top.tolist()
print(columns)

['Timestamp', 'door_A', 'sens1_A', 'sens3_A', 'sens4_A', 'door_B', 'sens4_B', 'sens2_B', 'sens1_B', 'sens3_B', 'door_C', 'sense1_C', 'sens3_C', 'sens2_C', 'sens4_C', 'sens4_D', 'sens3_D', 'sens2_D', 'sens1_D', 'sens2_A', 'door_D', 'Label']


In [4]:
# Model inputs are every column except the timestamp and the target label.
# A new list is built on purpose: `features = columns` followed by
# `.remove(...)` would also strip these names from `columns`, and re-running
# the cell would then fail with ValueError because they are already gone.
NON_FEATURE_COLUMNS = ("Timestamp", "Label")
features = [name for name in columns if name not in NON_FEATURE_COLUMNS]
features

['door_A',
 'sens1_A',
 'sens3_A',
 'sens4_A',
 'door_B',
 'sens4_B',
 'sens2_B',
 'sens1_B',
 'sens3_B',
 'door_C',
 'sense1_C',
 'sens3_C',
 'sens2_C',
 'sens4_C',
 'sens4_D',
 'sens3_D',
 'sens2_D',
 'sens1_D',
 'sens2_A',
 'door_D']

In [5]:
# Feature matrix: all rows, only the selected feature columns.
X = df.loc[:, features]
# Target vector: the "Label" column (the alternative "Label_ID" column is not used).
y = df["Label"]

In [6]:
def create_rules_recursive(clf,features,node,expression,rules):
    """Collect, per predicted class, the root-to-leaf conditions of a fitted tree.

    Walks the tree depth-first starting at ``node``. Every split appends
    ``<feature><=<threshold>`` (left child) or ``<feature>><threshold>``
    (right child) to the path expression, with conditions joined by ' AND '.
    When a leaf is reached, the path is wrapped in parentheses and stored in
    ``rules`` under the class with the largest value at that leaf; several
    paths leading to the same class are joined with ' OR '.

    Parameters
    ----------
    clf : fitted classifier exposing ``tree_`` and ``classes_``.
    features : sequence of feature names, indexed by the tree's feature ids.
    node : index of the node to start from (0 for the root).
    expression : conditions accumulated so far ('' at the root).
    rules : dict updated in place, mapping class label -> rule string.

    Returns
    -------
    None. Results are written into ``rules``.
    """
    fitted = clf.tree_
    split_feature = fitted.feature[node]

    # Leaf node: file the accumulated path under the winning class and stop.
    if split_feature == _tree.TREE_UNDEFINED:
        label = clf.classes_[np.argmax(fitted.value[node])]
        clause = '(' + expression + ')'
        rules[label] = (rules[label] + ' OR ' + clause) if label in rules else clause
        return

    # Intermediate node: extend the path with this split and descend both ways.
    prefix = (expression + ' AND ') if expression != '' else expression
    condition_head = prefix + str(features[split_feature])
    cutoff = str(fitted.threshold[node])
    create_rules_recursive(clf, features, fitted.children_left[node], condition_head + '<=' + cutoff, rules)
    create_rules_recursive(clf, features, fitted.children_right[node], condition_head + '>' + cutoff, rules)

In [7]:
def print_decision_rules(rf,features):
    """Print a flat, node-by-node listing of a fitted single-output decision tree.

    For every split node two lines are printed: one naming the tested feature
    via ``features`` and one showing its raw feature index. Each line gives
    the threshold and the indices of the child nodes to visit next. For every
    leaf the index of the class with the largest value is printed.

    Parameters
    ----------
    rf : fitted estimator exposing ``tree_`` (children_left, children_right,
        feature, threshold and value arrays).
    features : sequence of feature names, indexed by the tree's feature ids.

    Raises
    ------
    AssertionError
        If the tree has more than one output.
    """
    # Named `fitted_tree` so the imported `sklearn.tree` module is not shadowed.
    fitted_tree = rf.tree_
    assert fitted_tree.value.shape[1] == 1 # no support for multi-output

    nodes = zip(
        fitted_tree.children_left,
        fitted_tree.children_right,
        fitted_tree.feature,
        fitted_tree.threshold,
        fitted_tree.value,
    )
    for node_idx, (left, right, feature, th, value) in enumerate(nodes):
        # left / right: index of the left / right child (-1 when there is none)
        # feature: index of the feature to check
        # th: the threshold to compare against
        # value: values associated with the classes at this node

        if left == -1 and right == -1:
            # Leaf: report the class with the largest value.
            class_idx = np.argmax(value[0])
            print('{} LEAF: return class={}'.format(node_idx, class_idx))
        else:
            # The left branch is taken when feature <= threshold (the same
            # test create_rules_recursive and export_text report), so the
            # comparison is printed as "<=" rather than "<".
            print('{} NODE: if {} <= {} then next={} else next={}'.format(node_idx, features[feature], th, left, right))
            print('{} NODE: if {} <= {} then next={} else next={}'.format(node_idx, feature, th, left, right))
            



In [8]:
# Hold out 30% of the rows for testing; the split is seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X.values, y, test_size=0.3, random_state=1) # 70% training and 30% test

# Create the decision tree classifier object.
# It is seeded too: scikit-learn permutes the candidate features at each
# split, so without a fixed random_state equally good splits may be resolved
# differently from run to run and the printed tree/rules would not be
# reproducible.
clf = DecisionTreeClassifier(random_state=1)

# Train the decision tree classifier.
# NOTE: it is fitted on a bare ndarray (X.values), so the estimator carries no
# feature names; names are supplied explicitly where they are needed below.
clf = clf.fit(X_train,y_train)
print(clf.classes_)
# print_decision_rules(clf,features)

# Human-readable rules per class, plus scikit-learn's own text rendering
# (which labels inputs as feature_<index> because no names were given).
rules = dict()
create_rules_recursive(clf,features,0,'',rules)
print(rules)
print(export_text(clf))

# Render the tree to a PNG through Graphviz.
data = tree.export_graphviz(clf, out_file=None, feature_names=features, class_names = clf.classes_, filled=True)
graph = pydotplus.graph_from_dot_data(data)
graph.write_png('Ltrain_dtree.png')

# Predict the response for the test dataset
y_pred = clf.predict(X_test)
# Model accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

# To display the rendered tree inline, load the file written above:
# img=pltimg.imread('Ltrain_dtree.png')
# imgplot = plt.imshow(img)
# plt.show()


['A' 'B' 'C' 'D']
{'D': '(door_C<=0.5 AND door_A<=0.5 AND sens1_B<=0.5 AND door_B<=0.5)', 'B': '(door_C<=0.5 AND door_A<=0.5 AND sens1_B<=0.5 AND door_B>0.5) OR (door_C<=0.5 AND door_A<=0.5 AND sens1_B>0.5)', 'A': '(door_C<=0.5 AND door_A>0.5)', 'C': '(door_C>0.5)'}
|--- feature_9 <= 0.50
|   |--- feature_0 <= 0.50
|   |   |--- feature_7 <= 0.50
|   |   |   |--- feature_4 <= 0.50
|   |   |   |   |--- class: D
|   |   |   |--- feature_4 >  0.50
|   |   |   |   |--- class: B
|   |   |--- feature_7 >  0.50
|   |   |   |--- class: B
|   |--- feature_0 >  0.50
|   |   |--- class: A
|--- feature_9 >  0.50
|   |--- class: C

Accuracy: 1.0


In [9]:
# Per-class precision, recall and F1 on the held-out test rows.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           A       1.00      1.00      1.00        37
           B       1.00      1.00      1.00        49
           C       1.00      1.00      1.00        51
           D       1.00      1.00      1.00        31

    accuracy                           1.00       168
   macro avg       1.00      1.00      1.00       168
weighted avg       1.00      1.00      1.00       168



In [10]:
# Sanity check: classify one hand-written reading that is expected to be
# activity "A". The reading is defined once and reused for both the message
# and the prediction, so the two cannot drift apart.
expected_label = "A"
sample = [1,6,2,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]  # one value per entry in `features`, same order

print("Trying to predict the activity {}, \nwith the following combination {}".format(
    expected_label, ",".join(str(value) for value in sample)))
pred = clf.predict([sample])
print("Value predicted:", pred)

# `pred` is an array holding one label; compare the scalar, not the array.
if pred[0] == expected_label:
    print("Correct prediction")
else:
    print("Wrong prediction")

Trying to predict the activity A, 
with the following combination 1,6,2,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Value predicted: ['A']
Correct prediction
