In [1]:
# The first import may raise a RuntimeError because pandas was compiled against a
# newer NumPy C-API than the installed NumPy; re-running the import (next cell) succeeds.
import pandas as pd

RuntimeError: module compiled against API version 0xa but this version of numpy is 0x9

In [2]:
import pandas as pd

In [3]:
%matplotlib inline

In [9]:
# Convert CSV files to pandas DataFrames
NUM_LOGS = 36     # logs are stored as log1.csv .. log36.csv
window_size = 20  # span of the exponentially weighted moving average applied to RSSI

log_dataframes = []
for i in xrange(1, NUM_LOGS + 1):
    # Passing the path lets pandas open and close the file itself, so the
    # handle is released even if parsing fails part-way through.
    raw_dataframe = pd.read_csv("log%d.csv" % i, index_col=0)
    raw_dataframe["Smoothed RSSI"] = pd.ewma(raw_dataframe["RSSI"].values, span=window_size)
    # Only the smoothed signal is kept; the raw RSSI column is dropped.
    cleaned_dataframe = raw_dataframe.drop("RSSI", axis=1)
    log_dataframes.append(cleaned_dataframe)

In [12]:
# Use decision tree and visualize tree
from sklearn.externals.six import StringIO
from sklearn import tree
import math

def magnitude(accel):
    """Return the Euclidean norm of the first three entries of ``accel.values``.

    ``accel`` is any object exposing an indexable ``.values`` (in this notebook,
    a row of the Acc_X / Acc_Y / Acc_Z columns passed in by ``DataFrame.apply``).
    """
    components = accel.values
    squared_sum = sum(components[axis] ** 2 for axis in (0, 1, 2))
    return math.sqrt(squared_sum)

accuracies = []
rssi_thresholds = []
acc_mag_thresholds = []
features = ["Acceleration Magnitude", "Smoothed RSSI"]
for left_out in xrange(len(log_dataframes)):
    training_df = pd.DataFrame()
    training_label_series = pd.Series()
    for log_index in filter(lambda x: x != left_out, xrange(len(log_dataframes))):
        acc_mags = log_dataframes[log_index][["Acc_X", "Acc_Y", "Acc_Z"]].apply(magnitude, axis=1)
        training_df = training_df.append(pd.concat([acc_mags, log_dataframes[log_index]["Smoothed RSSI"]], axis=1))
        training_label_series = training_label_series.append(log_dataframes[log_index]["Unlock Requested"])
    
    dt_clf = tree.DecisionTreeClassifier(max_depth=2)
    dt_clf.fit(training_df.as_matrix(), training_label_series.values)
    
    acc_mags = log_dataframes[log_index][["Acc_X", "Acc_Y", "Acc_Z"]].apply(magnitude, axis=1)
    target_df = pd.concat([acc_mags, log_dataframes[log_index]["Smoothed RSSI"]], axis=1)
    target_label_series = log_dataframes[log_index]["Unlock Requested"]
    
    #predicted = dt_clf.predict(target_df)
    
    accuracy = dt_clf.score(target_df.as_matrix(), target_label_series.values)
    print "Left out log %d; Accuracy:%f" % (left_out + 1, accuracy)
    accuracies.append(accuracy)
    
    # One model only uses RSSI - ignore it for now?
    if dt_clf.tree_.feature[dt_clf.tree_.children_right[0]] != 1:
        rssi_threshold = dt_clf.tree_.threshold[0]
        print "RSSI threshold: %f" % rssi_threshold
        rssi_thresholds.append(rssi_threshold)
    
        acc_mag_threshold = dt_clf.tree_.threshold[dt_clf.tree_.children_right[0]]
        print "Acceleration Magnitude threshold: %f" % acc_mag_threshold
        acc_mag_thresholds.append(acc_mag_threshold)
    
    print "" # Newline for spacing
    
    # Run dot -Tpdf log<num>.dot -o log<num>.pdf to generate graph
    with open("log%d.dot" % (left_out + 1), 'w') as f:
        f = tree.export_graphviz(dt_clf, out_file=f, feature_names=features)
                     
print "Average accuracy:%f" % (sum(accuracies)/len(accuracies))
print "Average Smoothed RSSI threshold:%f" % (sum(rssi_thresholds)/len(rssi_thresholds))
print "Average acceleration magnitude threshold:%f" % (sum(acc_mag_thresholds)/len(acc_mag_thresholds))

Left out log 1; Accuracy:1.000000

Left out log 2; Accuracy:1.000000

Left out log 3; Accuracy:1.000000

Left out log 4; Accuracy:1.000000

Left out log 5; Accuracy:1.000000

Left out log 6; Accuracy:1.000000

Left out log 7; Accuracy:1.000000

Left out log 8; Accuracy:1.000000

Left out log 9; Accuracy:1.000000

Left out log 10; Accuracy:1.000000

Left out log 11; Accuracy:1.000000

Left out log 12; Accuracy:1.000000

Left out log 13; Accuracy:1.000000

Left out log 14; Accuracy:1.000000

Left out log 15; Accuracy:1.000000

Left out log 16; Accuracy:1.000000

Left out log 17; Accuracy:1.000000

Left out log 18; Accuracy:1.000000

Left out log 19; Accuracy:1.000000

Left out log 20; Accuracy:1.000000
RSSI threshold: -66.312897
Acceleration Magnitude threshold: 0.231660

Left out log 21; Accuracy:1.000000

Left out log 22; Accuracy:1.000000

Left out log 23; Accuracy:1.000000

Left out log 24; Accuracy:1.000000

Left out log 25; Accuracy:1.000000

Left out log 26; Accuracy:1.000000

Lef

In [19]:
# Test manual "decision tree" model
# Pool the features and ground-truth labels of every log, classify each sample
# with the two hand-picked thresholds, then report the pooled error rates.
target_df = pd.DataFrame()
training_labels = pd.Series()
for walk_df in log_dataframes:
    walk_acc_mags = walk_df[["Acc_X", "Acc_Y", "Acc_Z"]].apply(magnitude, axis=1)
    walk_features = pd.concat([walk_acc_mags, walk_df["Smoothed RSSI"]], axis=1)
    target_df = target_df.append(walk_features)
    training_labels = training_labels.append(walk_df["Unlock Requested"])

# Column 0 holds the acceleration magnitude and column 1 the smoothed RSSI.
# A sample is classed 1 (unlock requested) only when the RSSI check and then
# the acceleration check both pass; otherwise it is 0 (no unlock requested).
target_labels = [
    1 if (row[1] >= -61.0 and row[0] <= 0.20) else 0
    for row in target_df.as_matrix()
]

# training_labels holds the recorded labels, target_labels the predictions
actual_labels = training_labels.values
false_positives = 0
false_negatives = 0
for actual, predicted in zip(actual_labels, target_labels):
    if actual == predicted:
        continue
    if actual == 0:
        false_positives += 1
    else:
        false_negatives += 1

err = (false_positives + false_negatives) / float(len(training_labels))
print "Model error of %.2f%% over %d samples" % (err * 100.0, len(target_labels))

negative_count = float(sum(1 for label in actual_labels if label == 0))
print "False positive percentage: %.2f%%" % (100.0 * false_positives / negative_count)

positive_count = float(sum(1 for label in actual_labels if label == 1))
print "False negative percentage: %.2f%%" % (100.0 * false_negatives / positive_count)

Model error of 3.23% over 7953 samples
False positive percentage: 0.03%
False negative percentage: 91.73%


In [20]:
# Test per walk
successes = 0
for i in xrange(len(log_dataframes)):
    acc_mags = log_dataframes[i][["Acc_X", "Acc_Y", "Acc_Z"]].apply(magnitude, axis=1)
    target_df = pd.concat([acc_mags, log_dataframes[i]["Smoothed RSSI"]], axis=1)
    training_labels = log_dataframes[i]["Unlock Requested"]
    
    target_labels = []
    for sample in target_df.as_matrix():
        # First check RSSI
        current_class = 0 # No unlock requested
        if sample[1] >= -61.0:
            # Then check acceleration magnitude
            if sample[0] <= 0.20:
                current_class = 1 # Unlock requested
        target_labels.append(current_class)
        
    false_positives = 0
    false_negatives = 0
    for i in xrange(len(training_labels)):
        if training_labels.values[i] != target_labels[i]:
            if training_labels.values[i] == 0:
                false_positives += 1
            else:
                false_negatives += 1

    err = (false_positives + false_negatives) / float(len(training_labels))
    print "Model error of %.2f%% over %d samples in log %d" % (err * 100.0, len(target_labels), i)
    
    total_negative = float(len(filter(lambda x: x == 0, training_labels.values)))
    if total_negative > 0:
        print "False positive percentage: %.2f%%" % (100.0 * false_positives / total_negative)
    else:
        print "No false positives"
    
    total_positive = float(len(filter(lambda x: x == 1, training_labels.values)))
    if total_positive > 0:
        print "False negative percentage: %.2f%%" % (100.0 * false_negatives / total_positive)
    else:
        print "No false negatives"
    
    #print "Training: "
    #print training_labels.values
    #print "Target: "
    #print target_labels
    
    if (false_positives == 0) and (total_positive == 0.0 or false_negatives / total_positive != 1.0):
        successes += 1
        print "Success!"
    else:
        print "Failed"
    print ""
        
print "Success rate: %.2f%%" % (100.0 * successes / float(len(log_dataframes)))

Model error of 11.59% over 164 samples in log 163
False positive percentage: 0.00%
False negative percentage: 100.00%
Failed

Model error of 9.09% over 143 samples in log 142
False positive percentage: 0.00%
False negative percentage: 100.00%
Failed

Model error of 8.89% over 135 samples in log 134
False positive percentage: 0.00%
False negative percentage: 100.00%
Failed

Model error of 8.97% over 145 samples in log 144
False positive percentage: 1.50%
False negative percentage: 91.67%
Failed

Model error of 8.12% over 160 samples in log 159
False positive percentage: 0.00%
False negative percentage: 100.00%
Failed

Model error of 10.34% over 145 samples in log 144
False positive percentage: 0.00%
False negative percentage: 93.75%
Success!

Model error of 5.21% over 192 samples in log 191
False positive percentage: 0.00%
False negative percentage: 100.00%
Failed

Model error of 8.25% over 206 samples in log 205
False positive percentage: 0.00%
False negative percentage: 100.00%
Failed