In [2]:
import pandas as pd

In [3]:
%matplotlib inline

In [4]:
# Convert CSV files to pandas DataFrames
log_dataframes = []
for i in xrange(1, 33):
    log_file = open("log%d.csv" % i, 'r')
    log_dataframes.append(pd.read_csv(log_file, index_col=0))
    log_file.close()

In [5]:
# Use SVM to classify points, using acceleration magnitude and RSSI as features
# Prints accuracy for leave-one-out training, followed by average accuracy

from sklearn import svm

accuracies = []
for left_out in xrange(len(log_dataframes)):
    training_df = pd.DataFrame()
    training_label_series = pd.Series()
    for log_index in filter(lambda x: x != left_out, xrange(len(log_dataframes))):
        acc_mags = log_dataframes[log_index][["Acc_X", "Acc_Y", "Acc_Z"]].mean(axis=1)
        training_df = training_df.append(pd.concat([acc_mags, log_dataframes[log_index]["RSSI"]], axis=1))
        training_label_series = training_label_series.append(log_dataframes[log_index]["Unlock Requested"])
    
    svm_clf = svm.SVC()
    svm_clf.fit(training_df.as_matrix(), training_label_series.values)
    
    acc_mags = log_dataframes[log_index][["Acc_X", "Acc_Y", "Acc_Z"]].mean(axis=1)
    target_df = pd.concat([acc_mags, log_dataframes[log_index]["RSSI"]], axis=1)
    target_label_series = log_dataframes[log_index]["Unlock Requested"]
    
    #predicted = svm_clf.predict(target_df)
    accuracy = svm_clf.score(target_df.as_matrix(), target_label_series.values)
    print "Left out log %d; Accuracy:%f" % (left_out + 1, accuracy)
    accuracies.append(accuracy)
print "Average accuracy:%f" % (sum(accuracies)/len(accuracies))


Left out log 1; Accuracy:1.000000
Left out log 2; Accuracy:1.000000
Left out log 3; Accuracy:1.000000
Left out log 4; Accuracy:1.000000
Left out log 5; Accuracy:1.000000
Left out log 6; Accuracy:1.000000
Left out log 7; Accuracy:1.000000
Left out log 8; Accuracy:1.000000
Left out log 9; Accuracy:1.000000
Left out log 10; Accuracy:1.000000
Left out log 11; Accuracy:1.000000
Left out log 12; Accuracy:1.000000
Left out log 13; Accuracy:1.000000
Left out log 14; Accuracy:1.000000
Left out log 15; Accuracy:1.000000
Left out log 16; Accuracy:1.000000
Left out log 17; Accuracy:1.000000
Left out log 18; Accuracy:1.000000
Left out log 19; Accuracy:1.000000
Left out log 20; Accuracy:1.000000
Left out log 21; Accuracy:1.000000
Left out log 22; Accuracy:1.000000
Left out log 23; Accuracy:1.000000
Left out log 24; Accuracy:1.000000
Left out log 25; Accuracy:1.000000
Left out log 26; Accuracy:1.000000
Left out log 27; Accuracy:1.000000
Left out log 28; Accuracy:1.000000
Left out log 29; Accuracy:1.0

In [8]:
# Use decision tree and visualize tree
from IPython.display import Image
from sklearn.externals.six import StringIO
from sklearn import tree
import pydot

accuracies = []
for left_out in xrange(len(log_dataframes)):
    training_df = pd.DataFrame()
    training_label_series = pd.Series()
    for log_index in filter(lambda x: x != left_out, xrange(len(log_dataframes))):
        acc_mags = log_dataframes[log_index][["Acc_X", "Acc_Y", "Acc_Z"]].mean(axis=1)
        training_df = training_df.append(pd.concat([acc_mags, log_dataframes[log_index]["RSSI"]], axis=1))
        training_label_series = training_label_series.append(log_dataframes[log_index]["Unlock Requested"])
    
    dt_clf = tree.DecisionTreeClassifier(max_depth=2)
    dt_clf.fit(training_df.as_matrix(), training_label_series.values)
    
    acc_mags = log_dataframes[log_index][["Acc_X", "Acc_Y", "Acc_Z"]].mean(axis=1)
    target_df = pd.concat([acc_mags, log_dataframes[log_index]["RSSI"]], axis=1)
    target_label_series = log_dataframes[log_index]["Unlock Requested"]
    
    #predicted = dt_clf.predict(target_df)
    accuracy = dt_clf.score(target_df.as_matrix(), target_label_series.values)
    
    print "Left out log %d; Accuracy:%f" % (left_out + 1, accuracy)
    accuracies.append(accuracy)
    
    '''
    # Doesn't work with my Python distribution?
    dot_data = StringIO()
    tree.export_graphviz(dt_clf, out_file=dot_data) 
    graph = pydot.graph_from_dot_data(dot_data.getvalue()) 
    graph.write_pdf("dt%d.pdf" % left_out)
    '''
                     
print "Average accuracy:%f" % (sum(accuracies)/len(accuracies))

Left out log 1; Accuracy:1.000000
Left out log 2; Accuracy:1.000000
Left out log 3; Accuracy:1.000000
Left out log 4; Accuracy:1.000000
Left out log 5; Accuracy:1.000000
Left out log 6; Accuracy:1.000000
Left out log 7; Accuracy:1.000000
Left out log 8; Accuracy:1.000000
Left out log 9; Accuracy:1.000000
Left out log 10; Accuracy:1.000000
Left out log 11; Accuracy:1.000000
Left out log 12; Accuracy:1.000000
Left out log 13; Accuracy:1.000000
Left out log 14; Accuracy:1.000000
Left out log 15; Accuracy:1.000000
Left out log 16; Accuracy:1.000000
Left out log 17; Accuracy:1.000000
Left out log 18; Accuracy:1.000000
Left out log 19; Accuracy:1.000000
Left out log 20; Accuracy:1.000000
Left out log 21; Accuracy:1.000000
Left out log 22; Accuracy:1.000000
Left out log 23; Accuracy:1.000000
Left out log 24; Accuracy:1.000000
Left out log 25; Accuracy:1.000000
Left out log 26; Accuracy:1.000000
Left out log 27; Accuracy:1.000000
Left out log 28; Accuracy:1.000000
Left out log 29; Accuracy:1.0