In [27]:
import matplotlib.pyplot as plt
from plotly import tools
from mpl_toolkits.mplot3d import Axes3D
import collections
import numpy as np
import plotly as py
import plotly.graph_objs as go
import datetime
from sklearn.model_selection import train_test_split
import pandas as pd
from joblib import load, dump
py.offline.init_notebook_mode(connected = True)


In [2]:
def FindPhoneDataIntervals(phoneData):
    intervals = []
    
    currentInterval = []
    currentCoordinate = None
    multiKeypressInterval = []
    multiKeypressCoordinate = None
    for datum in phoneData:
        if datum['eventID'] == 0:
            currentInterval.append(datum['timeStamp'])
            currentCoordinate = datum['x']
        
        if datum['eventID'] == 5:
            if currentInterval == []:
                currentInterval.append(datum['timeStamp'])
            else:
                multiKeypressInterval.append(datum['timeStamp'])
                multiKeypressCoordinate = datum['x']
        
        if len(currentInterval) == 0 and len(multiKeypressInterval) == 0:
            print("Failed to match a key release with the following time stamp: {}".format(datum['timeStamp']))
            continue
            
        if datum['eventID'] == 1:
            currentInterval.append(datum['timeStamp'])
            intervals.append(currentInterval)
            currentInterval = []
            multiKeypressInterval = []
                
        if datum['eventID'] == 6:
            if len(currentInterval) == 1 and len(multiKeypressInterval) == 1:
                if abs(datum['x'] - multiKeypressCoordinate) < abs(datum['x'] - currentCoordinate):
                    multiKeypressInterval.append(datum['timeStamp'])
                    intervals.append(multiKeypressInterval)
                else:
                    currentInterval.append(datum['timeStamp'])
                    intervals.append(currentInterval)
                    currentInterval = multiKeypressInterval
                    
                multiKeypressInterval = []
                
            else:
                currentInterval.append(datum['timeStamp'])
                intervals.append(currentInterval)
                currentInterval = []
                
            
            
    return intervals

def getKeypressLabels(thumbData, phoneData):
    keypressLabels = np.zeros(len(thumbData))
    
    phoneDataIntervals = FindPhoneDataIntervals(phoneData)
    intervalIndex = 0
    currentInterval = phoneDataIntervals[intervalIndex]
    
    for thumbDataIndex, thumbDatum in thumbData.iterrows():
        while currentInterval[1] < thumbDatum['timeStamp']:
            intervalIndex += 1
            
            if intervalIndex > len(phoneDataIntervals):
                return keypressLabels
            
            currentInterval = phoneDataIntervals[intervalIndex]
        
        if thumbDatum['timeStamp'] >= currentInterval[0] and thumbDatum['timeStamp'] <= currentInterval[1]:
            keypressLabels[thumbDataIndex] = 1
            
        elif thumbDatum['timeStamp'] < currentInterval[0]:
            continue
            
    return keypressLabels
            
            

In [3]:
keyboardDataFileName = 'keyboard_data.txt'
g_thumbKeyboardPositions = collections.defaultdict(np.array)
with open(keyboardDataFileName, 'r') as file:
    for line in file:
        posX, posY, letter = line.split()
        g_thumbKeyboardPositions[letter] = np.array([float(posX), float(posY)])

g_thumbKeyboardPositions[" "] = np.array([900., 850.]) 

def gridCoordinateSearch(coordinate, thumbKeyboardPositions):
    leadingLetters = ["q", "a","z", " "]
    rows = ["qwertyuiop", "asdfghjkl", "zxcvbnm", " "]
    closestRowIndex = min([ind for ind in range(len(leadingLetters))], key = lambda ind: abs(coordinate[1] - thumbKeyboardPositions[leadingLetters[ind]][1]))
    
    closestLetter = min([letter for letter in rows[closestRowIndex]], key = lambda letter: abs(coordinate[0] - thumbKeyboardPositions[letter][0]))
    
    return closestLetter

gridCoordinateSearch(np.array([000.,900.]), g_thumbKeyboardPositions)
        
    

' '

In [4]:
markersDataFileName = "2019-03-13-15-18-38-752_markers_left.txt"
phoneDataFileName = "2019-03-13-15-18-38-747_left.txt"

In [5]:
markersData = []
phoneData = []

with open(phoneDataFileName, 'r') as file:
    itemHeaders = ["timeStamp", "logIndex", "keyboardID", "eventID", "x", "y"]
    for line in file:
        logEntry = line.split()
        if len(logEntry) == len(itemHeaders):
            phoneDatum = {itemName: data for itemName, data in zip(itemHeaders, logEntry)}
            phoneDatum["timeStamp"] = datetime.datetime.strptime(phoneDatum["timeStamp"], "%Y-%m-%d-%H-%M-%S-%f")
            phoneDatum["eventID"] = int(phoneDatum["eventID"])
            phoneDatum["logIndex"] = int(phoneDatum["logIndex"])
            phoneDatum["x"] = float(phoneDatum["x"])
            phoneDatum["y"] = float(phoneDatum["y"])          
            phoneData.append(phoneDatum)
        
with open(markersDataFileName, 'r') as file:
    itemHeaders = ["timeStamp", "logIndex", "markerID", "x", "y", "z"]
    for line in file:
        if logEntry != "":
            logEntry = line.split()
            if len(logEntry) == len(itemHeaders):
                timeStamp = datetime.datetime.strptime(logEntry[0], "%Y-%m-%d-%H-%M-%S-%f")
                if timeStamp >= phoneData[0]['timeStamp'] and timeStamp <= phoneData[-1]['timeStamp']:
                    markersDatum = {itemName: data for itemName, data in zip(itemHeaders, logEntry)}
                    markersDatum["timeStamp"] = timeStamp
                    markersDatum["markerID"] = int(markersDatum["markerID"])
                    markersDatum["logIndex"] = int(markersDatum["logIndex"])
                    markersDatum["x"] = float(markersDatum["x"])
                    markersDatum["y"] = float(markersDatum["y"])
                    markersDatum['z'] = float(markersDatum['z'])
                    markersData.append(markersDatum)

markersDataDictByID = collections.defaultdict(list)

for markersDatum in markersData:
    markersDataDictByID[markersDatum['markerID']].append(markersDatum)

In [7]:
plotData = collections.defaultdict(list)
markersData.sort(key = lambda datum:datum['timeStamp'])
for data in markersData[:20000]:
    plotData[data["markerID"]].append([float(data['x']), float(data['y']), float(data['z'])])

traces = []
for markerID, trajectory in plotData.items():
    trajectory = np.array(trajectory)
    trace = go.Scatter3d(
        x=trajectory[:,0],
        y=trajectory[:,1],
        z=trajectory[:,2],
        mode='lines',
        name = markerID
    )
    traces.append(trace)
    
fig = go.Figure(data = traces)

py.offline.plot(fig, filename = "left hand trajectory")

'file://C:\\Users\\Dave Lei\\Part IIB project\\Data logging\\left hand trajectory.html'

In [8]:
tipMarkerIDs = [53641]
middleMarkerIDs = [53639]
endMarkerIDs = [53640]

calibrationMarkerID = 51020

In [9]:
tipMarkerData, middleMarkerData, endMarkerData = [], [], []

for markerID in tipMarkerIDs:
    tipMarkerData.extend(markersDataDictByID[markerID])
    
for markerID in middleMarkerIDs:
    middleMarkerData.extend(markersDataDictByID[markerID])
    
for markerID in endMarkerIDs:
    endMarkerData.extend(markersDataDictByID[markerID])
    
calibration_centre = np.mean([np.array([datum['x'], datum['y'], datum['z']]) for datum in markersDataDictByID[calibrationMarkerID]], axis = 0)

In [13]:
df_tip = pd.DataFrame(tipMarkerData, columns = ['timeStamp', 'x', 'y', 'z'])
df_mid = pd.DataFrame(middleMarkerData, columns = ['timeStamp', 'x', 'y', 'z'])
df_end = pd.DataFrame(endMarkerData, columns = ['timeStamp', 'x', 'y', 'z'])

    
df_threeMarkers = pd.merge(pd.merge(df_tip, df_mid, on='timeStamp', how = 'inner', suffixes = ['_tip', '_mid']), df_end, on = 'timeStamp', how = 'inner')
df_threeMarkers['label'] = getKeypressLabels(df_threeMarkers, phoneData)


In [14]:
deltaTime = np.diff(df_threeMarkers['timeStamp']) / np.timedelta64(1, 's')

df_threeMarkersWithSpeed = df_threeMarkers.copy()
for name, column in df_threeMarkers.iteritems():
    if name != 'timeStamp' and name != 'label':
        speed = pd.DataFrame(np.divide(np.diff(df_threeMarkers[name]),deltaTime), columns = ['delta_{}'.format(name)])
        speed.index += 1
        df_threeMarkersWithSpeed = pd.merge(df_threeMarkersWithSpeed, speed, left_index=True, right_index=True)
        
df_threeMarkersWithSpeed = df_threeMarkersWithSpeed.replace([np.inf, -np.inf], np.nan).dropna()


divide by zero encountered in true_divide


invalid value encountered in true_divide



In [15]:
len(df_threeMarkers)

153260

In [16]:
speedYTrace = go.Scatter(
    x = df_threeMarkersWithSpeed['timeStamp'][40000:50000],
    y = df_threeMarkersWithSpeed['delta_y_tip'][40000:50000],
    mode = 'lines+markers',
    name = 'speedMarkerY'
)

labelTrace = go.Scatter(
    x = df_threeMarkersWithSpeed['timeStamp'][40000:50000],
    y = df_threeMarkersWithSpeed['label'][40000:50000],
    mode = 'markers',
    name = 'label'
)

phoneDataTrace = go.Scatter(
    x = [datum['timeStamp'] for datum in phoneData],
    y = [datum['eventID'] for datum in phoneData],
    mode = 'markers',
    name = 'label'
)
data = [speedYTrace, labelTrace, phoneDataTrace]
fig = go.Figure(data = data)

py.offline.iplot(fig, filename = "leftSpeedYTrace")

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
test_size = 0.2

features = [name for name, column in df_threeMarkersWithSpeed.iteritems() if name != 'timeStamp' and name != 'label']
X_train, X_test, y_train, y_test = train_test_split(df_threeMarkersWithSpeed[features], df_threeMarkersWithSpeed['label'], test_size = test_size)

In [21]:
from IPython.lib import backgroundjobs as bg
NN_clf = MLPClassifier((40, 40))
jobs = bg.BackgroundJobManager()

jobs.new('NN_clf.fit(np.array(X_train), np.array(y_train))')
jobs.status()

Running jobs:
0 : NN_clf.fit(np.array(X_train), np.array(y_train))



In [24]:
# All features
from sklearn.metrics import classification_report

print(classification_report(y_train, NN_clf.predict(X_train)))
print(classification_report(y_test, NN_clf.predict(X_test)))

              precision    recall  f1-score   support

         0.0       0.98      0.95      0.97    100465
         1.0       0.81      0.89      0.85     21522

   micro avg       0.94      0.94      0.94    121987
   macro avg       0.89      0.92      0.91    121987
weighted avg       0.95      0.94      0.94    121987

              precision    recall  f1-score   support

         0.0       0.98      0.95      0.96     25174
         1.0       0.80      0.89      0.84      5323

   micro avg       0.94      0.94      0.94     30497
   macro avg       0.89      0.92      0.90     30497
weighted avg       0.95      0.94      0.94     30497



In [25]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(NN_clf, df_threeMarkersWithSpeed[features], df_threeMarkersWithSpeed['label'], cv = 5)

In [28]:
dump(NN_clf, 'NN_left_clf.joblib')

['NN_left_clf.joblib']