In [12]:
import matplotlib.pyplot as plt
from plotly import tools
from mpl_toolkits.mplot3d import Axes3D
import collections
import numpy as np
import plotly as py
import plotly.graph_objs as go
import datetime
from sklearn.model_selection import train_test_split
import pandas as pd
from joblib import load, dump
from numpy.linalg import norm, inv
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import model_from_json
py.offline.init_notebook_mode(connected = True)


In [34]:
# Model reconstruction from JSON file
with open('tf_NN_focal_clf_architecture.json', 'r') as f:
    tf_model = model_from_json(f.read())

# Load weights into the new model
tf_model.load_weights('tf_NN_focal_clf_weights.h5')

In [31]:
NN_clf = load("NN_left_clf.joblib")
NN_clf_with_transform = load("NN_left_clf_relative_to_screen.joblib")

In [16]:
def FindPhoneDataIntervals(phoneData):
    intervals = []
    
    currentInterval = []
    currentCoordinate = None
    multiKeypressInterval = []
    multiKeypressCoordinate = None
    for datum in phoneData:
        if datum['eventID'] == 0:
            currentInterval.append(datum['timeStamp'])
            currentCoordinate = datum['x']
        
        if datum['eventID'] == 5:
            if currentInterval == []:
                currentInterval.append(datum['timeStamp'])
            else:
                multiKeypressInterval.append(datum['timeStamp'])
                multiKeypressCoordinate = datum['x']
        
        if len(currentInterval) == 0 and len(multiKeypressInterval) == 0:
            print("Failed to match a key release with the following time stamp: {}".format(datum['timeStamp']))
            continue
            
        if datum['eventID'] == 1:
            currentInterval.append(datum['timeStamp'])
            intervals.append(currentInterval)
            currentInterval = []
            multiKeypressInterval = []
                
        if datum['eventID'] == 6:
            if len(currentInterval) == 1 and len(multiKeypressInterval) == 1:
                if abs(datum['x'] - multiKeypressCoordinate) < abs(datum['x'] - currentCoordinate):
                    multiKeypressInterval.append(datum['timeStamp'])
                    intervals.append(multiKeypressInterval)
                else:
                    currentInterval.append(datum['timeStamp'])
                    intervals.append(currentInterval)
                    currentInterval = multiKeypressInterval
                    
                multiKeypressInterval = []
                
            else:
                currentInterval.append(datum['timeStamp'])
                intervals.append(currentInterval)
                currentInterval = []
                
            
            
    return intervals

def getKeypressLabels(thumbData, phoneData):
    keypressLabels = np.zeros(len(thumbData))
    
    phoneDataIntervals = FindPhoneDataIntervals(phoneData)
    intervalIndex = 0
    currentInterval = phoneDataIntervals[intervalIndex]
    
    for thumbDataIndex, thumbDatum in thumbData.iterrows():
        while currentInterval[1] < thumbDatum['timeStamp']:
            intervalIndex += 1
            
            if intervalIndex > len(phoneDataIntervals):
                return keypressLabels
            
            currentInterval = phoneDataIntervals[intervalIndex]
        
        if thumbDatum['timeStamp'] >= currentInterval[0] and thumbDatum['timeStamp'] <= currentInterval[1]:
            keypressLabels[thumbDataIndex] = 1
            
        elif thumbDatum['timeStamp'] < currentInterval[0]:
            continue
            
    return keypressLabels
            
            

In [17]:
keyboardDataFileName = 'keyboard_data.txt'
g_thumbKeyboardPositions = collections.defaultdict(np.array)
with open(keyboardDataFileName, 'r') as file:
    for line in file:
        posX, posY, letter = line.split()
        g_thumbKeyboardPositions[letter] = np.array([float(posX), float(posY)])

g_thumbKeyboardPositions[" "] = np.array([900., 850.]) 

def gridCoordinateSearch(coordinate, thumbKeyboardPositions):
    leadingLetters = ["q", "a","z", " "]
    rows = ["qwertyuiop", "asdfghjkl", "zxcvbnm", " "]
    closestRowIndex = min([ind for ind in range(len(leadingLetters))], key = lambda ind: abs(coordinate[1] - thumbKeyboardPositions[leadingLetters[ind]][1]))
    
    closestLetter = min([letter for letter in rows[closestRowIndex]], key = lambda letter: abs(coordinate[0] - thumbKeyboardPositions[letter][0]))
    
    return closestLetter

gridCoordinateSearch(np.array([000.,900.]), g_thumbKeyboardPositions)
        
    

' '

In [18]:
markersDataFileName = "2019-03-13-15-49-57-472_markers_left_hand.txt"
phoneDataFileName = "2019-03-13-15-49-57-467_left_hand.txt"

In [19]:
markersData = []
phoneData = []

with open(phoneDataFileName, 'r') as file:
    itemHeaders = ["timeStamp", "logIndex", "keyboardID", "eventID", "x", "y"]
    for line in file:
        logEntry = line.split()
        if len(logEntry) == len(itemHeaders):
            phoneDatum = {itemName: data for itemName, data in zip(itemHeaders, logEntry)}
            phoneDatum["timeStamp"] = datetime.datetime.strptime(phoneDatum["timeStamp"], "%Y-%m-%d-%H-%M-%S-%f")
            phoneDatum["eventID"] = int(phoneDatum["eventID"])
            phoneDatum["logIndex"] = int(phoneDatum["logIndex"])
            phoneDatum["x"] = float(phoneDatum["x"])
            phoneDatum["y"] = float(phoneDatum["y"])          
            phoneData.append(phoneDatum)
        
with open(markersDataFileName, 'r') as file:
    itemHeaders = ["timeStamp", "logIndex", "markerID", "x", "y", "z"]
    for line in file:
        if logEntry != "":
            logEntry = line.split()
            if len(logEntry) == len(itemHeaders):
                timeStamp = datetime.datetime.strptime(logEntry[0], "%Y-%m-%d-%H-%M-%S-%f")
                if timeStamp >= phoneData[0]['timeStamp'] and timeStamp <= phoneData[-1]['timeStamp']:
                    markersDatum = {itemName: data for itemName, data in zip(itemHeaders, logEntry)}
                    markersDatum["timeStamp"] = timeStamp
                    markersDatum["markerID"] = int(markersDatum["markerID"])
                    markersDatum["logIndex"] = int(markersDatum["logIndex"])
                    markersDatum["x"] = float(markersDatum["x"])
                    markersDatum["y"] = float(markersDatum["y"])
                    markersDatum['z'] = float(markersDatum['z'])
                    markersData.append(markersDatum)

markersDataDictByID = collections.defaultdict(list)

for markersDatum in markersData:
    markersDataDictByID[markersDatum['markerID']].append(markersDatum)

In [6]:
plotData = collections.defaultdict(list)
markersData.sort(key = lambda datum:datum['timeStamp'])
for data in markersData[:20000]:
    plotData[data["markerID"]].append([float(data['x']), float(data['y']), float(data['z'])])

traces = []
for markerID, trajectory in plotData.items():
    trajectory = np.array(trajectory)
    trace = go.Scatter3d(
        x=trajectory[:,0],
        y=trajectory[:,1],
        z=trajectory[:,2],
        mode='lines',
        name = markerID
    )
    traces.append(trace)
    
fig = go.Figure(data = traces)

py.offline.plot(fig, filename = "left hand trajectory test set")


Your filename `left hand trajectory test set` didn't end with .html. Adding .html to the end of your file.



'file://C:\\Users\\Dave Lei\\Part IIB project\\Data logging\\left hand trajectory test set.html'

In [20]:
tipMarkerIDs = [54303]
middleMarkerIDs = [54280]
endMarkerIDs = [54290]

topMarkerLeftID = 50090
topMarkerRightID = 51020
sideMarkerLeftID = 51035

g_calibrationPositions = []

for markerID in [topMarkerLeftID, topMarkerRightID, sideMarkerLeftID]:
    mean = np.mean([np.array([datum['x'], datum['y'], datum['z']]) for datum in markersDataDictByID[markerID][:1]], axis = 0)
    g_calibrationPositions.append(mean)
    
g_referencePoint = g_calibrationPositions[0]

In [21]:
g_calibrationPositions

[array([-0.1130678,  0.7825412,  0.5232269]),
 array([5.866859e-04, 7.746999e-01, 5.192038e-01]),
 array([-0.1385423,  0.7663757,  0.5191758])]

In [22]:
# CalibrationPosition 0 and 1: Left Top Marker , Right Top Marker, 2: Left Side Marker
def findRotationMatrix(calibrationPositions):
    vector1 = calibrationPositions[1] - calibrationPositions[0]
    vector2 = calibrationPositions[2] - calibrationPositions[0]
    
    yVector = np.cross(vector1, vector2)
    
    xRowVector = vector1 / norm(vector1)
    yRowVector = yVector/ norm(yVector)
    zRowVector = np.cross(xRowVector, yRowVector)
    
    inverseTransform = np.array([xRowVector, yRowVector, zRowVector]).transpose()
    
    transformMatrix = inv(inverseTransform)
    return transformMatrix
    
g_rotationMatrix = findRotationMatrix(g_calibrationPositions)


In [23]:
from copy import deepcopy

tipMarkerData, middleMarkerData, endMarkerData = [], [], []

def changeDataReference(datum): 
    oldPosition = np.array([datum['x'], datum['y'], datum['z']]).transpose()
    newX, newY, newZ = np.matmul(g_rotationMatrix, oldPosition - g_referencePoint)
    return newX, newY, newZ


for markerID in tipMarkerIDs:
    for datum in markersDataDictByID[markerID]:
        new_datum = deepcopy(datum)
        new_datum['x'], new_datum['y'], new_datum['z'] = changeDataReference(datum)
        tipMarkerData.append(new_datum)
        
for markerID in middleMarkerIDs:
    for datum in markersDataDictByID[markerID]:
        new_datum = deepcopy(datum)
        new_datum['x'], new_datum['y'], new_datum['z'] = changeDataReference(datum)
        middleMarkerData.append(new_datum)
        
for markerID in endMarkerIDs:
    for datum in markersDataDictByID[markerID]:
        new_datum = deepcopy(datum)
        new_datum['x'], new_datum['y'], new_datum['z'] = changeDataReference(datum)
        endMarkerData.append(new_datum)
    

In [24]:
df_tip = pd.DataFrame(tipMarkerData, columns = ['timeStamp', 'x', 'y', 'z'])
df_mid = pd.DataFrame(middleMarkerData, columns = ['timeStamp', 'x', 'y', 'z'])
df_end = pd.DataFrame(endMarkerData, columns = ['timeStamp', 'x', 'y', 'z'])

    
df_threeMarkers = pd.merge(pd.merge(df_tip, df_mid, on='timeStamp', how = 'inner', suffixes = ['_tip', '_mid']), df_end, on = 'timeStamp', how = 'inner')
df_threeMarkers['label'] = getKeypressLabels(df_threeMarkers, phoneData)


In [25]:
deltaTime = np.diff(df_threeMarkers['timeStamp']) / np.timedelta64(1, 's')

df_threeMarkersWithSpeed = df_threeMarkers.copy()
for name, column in df_threeMarkers.iteritems():
    if name != 'timeStamp' and name != 'label':
        speed = pd.DataFrame(np.divide(np.diff(df_threeMarkers[name]),deltaTime), columns = ['delta_{}'.format(name)])
        speed.index += 1
        df_threeMarkersWithSpeed = pd.merge(df_threeMarkersWithSpeed, speed, left_index=True, right_index=True)
        
df_threeMarkersWithSpeed = df_threeMarkersWithSpeed.replace([np.inf, -np.inf], np.nan).dropna()


divide by zero encountered in true_divide


invalid value encountered in true_divide



In [27]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
test_size = 0.2

features = [name for name, column in df_threeMarkersWithSpeed.iteritems() if name != 'timeStamp' and name != 'label']
X_train, X_test, y_train, y_test = train_test_split(df_threeMarkersWithSpeed[features], df_threeMarkersWithSpeed['label'], test_size = test_size)

In [13]:
# All features
from sklearn.metrics import classification_report

print(classification_report(y_train, NN_clf.predict(X_train)))
print(classification_report(y_test, NN_clf.predict(X_test)))

              precision    recall  f1-score   support

         0.0       0.98      0.94      0.96     49642
         1.0       0.75      0.92      0.82      9754

   micro avg       0.94      0.94      0.94     59396
   macro avg       0.87      0.93      0.89     59396
weighted avg       0.94      0.94      0.94     59396

              precision    recall  f1-score   support

         0.0       0.98      0.94      0.96     12390
         1.0       0.75      0.93      0.83      2459

   micro avg       0.94      0.94      0.94     14849
   macro avg       0.87      0.93      0.89     14849
weighted avg       0.95      0.94      0.94     14849



In [35]:
# All features with transform
from sklearn.metrics import classification_report

print(classification_report(y_train, NN_clf_with_transform.predict(X_train)))
print(classification_report(y_test, NN_clf_with_transform.predict(X_test)))

              precision    recall  f1-score   support

         0.0       0.99      0.88      0.93     49629
         1.0       0.62      0.97      0.76      9767

   micro avg       0.90      0.90      0.90     59396
   macro avg       0.81      0.93      0.85     59396
weighted avg       0.93      0.90      0.91     59396

              precision    recall  f1-score   support

         0.0       0.99      0.89      0.94     12403
         1.0       0.63      0.97      0.76      2446

   micro avg       0.90      0.90      0.90     14849
   macro avg       0.81      0.93      0.85     14849
weighted avg       0.93      0.90      0.91     14849



In [30]:
speedYTrace = go.Scatter(
    x = df_threeMarkersWithSpeed['timeStamp'][40000:50000],
    y = df_threeMarkersWithSpeed['delta_z_tip'][40000:50000],
    mode = 'lines+markers',
    name = 'speedMarkerY'
)

labelTrace = go.Scatter(
    x = df_threeMarkersWithSpeed['timeStamp'][40000:50000],
    y = df_threeMarkersWithSpeed['label'][40000:50000],
    mode = 'markers',
    name = 'label'
)

predictedTrace = go.Scatter(
    x = df_threeMarkersWithSpeed['timeStamp'][40000:50000],
    y = NN_clf_with_transform.predict(df_threeMarkersWithSpeed[features][40000:50000]),
    mode = 'markers',
    name = 'predictedLabel'
)


#data = [accelYTrace]
data = [speedYTrace,  predictedTrace, labelTrace]
fig = go.Figure(data = data)

py.offline.iplot(fig, filename = "predictedTrace")

In [35]:
# All features with transformation matrix (focal loss)
from sklearn.metrics import classification_report

print(classification_report(y_train, np.rint(tf_model.predict(X_train))))
print(classification_report(y_test, np.rint(tf_model.predict(X_test))))

              precision    recall  f1-score   support

         0.0       1.00      0.88      0.94     49679
         1.0       0.62      0.98      0.76      9717

   micro avg       0.90      0.90      0.90     59396
   macro avg       0.81      0.93      0.85     59396
weighted avg       0.93      0.90      0.91     59396

              precision    recall  f1-score   support

         0.0       1.00      0.88      0.93     12353
         1.0       0.62      0.98      0.76      2496

   micro avg       0.90      0.90      0.90     14849
   macro avg       0.81      0.93      0.85     14849
weighted avg       0.93      0.90      0.90     14849



In [31]:
labelTrace = go.Scatter(
    x = df_threeMarkersWithSpeed['timeStamp'][40000:50000],
    y = df_threeMarkersWithSpeed['label'][40000:50000],
    mode = 'markers',
    name = 'label'
)

predictedLabelTrace = go.Scatter(
    x = df_threeMarkersWithSpeed['timeStamp'][40000:50000],
    y =np.rint(tf_model.predict(df_threeMarkersWithSpeed[features][40000:50000])).flatten(),
    mode = 'markers',
    name = 'label'
)



data = [labelTrace, predictedLabelTrace]
fig = tools.make_subplots(rows=2, cols=1, shared_xaxes=True)

fig.append_trace(labelTrace, 1, 1)
fig.append_trace(predictedLabelTrace, 2, 1)

py.offline.iplot(fig, filename = "tf_model")

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x1,y2 ]

