## Preprocessing and Data Augmentation
We filtered empty (due to not touching) and erroneous images (which do not contain the expected number of touches) to avoid wrong labeling. We extended the data set with flipped versions (vertical, horizontal, and both) of all remaining capacitive images to train the model for different device orientations. To train a position-invariant model and enable classification of multiple blobs within one capacitive image, we performed a blob detection, cropped the results and pasted each blob into an empty 15×27 matrix (referred to as blob image). The blob detection omitted all blobs that were not larger than one pixel of the image (4.1mm×4.1mm) as these can be considered as noise of the capacitive touchscreen.

In [1]:
import numpy as np
import pandas as pd
import sklearn.datasets, sklearn.linear_model, sklearn.neighbors, sklearn.tree
import sklearn.discriminant_analysis

import sys, os, time
import io
import subprocess

from multiprocessing import Pool
from IPython import display

In [2]:
THRESHOLD = 30 # Threshold under which a measurement is considered as noise

def blob_detection(matrix, x, y, found):
    if x > 0 and x < len(matrix[0]) and y > 0 and y < len(matrix) and matrix[y][x] > THRESHOLD and ((x,y) not in found):
        found.append((x, y))
        blob_detection(matrix, x + 1, y, found) # right
        blob_detection(matrix, x - 1, y, found) # left
        blob_detection(matrix, x, y + 1, found) # top
        blob_detection(matrix, x, y - 1, found) # down
        
# Returns Blob Coordinates, connected tiles > 30
def get_blobs(matrix):
    blobs = []
    founds = []
    for y in range(0, len(matrix)):
        for x in range(0, len(matrix[0])):

            # blob detection
            found = []
            blob_detection(matrix, x, y, found)

            if (len(found) > 0):
                sorted_by_x = sorted(found, key=lambda x: x[0])
                sorted_by_y = sorted(found, key=lambda x: x[1])
                x_min = sorted_by_x[0][0]
                y_min = sorted_by_y[0][1]
                x_max = sorted_by_x[len(sorted_by_x) - 1][0]
                y_max = sorted_by_y[len(sorted_by_y) - 1][1]
                
                if (x_min - 1, x_max + 1, y_min - 1, y_max + 1) not in blobs and ((x_max - x_min) * (y_max - y_min)) > 1:
                    blobs.append((x_min - 1, x_max + 1, y_min - 1, y_max + 1))
                    founds.append(len(found))
    
    return blobs, founds


def get_blob_contents(matrix, blob_coordinates):
    matrices = []
    for c in blob_coordinates:
        matrices.append(matrix[c[2]:c[3], c[0]:c[1]])
    return matrices

def get_plt_rect(blob_coordinates):
    return Rectangle((blob_coordinates[2] - 0.5, blob_coordinates[0]  - 0.5), (blob_coordinates[1] - blob_coordinates[0]) + 1, (blob_coordinates[3] - blob_coordinates[2]) + 1, fill=None, alpha=1, lw=2, color='lime')

In [3]:
# Loading the output of the first Notebook.
# Since we did not upload the full output to github due to space constraints, this has to be generated first.
df = pd.read_pickle("data/data_1.pkl")

## Data Augmentation

In [4]:
# Mirroring L/R
def flip_lr (index):
    twodim = df.iloc[index].Matrix.reshape(27, 15)
    return np.fliplr(twodim).flatten()
p = Pool(15)
results_lr = p.map(flip_lr, range(len(df)))
p.close()
p.join()
df_lr = df.copy(deep=True)
df_lr.Matrix = results_lr

# Mirroring U/D
def flip_ud (index):
    twodim = df.iloc[index].Matrix.reshape(27, 15)
    return np.flipud(twodim).flatten()
p = Pool(15)
results_ud = p.map(flip_ud, range(len(df)))
p.close()
p.join()
df_ud = df.copy(deep=True)
df_ud.Matrix = results_ud

# Append to data frame. 
df_new = df.append(df_lr,ignore_index=True)
df_new = df_new.append(df_ud,ignore_index=True)

# Mirroring U/D of L/R
def flip_ud_of_lr (index):
    twodim = df_lr.iloc[index].Matrix.reshape(27, 15)
    return np.flipud(twodim).flatten()
p = Pool(15)
results_udoflr = p.map(flip_ud_of_lr, range(len(df_lr)))
p.close()
p.join()
df_udoflr = df.copy(deep=True)
df_udoflr.Matrix = results_udoflr

df = df_new.append(df_udoflr, ignore_index=True)

In [5]:
%%time
df['Blobcoords'] = df.Matrix.apply(lambda x : get_blobs(x.reshape(27, 15))[0])

CPU times: user 2h 6min 13s, sys: 16.1 s, total: 2h 6min 29s
Wall time: 2h 6min 29s


In [6]:
%%time 
df['Blobcount'] = df.Blobcoords.apply(lambda x : len(x))

CPU times: user 420 ms, sys: 8 ms, total: 428 ms
Wall time: 427 ms


In [7]:
%%time
df['Blobs'] = df.apply(lambda x : get_blob_contents(x.Matrix.reshape(27, 15), x.Blobcoords), axis=1)

CPU times: user 46.4 s, sys: 836 ms, total: 47.2 s
Wall time: 47.2 s


In [8]:
%%time
OH_TASKS = ["TAP", "DRAG", "SCROLL"]
TH_TASKS = ["PINCH", "ROTATE"]
PALM_TASK = ["PALM"]

valid1Blob = (df.Blobcount == 1) & (df.Task.isin(OH_TASKS + PALM_TASK))
valid2Blob = (df.Blobcount == 2) & (df.Task.isin(TH_TASKS))

df = df[valid1Blob | valid2Blob]

CPU times: user 388 ms, sys: 36 ms, total: 424 ms
Wall time: 423 ms


In [9]:
df = df.rename(columns={"Condition": "Hands"})
df.Hands.replace(to_replace={1: '2'}, inplace=True)
df.Hands.replace(to_replace={0: '1'}, inplace=True)
df.Hands = df.Hands.astype(int)

In [10]:
%%time
def getBlobImage(blobs):
    blobList = []
    for blob in blobs:
        blobList.append(np.pad(blob, ((0, 27 - blob.shape[0]),(0, 15 - blob.shape[1])), mode='constant', constant_values=0))
        
    return blobList

df['BlobImages'] = df.Blobs.apply(lambda x : getBlobImage(x))

CPU times: user 28.6 s, sys: 1.49 s, total: 30.1 s
Wall time: 30.1 s


In [11]:
df = df.drop(["Timestamp"], axis=1)
df = df.drop(["Blobs"], axis=1)
df = df.drop(["Blobcoords"], axis=1)
df = df.drop(["Matrix"], axis=1)

In [12]:
df.Task = df.Task.astype('category')

In [15]:
%%time
for i in range(1, len(df.Participant.unique())+1):
    print("Reading and Writing P" + str(i))
    dfP = df[df.Participant == i].copy(deep=True)
    dfP = dfP.reset_index(drop=True)
    dfP.to_pickle("../Dataset/blobimages_P" + str(i) + ".pkl", compression="gzip")

Reading and Writing P1
Reading and Writing P2
Reading and Writing P3
Reading and Writing P4
Reading and Writing P5
Reading and Writing P6
Reading and Writing P7
Reading and Writing P8
Reading and Writing P9
Reading and Writing P10
Reading and Writing P11
Reading and Writing P12
Reading and Writing P13
Reading and Writing P14
Reading and Writing P15
Reading and Writing P16
Reading and Writing P17
Reading and Writing P18
Reading and Writing P19
Reading and Writing P20
Reading and Writing P21
Reading and Writing P22
CPU times: user 3min 7s, sys: 1.42 s, total: 3min 8s
Wall time: 3min 8s
