In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import minmax_scale
from scipy.spatial.distance import cdist
from pyts.metrics import dtw
from ipycanvas import Canvas

## Features

In [2]:
def x_value(line):
    """Return the x coordinate of a signature sample (field at index 1)."""
    x_coordinate = line[1]
    return x_coordinate


def y_value(line):
    """Return the y coordinate of a signature sample (field at index 2)."""
    y_coordinate = line[2]
    return y_coordinate
 

def pressure_value(line):
    """Return the pen pressure of a signature sample (field at index 3)."""
    pressure = line[3]
    return pressure


def vx_value(prev_line, current_line):
    """Return the horizontal velocity between two consecutive samples.

    The velocity is the difference of the x fields (index 1) divided by the
    difference of the timestamp fields (index 0). When both samples carry the
    same timestamp the velocity is defined as 0 to avoid a division by zero.
    """
    elapsed = current_line[0] - prev_line[0]
    if elapsed == 0:
        return 0
    return (current_line[1] - prev_line[1]) / elapsed


def vy_value(prev_line, current_line):
    """Return the vertical velocity between two consecutive samples.

    The velocity is the difference of the y fields (index 2) divided by the
    difference of the timestamp fields (index 0). When both samples carry the
    same timestamp the velocity is defined as 0 to avoid a division by zero.
    """
    elapsed = current_line[0] - prev_line[0]
    if elapsed == 0:
        return 0
    return (current_line[2] - prev_line[2]) / elapsed


# Compute the time series of a signature: one feature vector per line of the
# signature file.
def compute_features_vector(signature, normalize=True):
    """Turn a signature into an array of per-sample feature vectors.

    Each sample yields five features, in this order: x, y, pressure,
    horizontal velocity and vertical velocity. Velocities are measured against
    the previous sample; the first sample is compared with itself, so both of
    its velocities are 0.

    Parameters
    ----------
    signature : iterable of sequences
        Samples of the signature; each one is indexed as
        (timestamp, x, y, pressure, ...).
    normalize : bool, default True
        When true, every feature column is rescaled to [0, 1] with
        ``minmax_scale``.

    Returns
    -------
    numpy.ndarray
        One row per sample, one column per feature.
    """
    rows = []
    previous = None
    for line in signature:
        # The first sample has no predecessor: use the sample itself.
        reference = line if previous is None else previous
        rows.append([
            x_value(line),
            y_value(line),
            pressure_value(line),
            vx_value(reference, line),
            vy_value(reference, line),
        ])
        previous = line

    features = np.asarray(rows)
    if not normalize:
        return features

    # axis=0 scales each feature column independently.
    return minmax_scale(features, feature_range=(0, 1), axis=0)

def compute_dtw(signature1, signature2, normalize=True):
    """Return the DTW cost between two signatures.

    Both signatures are converted to feature arrays with
    ``compute_features_vector``. The pairwise distance matrix between their
    samples (``cdist`` with its default metric) is then handed to ``dtw`` as a
    precomputed cost, using the "sakoechiba" method.

    Parameters
    ----------
    signature1, signature2 : iterable of sequences
        Raw signature samples, as accepted by ``compute_features_vector``.
    normalize : bool, default True
        Forwarded to ``compute_features_vector`` for both signatures.
    """
    features_a, features_b = (
        compute_features_vector(raw, normalize)
        for raw in (signature1, signature2)
    )

    pairwise_cost = cdist(features_a, features_b)

    return dtw(precomputed_cost=pairwise_cost, dist="precomputed",
               method="sakoechiba")


### Data preprocessing
We read the data contained in the enrollment txt files once and store it in a dict (`sig_gt`, keyed by `(user, signature)`), so that the rest of the code does not need any further I/O calls.

In [3]:
sig_gt = {}
n_users = 30

# Read the user ids, one per line. strip() removes the line terminator without
# cutting a real character when the last line has no trailing newline or when
# the file uses "\r\n" line endings; blank lines are skipped.
with open('data/users.txt', 'r') as f:
    users = [line.strip() for line in f if line.strip()]

assert(len(users)==n_users), f'there should be {n_users} users'

# Load the first five signatures of every user as reference signatures.
# NOTE(review): the notebook text describes these as enrollment files, but the
# path below points at data/verification/ -- confirm which directory (and file
# naming) is intended before relying on the results.
for user in users:
    for i in range(1, 6):
        sig = f'{i:02d}'  # zero-padded signature number, e.g. "01"
        with open(f'data/verification/{user}-{sig}.txt', 'r') as f:
            # One list of floats per line of the signature file.
            signature = [np.asarray(line.split(), dtype=float).tolist() for line in f]
        t = (user, sig)
        sig_gt[t] = signature


### Signature drawing

We thought it would be nice to be able to redraw a signature from the recorded data.

In [4]:
# Preview one signature as a table. `signature` is the variable left over from
# the loading loop, i.e. the last signature that was read.
df = pd.DataFrame(
    data=signature,
    columns=[
        "timestamp",
        "x",
        "y",
        "pressure",
        "penup",
        "azimuth",
        "inclinaison",
    ],
)
df

Unnamed: 0,timestamp,x,y,pressure,penup,azimuth,inclinaison
0,0.00,49.64,32.36,239.0,1.0,350.0,54.0
1,0.01,50.31,33.16,359.0,0.0,349.0,53.0
2,0.02,50.58,32.98,361.0,0.0,347.0,54.0
3,0.03,50.79,32.65,361.0,0.0,347.0,54.0
4,0.04,50.79,32.21,377.0,0.0,346.0,54.0
...,...,...,...,...,...,...,...
452,4.52,59.27,30.04,806.0,0.0,336.0,55.0
453,4.53,58.89,29.69,796.0,0.0,336.0,55.0
454,4.54,59.17,29.90,785.0,0.0,335.0,54.0
455,4.55,59.89,30.61,748.0,0.0,336.0,55.0


In [5]:
canvas = Canvas(width=500, height=200, layout=dict(width="100%"))

# Shift the signature so that its smallest x and y become 0.
df["x"] = df["x"] - df["x"].min()
df["y"] = df["y"] - df["y"].min()

# Coordinates are scaled by 10 and padded by 10 px. y is subtracted from max_y
# so that larger y values are drawn higher up on the canvas.
max_y = df["y"].max() * 10 + 10
old_x = df["x"].iloc[0] * 10 + 10
old_y = max_y - df["y"].iloc[0] * 10

# Draw one segment per sample, from the previous point to the current one.
for row in df.itertuples():
    # Segments ending on a zero-pressure sample are drawn in white.
    canvas.stroke_style = "white" if row.pressure == 0 else "gray"
    canvas.line_width = row.pressure / 200
    new_x = row.x * 10 + 10
    new_y = max_y - row.y * 10
    canvas.stroke_line(old_x, old_y, new_x, new_y)
    old_x = new_x
    old_y = new_y

canvas

Canvas(height=200, layout=Layout(width='100%'), width=500)

In [6]:
canvas = Canvas(width=500, height=3000, layout=dict(width="100%"))

# Each signature gets a 300 px tall band, so the 3000 px canvas holds exactly
# 10 of them. Only the first 10 signatures are drawn; an 11th would start at
# y offset 3010, below the bottom edge of the canvas.
for i, t in enumerate(list(sig_gt)[:10]):

    df = pd.DataFrame(sig_gt[t],columns=["timestamp","x","y","pressure","penup","azimuth","inclinaison"])

    # Shift the signature so that its smallest x and y become 0.
    df["x"] = df["x"] - df["x"].min()
    df["y"] = df["y"] - df["y"].min()

    x_offset = 10
    y_offset = i * 300 + 10

    # y is subtracted from max_y so that larger y values are drawn higher up.
    max_y = df["y"].max() * 10 + y_offset

    old_x = df["x"].iloc[0] * 10 + x_offset
    old_y = max_y - df["y"].iloc[0] * 10

    # Draw one segment per sample, from the previous point to the current one.
    for row in df.itertuples():
        # Segments ending on a zero-pressure sample are drawn in white.
        canvas.stroke_style = "white" if row.pressure == 0 else "gray"
        canvas.line_width = row.pressure / 200
        new_x = row.x * 10 + x_offset
        new_y = max_y - row.y * 10
        canvas.stroke_line(old_x, old_y, new_x, new_y)
        old_x = new_x
        old_y = new_y

canvas


Canvas(height=3000, layout=Layout(width='100%'), width=500)

### DTW values for enrollment
Here we compute, for each of the 5 reference signatures of every user, a DTW-based threshold from its DTW distances to the other reference signatures of the same user.

In [7]:
def thresh(values):
    """Blend the mean and the maximum of `values` with weights 9/10 and 1/10."""
    weighted_sum = 9 * np.mean(values) + max(values)
    return weighted_sum / 10

In [8]:
dtw_gt = {}
# For every reference signature, compute its DTW cost against the other
# reference signatures of the same user and reduce those costs with thresh().
for t, sig in sig_gt.items():
    u, s = t
    values = [
        compute_dtw(sig, other)
        for (uj, sj), other in sig_gt.items()
        if uj == u and sj != s
    ]
    dtw_gt[t] = thresh(values)

### Get signatures DTW

In [9]:
sig_ver = {}
# Load signatures 01..45 of every user from the verification directory.
for user in users:
    for i in range(1, 46):
        sig = f'{i:02d}'  # zero-padded signature number, e.g. "07"
        with open(f'data/verification/{user}-{sig}.txt', 'r') as f:
            # One list of floats per line of the signature file.
            signature = [np.asarray(line.split(), dtype=float).tolist() for line in f]
            t = (user, sig)
            sig_ver[t] = signature

In [10]:
dtw_ver = {}
# For every verification signature, compute its DTW cost against the reference
# signatures of the same user and reduce those costs with thresh().
# NOTE(review): references carrying the same signature number as the
# verification signature are skipped (sj != s) -- confirm this is intended.
for t, sig in sig_ver.items():
    u, s = t
    values = [
        compute_dtw(sig, reference)
        for (uj, sj), reference in sig_gt.items()
        if uj == u and sj != s
    ]
    dtw_ver[t] = thresh(values)

In [11]:
# 30 users x 45 verification signatures each = 1350 entries.
assert len(dtw_ver) == 1350, 'expected length of 1350 was not reached'

In [12]:
# ground truth computation
# false -> forgery
# true -> genuine
# Each line is read as "<user>-<signature> ... <label>", where the label "g"
# marks a genuine signature. split() without arguments discards the line
# terminator ("\n" or "\r\n") and also works when the last line has no trailing
# newline, so the label is compared intact.
gt = {}
with open('data/gt.txt') as file:
    for line in file:
        fields = line.split()
        if not fields:
            continue  # ignore blank lines
        id_parts = fields[0].split('-')
        gt[(id_parts[0], id_parts[1])] = fields[-1] == 'g'