# Euclidean Distance

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [2]:
# Load the keystroke data from 'keystroke.csv' (path is relative to the
# working directory) and display the resulting frame.
original_df = pd.read_csv('keystroke.csv')
original_df

Unnamed: 0,subject,sessionIndex,rep,H.period,DD.period.t,UD.period.t,H.t,DD.t.i,UD.t.i,H.i,...,H.a,DD.a.n,UD.a.n,H.n,DD.n.l,UD.n.l,H.l,DD.l.Return,UD.l.Return,H.Return
0,s002,1,1,0.1491,0.3979,0.2488,0.1069,0.1674,0.0605,0.1169,...,0.1349,0.1484,0.0135,0.0932,0.3515,0.2583,0.1338,0.3509,0.2171,0.0742
1,s002,1,2,0.1111,0.3451,0.2340,0.0694,0.1283,0.0589,0.0908,...,0.1412,0.2558,0.1146,0.1146,0.2642,0.1496,0.0839,0.2756,0.1917,0.0747
2,s002,1,3,0.1328,0.2072,0.0744,0.0731,0.1291,0.0560,0.0821,...,0.1621,0.2332,0.0711,0.1172,0.2705,0.1533,0.1085,0.2847,0.1762,0.0945
3,s002,1,4,0.1291,0.2515,0.1224,0.1059,0.2495,0.1436,0.1040,...,0.1457,0.1629,0.0172,0.0866,0.2341,0.1475,0.0845,0.3232,0.2387,0.0813
4,s002,1,5,0.1249,0.2317,0.1068,0.0895,0.1676,0.0781,0.0903,...,0.1312,0.1582,0.0270,0.0884,0.2517,0.1633,0.0903,0.2517,0.1614,0.0818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20395,s057,8,46,0.0884,0.0685,-0.0199,0.1095,0.1290,0.0195,0.0945,...,0.1219,0.1383,0.0164,0.0820,0.1329,0.0509,0.1005,0.2054,0.1049,0.1047
20396,s057,8,47,0.0655,0.0630,-0.0025,0.0910,0.1148,0.0238,0.0916,...,0.1008,0.0512,-0.0496,0.1037,0.0868,-0.0169,0.1445,0.2206,0.0761,0.1198
20397,s057,8,48,0.0939,0.1189,0.0250,0.1008,0.1122,0.0114,0.0721,...,0.0913,0.1169,0.0256,0.0689,0.1311,0.0622,0.1034,0.2017,0.0983,0.0905
20398,s057,8,49,0.0923,0.1294,0.0371,0.0913,0.0990,0.0077,0.0992,...,0.0882,0.0821,-0.0061,0.0576,0.0697,0.0121,0.0979,0.1917,0.0938,0.0931


In [3]:
# Convert subject labels such as 's002' into the integer 2.
# The digit run is extracted instead of slicing off a fixed two-character
# prefix: slicing would turn a three-digit id like 's100' into 0, and it raises
# on re-run once the column already holds integers. Going through str first
# makes the cell idempotent.
original_df['subject'] = (
    original_df['subject']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)
    .astype(int)
)

In [4]:
# Show the column labels available in original_df.
original_df.columns

Index(['subject', 'sessionIndex', 'rep', 'H.period', 'DD.period.t',
       'UD.period.t', 'H.t', 'DD.t.i', 'UD.t.i', 'H.i', 'DD.i.e', 'UD.i.e',
       'H.e', 'DD.e.five', 'UD.e.five', 'H.five', 'DD.five.Shift.r',
       'UD.five.Shift.r', 'H.Shift.r', 'DD.Shift.r.o', 'UD.Shift.r.o', 'H.o',
       'DD.o.a', 'UD.o.a', 'H.a', 'DD.a.n', 'UD.a.n', 'H.n', 'DD.n.l',
       'UD.n.l', 'H.l', 'DD.l.Return', 'UD.l.Return', 'H.Return'],
      dtype='object')

## Splitting into training set and test set

In [5]:
# Sessions 1, 3, 5 and 7 make up the training set; sessions 2, 4, 6 and 8 the test set.
training_set = original_df[original_df['sessionIndex'].isin([1, 3, 5, 7])]
test_set = original_df[original_df['sessionIndex'].isin([2, 4, 6, 8])]


## Splitting further into hold times and up-down times

In [7]:
# Column groups shared by the training and test frames:
#   PKE -> subject label plus the ten hold-time ('H.*') columns listed below
#          ('H.Return' is not selected);
#   PKI -> subject label plus the ten up-down ('UD.*') columns.
PKE_columns = ['subject', 'H.period', 'H.t', 'H.i', 'H.e', 'H.five',
               'H.Shift.r', 'H.o', 'H.a', 'H.n', 'H.l']
PKI_columns = ['subject', 'UD.period.t', 'UD.t.i', 'UD.i.e', 'UD.e.five',
               'UD.five.Shift.r', 'UD.Shift.r.o', 'UD.o.a', 'UD.a.n',
               'UD.n.l', 'UD.l.Return']

# Select each group and renumber the rows from zero, discarding the old index.
training_PKE = training_set[PKE_columns].reset_index(drop=True)
training_PKI = training_set[PKI_columns].reset_index(drop=True)

test_PKE = test_set[PKE_columns].reset_index(drop=True)
test_PKI = test_set[PKI_columns].reset_index(drop=True)


## Creating Threshold Profiles

In [8]:
# Display the training hold-time frame.
training_PKE

Unnamed: 0,subject,H.period,H.t,H.i,H.e,H.five,H.Shift.r,H.o,H.a,H.n,H.l
0,2,0.1491,0.1069,0.1169,0.1417,0.1146,0.1067,0.1016,0.1349,0.0932,0.1338
1,2,0.1111,0.0694,0.0908,0.0829,0.0689,0.1570,0.1066,0.1412,0.1146,0.0839
2,2,0.1328,0.0731,0.0821,0.0808,0.0892,0.1454,0.1365,0.1621,0.1172,0.1085
3,2,0.1291,0.1059,0.1040,0.0900,0.0913,0.1454,0.0956,0.1457,0.0866,0.0845
4,2,0.1249,0.0895,0.0903,0.0805,0.0742,0.1243,0.0430,0.1312,0.0884,0.0903
...,...,...,...,...,...,...,...,...,...,...,...
10195,57,0.0644,0.0757,0.0602,0.1038,0.0803,0.0641,0.0784,0.0932,0.0808,0.1142
10196,57,0.0784,0.0715,0.0845,0.1051,0.0652,0.0610,0.0900,0.0910,0.0646,0.1092
10197,57,0.0604,0.0768,0.0757,0.0697,0.0472,0.0657,0.0836,0.0646,0.0784,0.0926
10198,57,0.0749,0.0723,0.0744,0.0929,0.0755,0.0628,0.0971,0.0916,0.0766,0.0886


In [9]:
# Euclidean distance, per training row, between the hold-time vector (PKE) and
# the up-down-time vector (PKI).
#
# Computed in one vectorised step instead of a Python loop over a hard-coded
# 10200 rows, so it follows the real frame size and does not rebuild the
# feature frame on every iteration. 'euc' is excluded explicitly so the cell
# can be re-run after that column has been added to training_PKE.
hold_train = training_PKE.drop(columns=['subject', 'euc'], errors='ignore').to_numpy(dtype=float)
updown_train = training_PKI.drop(columns=['subject'], errors='ignore').to_numpy(dtype=float)

# Both frames are cut from the same rows of training_set, so the subject
# labels must line up row by row. Fail loudly here rather than silently
# skipping rows, which would leave the result shorter than training_PKE.
if not np.array_equal(training_PKE['subject'].to_numpy(), training_PKI['subject'].to_numpy()):
    raise ValueError('training_PKE and training_PKI are not aligned row by row on subject')

# One distance per row, shape (n_rows,).
training_euc = np.linalg.norm(hold_train - updown_train, axis=1)

In [10]:
# Flatten the per-row distances into a single (n_rows, 1) column vector and
# store it on the hold-time frame as 'euc'.
euc_column = np.asarray(training_euc).reshape(-1, 1)
training_euc = euc_column
training_PKE['euc'] = euc_column

In [11]:
# Mean distance per subject; the result is indexed by 'subject' with a single 'euc' column.
euc_profiles = training_PKE.groupby('subject')[['euc']].mean()

In [12]:
# Acceptance window for each subject: the mean distance minus/plus 0.04.
euc_profiles = euc_profiles.assign(
    min_tresh=euc_profiles['euc'] - 0.04,
    max_tresh=euc_profiles['euc'] + 0.04,
)

In [13]:
# Move 'subject' out of the index into a regular column.
# The guard keeps the cell safe to re-run: an unconditional second
# reset_index() would insert an extra 'index' column and shift the position of
# every column after it.
if 'subject' not in euc_profiles.columns:
    euc_profiles = euc_profiles.reset_index()

In [14]:
# Display the per-subject profile table (mean distance and its threshold window).
euc_profiles

Unnamed: 0,subject,euc,min_tresh,max_tresh
0,2,0.576105,0.536105,0.616105
1,3,0.524142,0.484142,0.564142
2,4,0.510368,0.470368,0.550368
3,5,0.532244,0.492244,0.572244
4,7,0.367716,0.327716,0.407716
5,8,0.371871,0.331871,0.411871
6,10,0.253978,0.213978,0.293978
7,11,0.412279,0.372279,0.452279
8,12,0.47789,0.43789,0.51789
9,13,0.379616,0.339616,0.419616


In [15]:
# Renumber the rows of both test frames from zero, discarding the previous index.
test_PKE = test_PKE.reset_index(drop=True)
test_PKI = test_PKI.reset_index(drop=True)

## Classifying test set

In [45]:
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [84]:
import random
from sklearn.metrics.pairwise import euclidean_distances

# Estimate how often a randomly drawn test sample's hold/up-down distance falls
# strictly inside the (min_tresh, max_tresh) window of its own subject's profile.

# Number of random draws (with replacement) used for the estimate.
n_trials = 500

# Private, seeded generator: the printed rate is reproducible from run to run
# and the global `random` state is left untouched.
sampler = random.Random(42)

# Feature matrices without the subject label. test_PKE and test_PKI are cut
# from the same rows of test_set, so one row position addresses both.
test_hold = test_PKE.drop('subject', axis=1).to_numpy(dtype=float)
test_updown = test_PKI.drop('subject', axis=1).to_numpy(dtype=float)
test_subjects = test_PKE['subject'].to_numpy()

# Thresholds are looked up by column name, not by position, so an extra or
# reordered column in euc_profiles cannot silently select the wrong bounds.
thresholds = euc_profiles.set_index('subject')[['min_tresh', 'max_tresh']]

count = 0

for _ in range(n_trials):

    # Row bound follows the actual test-set size instead of a hard-coded 10199.
    row = sampler.randrange(len(test_hold))

    # Distance between this sample's hold-time and up-down-time vectors;
    # [[row]] keeps the 2-D shape euclidean_distances expects.
    distance = euclidean_distances(test_hold[[row]], test_updown[[row]])[0, 0]

    # Threshold window of the sample's own subject (raises KeyError if the
    # subject has no training profile).
    lower, upper = thresholds.loc[test_subjects[row], ['min_tresh', 'max_tresh']]

    # Count the sample when its distance lies strictly inside the window.
    if lower < distance < upper:
        count = count + 1

print(count/n_trials)

0.112
