In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import glob

In [2]:
df = pd.concat([pd.read_csv(f) for f in glob.glob('interactions-UID_*.csv')], ignore_index = True)

In [3]:
df = df.replace(to_replace = np.nan, 
                 value = 0)
df[df==np.inf]=np.nan
df.fillna(df.mean(), inplace=True)

In [4]:
features = ['interStrokeTime','strokeDuration','startX','startY','stopX','stopY','distanceEndToEnd','meanResultantLength','UDLRflag','directionEndToEnd','20pcPairwiseVelocity','50pcPairwiseVelocity','80pcPairwiseVelocity','20pcPairwiseAccel','50pcPairwiseAccel','80pcPairwiseAccel','medianVelocityLast3Points','largestDeviationEndToEnd','20pcDeviationEndToEnd','50pcDeviationEndToEnd','80pcDeviationEndToEnd','averageDirection','trajectoryLength','distE2EandtrajLength','averageVelocity','medianAccellerationAtFirst5points','midStrokePressure','midStrokeAreaCovered','midStrokeFingerOrientation','changeFingerOrientation','phoneOrientation','docID','phoneID']
x = df.loc[:, features].values

In [5]:
y = df.loc[:,['userID']].values

In [6]:
x_scaled = StandardScaler().fit_transform(x)

In [7]:
pd.DataFrame(data = x_scaled, columns = features)

Unnamed: 0,interStrokeTime,strokeDuration,startX,startY,stopX,stopY,distanceEndToEnd,meanResultantLength,UDLRflag,directionEndToEnd,...,distE2EandtrajLength,averageVelocity,medianAccellerationAtFirst5points,midStrokePressure,midStrokeAreaCovered,midStrokeFingerOrientation,changeFingerOrientation,phoneOrientation,docID,phoneID
0,-0.000169,-0.423700,0.955605,0.126532,-1.283096,-0.479598,0.986518,0.337695,1.417415,-2.063726,...,0.387059,0.046811,-0.039082,0.996462,1.577443,-0.10869,0.0,-0.236419,0.406377,-0.638544
1,0.000155,-0.559132,0.932894,0.631638,-1.410321,-0.479598,1.206494,0.030156,1.417415,-1.950336,...,0.270127,0.950036,-0.037485,1.097627,2.394702,-0.10869,0.0,-0.236419,0.406377,-0.638544
2,-0.000174,-0.463048,-1.417951,-0.414763,1.494547,-0.070042,2.064413,0.797119,-1.086001,-0.078725,...,0.295646,0.591067,-0.030804,1.047044,2.394702,-0.10869,0.0,-0.236419,0.406377,-0.638544
3,-0.000162,-0.487756,-1.565594,-0.065889,1.505147,0.129485,2.217303,1.019343,-1.086001,-0.105823,...,0.418564,0.767478,-0.045908,1.654034,2.803331,-0.10869,0.0,-0.236419,0.406377,-0.638544
4,-0.000182,-0.477690,1.296376,0.836196,-1.484524,-0.605615,1.829362,-0.798730,1.417415,-1.907970,...,-0.181878,0.725189,-0.033992,1.299957,3.620774,-0.10869,0.0,-0.236419,0.406377,-0.638544
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21153,0.000424,1.203308,-0.747933,-0.847712,-1.049857,2.020032,2.480747,0.966383,0.165707,0.746666,...,0.398758,-0.609106,-0.040133,0.945879,1.577443,-0.10869,0.0,-0.236419,-0.754043,-0.638544
21154,0.001133,0.601187,-1.406595,0.980513,-1.537536,3.269828,2.017428,1.026994,0.165707,0.727699,...,0.384665,-0.548450,-0.040395,1.805782,1.986072,-0.10869,0.0,-0.236419,-0.754043,-0.638544
21155,0.000501,1.307627,-1.486094,-0.727448,-1.622350,2.503127,2.961654,0.988791,0.165707,0.722145,...,0.276874,-0.592953,-0.041056,0.945879,0.760000,-0.10869,0.0,-0.236419,-0.754043,-0.638544
21156,0.000303,0.576480,-1.508805,-0.571106,-1.579948,2.608151,2.916930,1.048146,0.165707,0.712100,...,0.385916,-0.480135,-0.040325,0.642384,0.351371,-0.10869,0.0,-0.236419,-0.754043,-0.638544


In [8]:
train_pct_index = int(0.8 * len(x_scaled))
X_train, X_test = x_scaled[:train_pct_index], x_scaled[train_pct_index:]
y_train, y_test = y[:train_pct_index], y[train_pct_index:]

In [50]:
pca = PCA().fit(x_scaled)

In [51]:
pca = PCA(n_components=20)
x_reduced = pca.fit_transform(x_scaled)
principalDf = pd.DataFrame(data = x_reduced
             , columns = ['principal component 1', 'principal component 2','principal component 3', 'principal component 4','principal component 5', 'principal component 6','principal component 7', 'principal component 8','principal component 9', 'principal component 10','principal component 11', 'principal component 12','principal component 13', 'principal component 14','principal component 15', 'principal component 16','principal component 17', 'principal component 18','principal component 19', 'principal component 20'])

principalDf.head(20)

Unnamed: 0,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,principal component 9,principal component 10,principal component 11,principal component 12,principal component 13,principal component 14,principal component 15,principal component 16,principal component 17,principal component 18,principal component 19,principal component 20
0,2.082493,-1.615634,-1.056804,0.379364,0.132815,1.562285,-0.886825,-0.205907,-1.459704,0.056158,0.682038,-1.161981,-1.890824,0.112649,-0.110155,0.511048,-0.325054,-0.60518,-0.32892,1.283186
1,4.189172,-2.532083,-1.423491,0.924696,0.921155,0.504413,-0.523819,0.388297,-1.471504,-1.260269,2.018773,-1.114319,-1.861933,0.190907,-0.780788,-0.64023,2.076109,-1.063487,-0.080246,0.196648
2,0.797883,3.472874,-1.304161,0.834214,2.14016,-0.669098,-1.109345,-0.921439,0.377234,-1.041611,1.531094,-0.675536,-1.296895,-0.142577,-0.854766,0.674986,-0.86914,-0.207838,-0.036475,-0.164606
3,1.147363,2.254397,-1.226318,0.884979,2.340981,-1.44046,-1.438959,-0.862135,0.191028,-0.75185,1.447316,-0.848018,-1.546157,0.239729,-0.573344,1.263979,-0.849984,-0.494099,-0.159768,-0.246987
4,3.007121,-5.021063,-0.859257,1.077232,2.160451,1.066666,-0.382093,0.089912,-1.336387,-0.867741,2.265653,-1.482688,-2.665643,0.233484,-1.507818,0.363491,-0.628532,-0.986645,-0.168173,0.103793
5,2.923549,-1.437473,-1.23394,1.013593,-0.202836,1.026728,-0.846538,0.231667,-1.434284,-0.798882,1.546269,-1.085081,-1.801957,0.599599,-0.43704,0.173527,0.921924,-1.091356,-0.262114,0.237402
6,-2.039038,-1.220264,0.922956,1.26965,-1.90364,-0.238764,-0.415345,-2.516536,2.533555,-0.864862,-0.074827,0.382613,0.741033,2.173576,-1.408953,-0.879837,0.895902,-1.318488,-2.794243,1.494816
7,2.53858,2.188764,-1.693398,0.725202,1.82951,-1.961893,-0.940435,-0.674526,0.176185,-0.947193,1.793791,-0.311536,-0.617622,0.085535,-0.175227,0.60303,0.045623,-0.413739,-0.034747,-0.331011
8,2.67382,2.364311,0.190695,0.333421,1.292025,-1.656785,-0.873304,-0.108206,0.925864,-0.144856,1.512641,-0.465748,-0.845646,-0.007231,-0.089505,0.675035,-0.147717,-0.401303,-0.12,-0.02087
9,3.701708,-2.990826,-1.526485,1.083952,1.642556,1.0549,0.011326,0.150335,-1.797425,-0.635792,1.951605,-1.211828,-2.106202,-0.116126,-0.841659,-0.281324,0.323365,-0.661597,-0.156435,0.387927


In [52]:
df[['userID']].head()

Unnamed: 0,userID
0,1
1,1
2,1
3,1
4,1


In [56]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x_reduced, y, test_size=0.3, random_state=109)

In [62]:
from sklearn import svm
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

  y = column_or_1d(y, warn=True)


In [67]:
from sklearn.metrics import confusion_matrix, classification_report

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

[[ 81   1   0 ...   0   0   1]
 [  2 317   0 ...   5   0   0]
 [  0   0  59 ...   0   0   0]
 ...
 [  2  12   0 ...  42   0   0]
 [  0   0   0 ...   0  59   0]
 [  1   1   0 ...   0   0  33]]
              precision    recall  f1-score   support

           1       0.63      0.68      0.65       120
           2       0.68      0.85      0.75       373
           3       0.32      0.25      0.28       234
           4       0.65      0.49      0.56        79
           5       0.45      0.42      0.43       125
           6       0.70      0.75      0.72       161
           7       0.49      0.57      0.53       162
           8       0.63      0.70      0.66       191
           9       0.78      0.83      0.81       125
          10       0.51      0.54      0.53       134
          11       0.47      0.65      0.55       125
          12       0.67      0.70      0.69       111
          13       0.33      0.20      0.25        79
          14       0.63      0.89      0.74       1