In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score

In [2]:
# Read the data file
df = pd.read_pickle('100_fingerprints_expanded_dataset.pkl')

In [3]:
# --------------------------------------
# SECOND STAGE MODEL TRAINING
# --------------------------------------
# Split training data into separate sets for right and left hand
dflh = df[df.Hand==0]
dfrh = df[df.Hand==1]

# Left hand training
Xlh = list(dflh["Image"])
ylh = dflh["Identity"]
X_trainlh, X_testlh, y_trainlh, y_testlh = train_test_split(Xlh,ylh,test_size=0.2) # ERROR! Wrong use of training set.
model_lid = RandomForestClassifier() # "lid" short for "left hand, identity"
model_lid.fit(X_trainlh,y_trainlh)

# Right hand training
Xrh = list(dfrh["Image"])
yrh = dfrh["Identity"]
X_trainrh, X_testrh, y_trainrh, y_testrh = train_test_split(Xrh,yrh,test_size=0.2)
model_rid = RandomForestClassifier() # "rid" short for "right hand, identity"
model_rid.fit(X_trainrh,y_trainrh)
# --------------------------------------
# END OF SECOND STAGE MODEL TRAINING
# --------------------------------------

RandomForestClassifier()

In [4]:
# -------------------------------------
# FIRST STAGE MODEL TRAINING
# -------------------------------------
X_train = X_trainlh+X_trainrh
X_test = X_testlh+X_testrh
ytr = pd.concat([y_trainrh,y_trainlh])
yte = pd.concat([y_testrh,y_testlh])
y_train = []
y_test = []

In [5]:
for i in range(len(ytr)):
    j = ytr.index[i]
    y_train.append([df["Hand"][j],j])

In [6]:
y_train

[[1, 2853],
 [1, 3430],
 [1, 2989],
 [1, 9716],
 [1, 9248],
 [1, 3254],
 [1, 3279],
 [1, 8936],
 [1, 2563],
 [1, 9224],
 [1, 7718],
 [1, 9492],
 [1, 9454],
 [1, 5884],
 [1, 1296],
 [1, 744],
 [1, 2986],
 [1, 9899],
 [1, 7852],
 [1, 5508],
 [1, 2565],
 [1, 1490],
 [1, 3303],
 [1, 2164],
 [1, 8778],
 [1, 7010],
 [1, 1453],
 [1, 2108],
 [1, 6429],
 [1, 2936],
 [1, 4438],
 [1, 5576],
 [1, 9704],
 [1, 1492],
 [1, 6406],
 [1, 709],
 [1, 9887],
 [1, 5271],
 [1, 8608],
 [1, 7993],
 [1, 4651],
 [1, 6386],
 [1, 4649],
 [1, 2949],
 [1, 5808],
 [1, 9403],
 [1, 4611],
 [1, 6988],
 [1, 6419],
 [1, 5083],
 [1, 4857],
 [1, 8694],
 [1, 8088],
 [1, 8069],
 [1, 8759],
 [1, 5219],
 [1, 3309],
 [1, 7751],
 [1, 7437],
 [1, 7358],
 [1, 8002],
 [1, 886],
 [1, 4871],
 [1, 2072],
 [1, 8650],
 [1, 857],
 [1, 5679],
 [1, 5135],
 [1, 2533],
 [1, 4641],
 [1, 9894],
 [1, 7753],
 [1, 5493],
 [1, 9868],
 [1, 2312],
 [1, 841],
 [1, 9860],
 [1, 8962],
 [1, 5370],
 [1, 9827],
 [1, 4176],
 [1, 2975],
 [1, 8486],
 [1, 2027

In [7]:
for i in range(len(yte)):
    j = yte.index[i]
    y_test.append([df["Hand"][j],j])

In [8]:
y_test

[[1, 5377],
 [1, 9740],
 [1, 715],
 [1, 2314],
 [1, 1160],
 [1, 9726],
 [1, 2546],
 [1, 8709],
 [1, 3329],
 [1, 5051],
 [1, 5660],
 [1, 1824],
 [1, 9782],
 [1, 7009],
 [1, 897],
 [1, 5129],
 [1, 9443],
 [1, 7997],
 [1, 9470],
 [1, 6417],
 [1, 5826],
 [1, 7756],
 [1, 8514],
 [1, 2901],
 [1, 6938],
 [1, 4873],
 [1, 3244],
 [1, 7836],
 [1, 7925],
 [1, 5467],
 [1, 5345],
 [1, 7479],
 [1, 5692],
 [1, 1108],
 [1, 2504],
 [1, 1217],
 [1, 3478],
 [1, 7849],
 [1, 7859],
 [1, 4674],
 [1, 9222],
 [1, 7742],
 [1, 1877],
 [1, 1890],
 [1, 9213],
 [1, 9797],
 [1, 2909],
 [1, 3247],
 [1, 5363],
 [1, 2848],
 [1, 8049],
 [1, 9719],
 [1, 4647],
 [1, 2069],
 [1, 3324],
 [1, 5630],
 [1, 855],
 [1, 4875],
 [1, 7470],
 [1, 5478],
 [1, 7406],
 [1, 2135],
 [1, 1131],
 [1, 5857],
 [1, 5202],
 [1, 3280],
 [1, 6973],
 [1, 5394],
 [1, 402],
 [1, 2961],
 [1, 5866],
 [1, 7733],
 [1, 2194],
 [1, 5470],
 [1, 4336],
 [1, 4374],
 [1, 7048],
 [1, 2163],
 [1, 3227],
 [1, 2804],
 [1, 2177],
 [1, 7316],
 [1, 9892],
 [1, 860

In [9]:
y_train_indexfree = [i[0] for i in y_train]
y_train_indexfree

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [11]:
model_hand = RandomForestClassifier()
model_hand.fit(X_train,y_train_indexfree) # Fit the model fo training data
# -------------------------------------
# END OF FIRST STAGE MODEL TRAINING
# -------------------------------------

RandomForestClassifier()

In [13]:
# --------------------------------------
# MODEL IMPLEMENTATION
# --------------------------------------
pred_hand = model_hand.predict(X_test)

X_lh = []
y_lh = []
X_rh = []
y_rh = []

for i in range(len(pred_hand)):
    if pred_hand[i]==0:
        X_lh.append(X_test[i])
        j = y_test[i][1]
        y_lh.append(df["Identity"][j])
    else:
        X_rh.append(X_test[i])
        j = y_test[i][1]
        y_rh.append(df["Identity"][j])
        
pred_lid = model_lid.predict(X_lh)
pred_rid = model_rid.predict(X_rh)
# --------------------------------------
# END OF MODEL IMPLEMENTATION
# --------------------------------------

In [14]:
# --------------------------------------
# EVALUATION OF RESULTS
# --------------------------------------
# Calculate accuracy
len_lh = len(pred_lid)
len_rh = len(pred_rid)
score_lh = 0
score_rh = 0
for i in range(len_lh):
    if pred_lh[i] == y_lh[i]:
        score_lh += 1/len_lh
for i in range(len_rh):
    if pred_rh[i] == y_rh[i]:
        score_rh += 1/len_rh
        
# Print the result
print(score_lh)
print(score_rh)
# --------------------------------------
# END OF EVALUATION OF RESULTS
# --------------------------------------

NameError: name 'pred_lh' is not defined