# Fingerprint Spoof Detection
**Name:** Hunter Malinowski

**Date:** 4/9/24

### Read Data

In [32]:
# import libraries
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.feature import hog,local_binary_pattern
import glob
import numpy as np
from sklearn import svm
from skimage import feature
from sklearn.metrics import classification_report
from skimage.color import rgb2gray
from sklearn import neighbors, tree
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

In [24]:
# repress keepdims warning
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

In [2]:
# import training data
live_train = glob.glob('data/train_live/live/*.png') #positive
spoof_train = glob.glob('data/train_spoof/spoof/spoof/*.png') # negative

# import testing data
live_test = glob.glob('data/test_live/live/*.png') # positive
spoof_test = glob.glob('data/test_spoof/spoof/spoof/*.png') # negative

In [4]:
# initialize empty lists to append data
live_train_images = []
spoof_train_images = []
live_test_images = []
spoof_test_images = []

# read images from given path and append to the variavles
for path in live_train: live_train_images.append(imread(path))
for path in spoof_train: spoof_train_images.append(imread(path))
for path in live_test: live_test_images.append(imread(path))
for path in spoof_test: spoof_test_images.append(imread(path))

In [5]:
# convert images to np arrays
live_train_images, spoof_train_images = np.asarray(live_train_images), np.asarray(spoof_train_images)
live_test_images, spoof_test_images = np.asarray(live_test_images), np.asarray(spoof_test_images)

200
207
200
200


### HOG and LBP Functions

In [6]:
# change the type of feature you want to use here
feature_type = "lbp" #hog or lbp

# initialize empty lists to append features
live_train_features, spoof_train_features = [], []
live_test_features, spoof_test_features = [], []

if(feature_type == "hog"):
    
    # calculate and store hog features of training data
    for live_img, spoof_img in zip(live_train_images, spoof_train_images):
        live_train_features.append(hog(live_img, feature_vector = True))
        spoof_train_features.append(hog(spoof_img, feature_vector = True))
    
    # calculate and store hog features of testing data
    for live_img, spoof_img in zip(live_test_images, spoof_test_images):
        live_test_features.append(hog(live_img, feature_vector = True))
        spoof_test_features.append(hog(spoof_img, feature_vector = True))
        
if(feature_type == "lbp"):

# parameters copied from scikit docs of lbp
    METHOD = 'uniform'
    R = 3
    P = 8 * R

    # calculate and store lbp features of training data
    for live_img, spoof_img in zip(live_train_images, spoof_train_images):
        live_img = rgb2gray(live_img)
        spoof_img = rgb2gray(spoof_img)
        live_train_features.append(local_binary_pattern(live_img, P, R, METHOD).flatten())
        spoof_train_features.append(local_binary_pattern(spoof_img, P, R, METHOD).flatten())
    
    # calculate and store lbp features of training data   
    for live_img, spoof_img in zip(live_test_images, spoof_test_images):
        # convert rgb to gray
        live_img = rgb2gray(live_img)
        # convert rgb to gray
        spoof_img = rgb2gray(spoof_img)
        live_test_features.append(local_binary_pattern(live_img, P, R, METHOD).flatten())
        spoof_test_features.append(local_binary_pattern(spoof_img, P, R, METHOD).flatten())

In [17]:
# define labels 1 for live and 0 for spoof
# i.e. create a list of 200 1's and 200 0's 
labels_train = [[1]]*len(live_train_features) + [[0]]*len(spoof_train_features)

# merge both positive and negative training data
full_train = live_train_features + spoof_train_features

# assign labels for all test data, 1 for positive, 0 for negative
labels_test = [[1]]*len(live_test_features) + [[0]]*len(spoof_test_features)

# merge both positive and negative training data
full_test = live_test_features + spoof_test_features

In [18]:
features_train = np.array(full_train)
features_test = np.array(full_test)

# stack features and labels together
data_frame_train = np.hstack((features_train,labels_train))

# shuffle train data
np.random.shuffle(data_frame_train)

In [19]:
# split features and labels of train data
x_train = data_frame_train[:len(data_frame_train),:-1]
y_train = data_frame_train[:len(data_frame_train),-1]

# ready test data and labels
x_test = features_test
y_test = labels_test

### Model 1: Support Vector Model

In [20]:
m1 = svm.SVC()

# fit the model with train set
m1.fit(x_train,y_train)

# predict labels: y_pred from x_test
y_pred = m1.predict(x_test)

# calculate other metrics
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.99      0.94       200
           1       0.99      0.89      0.93       200

    accuracy                           0.94       400
   macro avg       0.94      0.94      0.94       400
weighted avg       0.94      0.94      0.94       400



### Model 2: KNN

In [25]:
m2 = neighbors.KNeighborsClassifier(n_neighbors = 5)
m2.fit(x_train, y_train)
y_pred = m2.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.15      0.26       200
           1       0.54      1.00      0.70       200

    accuracy                           0.57       400
   macro avg       0.77      0.57      0.48       400
weighted avg       0.77      0.57      0.48       400



### Model 3: Gaussian Naive Bayes

In [28]:
m3 = GaussianNB()
m3.fit(x_train, y_train)
y_pred = m3.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       200
           1       0.49      0.98      0.66       200

    accuracy                           0.49       400
   macro avg       0.25      0.49      0.33       400
weighted avg       0.25      0.49      0.33       400



### Model 4: Decision Tree

In [30]:
m4 = tree.DecisionTreeClassifier()
m4.fit(x_train, y_train)
y_pred = m4.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.74      1.00      0.85       200
           1       1.00      0.65      0.79       200

    accuracy                           0.82       400
   macro avg       0.87      0.82      0.82       400
weighted avg       0.87      0.82      0.82       400



### Model 5: Random Forest

In [33]:
m5 = RandomForestClassifier(n_estimators=10)
m5.fit(x_train, y_train)
y_pred = m5.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.70      0.94      0.80       200
           1       0.90      0.59      0.72       200

    accuracy                           0.77       400
   macro avg       0.80      0.77      0.76       400
weighted avg       0.80      0.77      0.76       400

