In [1]:
from model import DogIdentificationModel
from params import DEVICE, CPU_DEVICE
# from data_loader_test import DogsDataSet_Test
# from data_loader_test_closest import DogsDataSet_Test
from data_loader_test_breed import DogsDataSet_Test
from loss import sigmoidL2
from sklearn import metrics
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import pandas as pd
import numpy as np

## Loading Model

In [2]:
# Instantiate the network with its default constructor arguments; the trained
# weights are restored from a checkpoint via load_state_dict in a later cell.
model = DogIdentificationModel()

In [3]:
# Move the model onto DEVICE (imported from params), the same device the image
# batches are sent to in the evaluation loop.
model = model.to(DEVICE)

In [4]:
# Alternative checkpoints that can be evaluated instead:
# checkpoint = torch.load('dog-identification-model-triplet.pt')
# checkpoint = torch.load('dog-identification-model-breed.pt')

# map_location remaps the stored tensors onto DEVICE at load time. Without it,
# a checkpoint that was saved from a CUDA device cannot be deserialized on a
# machine that has no GPU.
checkpoint = torch.load('dog-identification-model.pt', map_location=DEVICE)

# Restore the trained weights into the already-constructed model.
model.load_state_dict(checkpoint['model_state_dict'])

# Per-epoch mean losses that were stored in the checkpoint alongside the weights.
trainMeanLosses = checkpoint['trainMeanLosses']
validationMeanLosses = checkpoint['validationMeanLosses']

## Plotting model training results

In [None]:
# Plot the train and validation loss histories stored in the checkpoint on a
# single set of axes, save the figure to disk, then display it.
# NOTE(review): the y-axis says 'Triplet Loss' while the output file is named
# 'crossentropy_training.png' -- confirm which loss this checkpoint was trained with.
fig, ax = plt.subplots()
ax.set_title('Epoch v. Training/Validation Loss')
for historyKey, curveLabel in (('trainMeanLosses', 'Train'),
                               ('validationMeanLosses', 'Validation')):
    ax.plot(checkpoint[historyKey], label=curveLabel)
ax.set_xlabel('Epoch')
ax.set_ylabel('Triplet Loss')
ax.legend()
fig.savefig('crossentropy_training.png')
plt.show()

## Computing Test Results

In [5]:
# Number of image pairs per batch; passed as batch_size to the DataLoader below.
BATCH_SIZE = 30

In [6]:
# Dataset of image pairs used for evaluation.
# NOTE(review): the 'test' split is requested here even though the variables
# are named "validation" -- confirm which split is intended.
validationData = DogsDataSet_Test(dataType='test', filterBreed=True)

# Batched loader over that dataset. shuffle=False keeps the sample order fixed
# from run to run.
validationLoader = torch.utils.data.DataLoader(
    validationData,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

In [7]:
# Put the model in evaluation mode before scoring.
model = model.eval()

# Flat, per-pair accumulators filled batch by batch:
#   results -- sigmoidL2 output for each (img1, img2) pair
#   labels  -- the label the dataset yields for that pair
results = []
labels = []

# Wrap the loader in tqdm to get a progress bar; the batch index itself is not
# needed, so the loader is iterated directly.
loop = tqdm(validationLoader, total=len(validationLoader), leave=True)

# No gradients are needed for evaluation.
with torch.no_grad():
    for img1, img2, label in loop:
        img1 = img1.to(DEVICE)
        img2 = img2.to(DEVICE)

        # Encode both images of the pair with the same network.
        img1Encoding = model(img1)
        img2Encoding = model(img2)

        # Score the pair and bring the scores back to the CPU so they can be
        # converted to plain Python values.
        distance = sigmoidL2(img1Encoding, img2Encoding)
        distance = distance.to(CPU_DEVICE)

        # reshape(-1) always yields a 1-D tensor, so tolist() always returns a
        # list. squeeze() collapses a batch holding a single pair to a 0-d
        # tensor whose tolist() is a bare scalar, which breaks `+=` on a list.
        # NOTE(review): assumes sigmoidL2 returns one score per pair -- confirm.
        results += distance.reshape(-1).tolist()
        labels += label.reshape(-1).tolist()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.04s/it]


### Plotting ROC Curve

In [None]:
# CITATION: https://www.statology.org/plot-roc-curve-python/
# ROC curve and AUC of the pair scores against the ground-truth labels.
fpr, tpr, threshold = metrics.roc_curve(labels, results)
auc = metrics.roc_auc_score(labels, results)

# Classification cutoff to highlight on the curve (the same value the F1 and
# accuracy cells use).
markedThreshold = 0.6744843125343323

# roc_curve only reports thresholds derived from the scores it was given, so an
# exact-equality lookup of a hard-coded float raises ValueError whenever that
# precise value is absent. Pick the ROC point whose threshold is nearest instead.
bestIndex = int(np.argmin(np.abs(np.asarray(threshold) - markedThreshold)))

plt.plot(fpr, tpr, label="AUC=" + str(round(auc, 2)))
plt.scatter(fpr[bestIndex], tpr[bestIndex], marker='o', color='r',
            label="Optimum Classification Threshold=" + str(markedThreshold))
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc=4)
plt.title('Validation Data ROC Curve')
# plt.savefig('roc_curve_validation_triplet.png')
plt.show()

### Plotting Line Plot

In [None]:
# One row per compared pair: its score, a constant zero used as the y
# coordinate of the strip plot, and its ground-truth label.
df = pd.DataFrame({
    'results': results,
    'yaxis': np.zeros(len(results)),
    'label': labels,
})

In [None]:
# Draw every pair's score along a single horizontal line, one colour per
# class; the low alpha lets dense regions show up as darker bands.
# NOTE(review): the x-axis is labelled 'Similarity Score' but the plotted
# column comes from the variable named `distance` upstream -- confirm wording.
for classValue, colour, legendText in ((1, 'r', "Different Dog"),
                                       (0, 'g', "Same Dog")):
    classRows = df['label'] == classValue
    plt.scatter(df.loc[classRows, 'results'],
                df.loc[classRows, 'yaxis'],
                c=colour,
                alpha=0.1, label=legendText)

plt.legend()
plt.yticks([])  # the y coordinate is a dummy constant, so hide its ticks
plt.xlabel('Similarity Score')
plt.title('Similarity Scores of Same and Different Dog Comparisons')
# plt.savefig('crossentropy_lineplot.png')
plt.show()

## Determining Optimum Cutoff

In [8]:
import numpy as np
import pandas as pd
def Find_Optimal_Cutoff(target, predicted):
    # CITATION: https://stackoverflow.com/questions/28719067/roc-curve-and-cut-off-point-python
    """Return the ROC threshold at which sensitivity and specificity are closest.

    The ROC curve of ``predicted`` against ``target`` is computed and the
    threshold minimising ``|tpr - (1 - fpr)|`` is selected.

    Parameters
    ----------
    target : ground-truth labels, one per observation
    predicted : predicted scores, one per observation

    Returns
    -------
    list
        Single-element list holding the selected threshold.
    """
    fpr, tpr, threshold = metrics.roc_curve(target, predicted)

    # tf is zero exactly where the true-positive rate equals the
    # true-negative rate (1 - fpr).
    roc = pd.DataFrame({'tf': tpr - (1 - fpr), 'threshold': threshold})

    # Position of the row whose |tf| is smallest.
    closest = roc['tf'].abs().argsort()[:1]

    return roc.iloc[closest]['threshold'].tolist()

Find_Optimal_Cutoff(labels, results)

[0.632717490196228]

In [9]:
# F1 score when a pair is predicted 0 if its score is below the cutoff and 1
# otherwise.
metrics.f1_score(labels, np.where(np.asarray(results) < 0.6744843125343323, 0, 1))

0.8285714285714286

In [10]:
# Accuracy when a pair is predicted 0 if its score is below the cutoff and 1
# otherwise.
metrics.accuracy_score(labels, np.where(np.asarray(results) < 0.6744843125343323, 0, 1))

0.84