In [1]:
import torch
from torch import optim, nn
from torchvision import models, transforms
model = models.vgg16(pretrained=True)

In [2]:
class FeatureExtractor(nn.Module):
    def __init__(self, model):
        super(FeatureExtractor, self).__init__()
            # Extract VGG-16 Feature Layers
        self.features = list(model.features)
        self.features = nn.Sequential(*self.features)
            # Extract VGG-16 Average Pooling Layer
        self.pooling = model.avgpool
            # Convert the image into one-dimensional vector
        self.flatten = nn.Flatten()
            # Extract the first part of fully-connected layer from VGG16
        self.fc = model.classifier[0]

    def forward(self, x):
        # It will take the input 'x' until it returns the feature vector called 'out'
        out = self.features(x)
        out = self.pooling(out)
        out = self.flatten(out)
        out = self.fc(out) 
        return out 



In [3]:
# Initialize the model
model = models.vgg16(pretrained=True)
new_model = FeatureExtractor(model)

# Change the device to GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
new_model = new_model.to(device)

In [4]:

from tqdm import tqdm
import numpy as np
from os import listdir
from os.path import isfile, join
import cv2

# Transform the image, so it becomes readable with the model
transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.CenterCrop(512),
  transforms.Resize(448),
  transforms.ToTensor()                              
])

# Will contain the feature
features = []

mypath="data/not_perfectly_detected_ears/train"
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
# Iterate each image
for i in tqdm(onlyfiles):
    # Set the image path
    path = mypath+"/"+i
    # Read the file
    img = cv2.imread(path)
    # Transform the image
    img = transform(img)
    # Reshape the image. PyTorch model reads 4-dimensional tensor
    # [batch_size, channels, width, height]
    img = img.reshape(1, 3, 448, 448)
    img = img.to(device)
    # We only extract features, so we don't need gradient
    with torch.no_grad():
        # Extract the feature from the image
        feature = new_model(img)
    # Convert to NumPy Array, Reshape it, and save it to features variable
    features.append(feature.cpu().detach().numpy().reshape(-1))

# Convert to NumPy Array
features = np.array(features)
feature

100%|██████████| 1005/1005 [02:31<00:00,  6.65it/s]


tensor([[ 0.1816, -0.1406,  0.1905,  ..., -0.6663,  0.0574, -0.2019]],
       device='cuda:0')

In [10]:
featureDict

{'0001.jpg': array([ 0.245695  , -0.23641974,  0.1973161 , ..., -0.6961558 ,
         0.02783814, -0.16264307], dtype=float32),
 '0002.jpg': array([ 0.20687886, -0.14944386,  0.18758002, ..., -0.5914941 ,
         0.11307216, -0.24200231], dtype=float32),
 '00022.jpg': array([ 0.21789366, -0.09807745,  0.19267577, ..., -0.62539023,
         0.08971836, -0.21564236], dtype=float32),
 '00023.jpg': array([ 0.1740787 , -0.12054561,  0.1823278 , ..., -0.6533996 ,
         0.06401177, -0.25407138], dtype=float32),
 '0003.jpg': array([ 0.20574185, -0.15754595,  0.17279983, ..., -0.52520186,
         0.09601296, -0.27300128], dtype=float32),
 '0004.jpg': array([ 0.24312703, -0.18167822,  0.20284687, ..., -0.6660891 ,
         0.07719962, -0.21601799], dtype=float32),
 '0005.jpg': array([ 0.1557489 , -0.21807675,  0.21640985, ..., -0.5535372 ,
        -0.01554387, -0.21550915], dtype=float32),
 '0006.jpg': array([ 0.19065505, -0.10884919,  0.24523737, ..., -0.6440313 ,
         0.00277602, -0.1

In [12]:
from csv import reader

featureDict=dict(zip(onlyfiles,features))

featureWithIdsDict=dict()
i=0
with open('data/perfectly_detected_ears/annotations/recognition/ids.csv', 'r') as read_obj:
    csv_reader = reader(read_obj)
    # Iterate over each row in the csv using reader object
    for row in csv_reader:
        try:
            img,id=row
            img = img.replace(".png",".jpg")
            img = img.split("/")
            if(img[0]=="train"):
                f=featureDict[img[1]]
                featureWithIdsDict[img[1]]={"features":f,"id":id}
                i+=1
        except Exception:
            print(Exception)
            pass
featureWithIdsDict

<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>
<class 'Exception'>


{'0001.jpg': {'features': array([ 0.245695  , -0.23641974,  0.1973161 , ..., -0.6961558 ,
          0.02783814, -0.16264307], dtype=float32),
  'id': '100'},
 '0002.jpg': {'features': array([ 0.20687886, -0.14944386,  0.18758002, ..., -0.5914941 ,
          0.11307216, -0.24200231], dtype=float32),
  'id': '10'},
 '0003.jpg': {'features': array([ 0.20574185, -0.15754595,  0.17279983, ..., -0.52520186,
          0.09601296, -0.27300128], dtype=float32),
  'id': '11'},
 '0004.jpg': {'features': array([ 0.24312703, -0.18167822,  0.20284687, ..., -0.6660891 ,
          0.07719962, -0.21601799], dtype=float32),
  'id': '11'},
 '0005.jpg': {'features': array([ 0.1557489 , -0.21807675,  0.21640985, ..., -0.5535372 ,
         -0.01554387, -0.21550915], dtype=float32),
  'id': '11'},
 '0006.jpg': {'features': array([ 0.19065505, -0.10884919,  0.24523737, ..., -0.6440313 ,
          0.00277602, -0.1408849 ], dtype=float32),
  'id': '11'},
 '0007.jpg': {'features': array([ 0.19433554, -0.15774548

In [14]:
i=0
test_path="data/not_perfectly_detected_ears/test/"
res=[]
with open('data/perfectly_detected_ears/annotations/recognition/ids.csv', 'r') as read_obj:
    csv_reader = reader(read_obj)
    for row in csv_reader:
        img,id=row
        img = img.replace(".png",".jpg")
        img = img.split("/")
        if(img[0]=="test"):
            try:
                img = cv2.imread(test_path+img[1])
                # Transform the image
                img = transform(img)
                # Reshape the image. PyTorch model reads 4-dimensional tensor
                # [batch_size, channels, width, height]
                img = img.reshape(1, 3, 448, 448)
                img = img.to(device)
                # We only extract features, so we don't need gradient
                with torch.no_grad():
                    # Extract the feature from the image
                    feature = new_model(img)
                # Convert to NumPy Array, Reshape it, and save it to features variable
                f=feature.cpu().detach().numpy().reshape(-1)
                minDist=np.Infinity
                minEl=None
                intermidiateRes=dict()
                for img in featureWithIdsDict:
                    dist=np.linalg.norm(f - featureWithIdsDict[img]["features"])
                    if(dist<minDist):
                        minDist=dist
                        minEl=img
                    if featureWithIdsDict[img]["id"] not in intermidiateRes:
                        intermidiateRes[featureWithIdsDict[img]["id"]]=dist
                    else:
                        if intermidiateRes[featureWithIdsDict[img]["id"]]>dist:
                            intermidiateRes[featureWithIdsDict[img]["id"]]=dist
                newList=sorted(intermidiateRes.items(),reverse=False,key=lambda x:x[1])
                res.append((id,newList))
            except Exception:
                pass


In [None]:
i=0
rankRes=[[] for x in range(0,100)]
for el in res:
    for r in range(0,100):
        found=False
        for i in range(r+1):
            if(el[1][i][0]==el[0]):
                found=True
        if found:
            if rankRes[r] ==[]:
                rankRes[r]=0
            rankRes[r]=rankRes[r]+1
print(rankRes)
for r in range(0,100):
    print("Res rank{} {}/{} {}%".format(r+1,rankRes[r],len(res),(rankRes[r]/len(res))*100))
for r in range(0,100):
    print("{}".format((rankRes[r]/len(res))*100))

[15, 29, 34, 43, 50, 60, 67, 69, 74, 77, 84, 90, 101, 104, 107, 110, 114, 116, 120, 121, 124, 127, 130, 131, 133, 135, 137, 138, 142, 144, 148, 151, 152, 153, 156, 159, 162, 162, 166, 168, 171, 172, 174, 176, 180, 181, 182, 182, 183, 184, 188, 189, 191, 193, 195, 195, 199, 200, 204, 206, 210, 210, 210, 211, 211, 213, 214, 216, 217, 218, 219, 219, 222, 225, 225, 225, 228, 228, 231, 233, 233, 234, 235, 236, 236, 241, 241, 242, 242, 242, 242, 243, 244, 247, 247, 249, 249, 250, 250, 250]
Res rank1 15/250 6.0%
Res rank2 29/250 11.600000000000001%
Res rank3 34/250 13.600000000000001%
Res rank4 43/250 17.2%
Res rank5 50/250 20.0%
Res rank6 60/250 24.0%
Res rank7 67/250 26.8%
Res rank8 69/250 27.6%
Res rank9 74/250 29.599999999999998%
Res rank10 77/250 30.8%
Res rank11 84/250 33.6%
Res rank12 90/250 36.0%
Res rank13 101/250 40.400000000000006%
Res rank14 104/250 41.6%
Res rank15 107/250 42.8%
Res rank16 110/250 44.0%
Res rank17 114/250 45.6%
Res rank18 116/250 46.400000000000006%
Res rank19 12

In [None]:
i=0
test_path="data/perfectly_detected_ears/test/"
res=[]
from queue import PriorityQueue
from collections import Counter
with open('data/perfectly_detected_ears/annotations/recognition/ids.csv', 'r') as read_obj:
    csv_reader = reader(read_obj)
    for row in csv_reader:
        img,id=row
        img = img.split("/")
        if(img[0]=="test"):
            img = cv2.imread(test_path+img[1])
            # Transform the image
            img = transform(img)
            # Reshape the image. PyTorch model reads 4-dimensional tensor
            # [batch_size, channels, width, height]
            img = img.reshape(1, 3, 448, 448)
            img = img.to(device)
            # We only extract features, so we don't need gradient
            with torch.no_grad():
                # Extract the feature from the image
                feature = new_model(img)
            # Convert to NumPy Array, Reshape it, and save it to features variable
            f=feature.cpu().detach().numpy().reshape(-1)
            minDist=np.Infinity
            minEl=None
            q = PriorityQueue()
            c = dict()
            for img in featureWithIdsDict:
                dist=np.linalg.norm(f - featureWithIdsDict[img]["features"])
                id=featureWithIdsDict[img]["id"]
                if id not in c:
                    c[id]=(dist,1)
                else:
                    c[id]=(c[id][0]+dist, c[id][1]+1)
            res.append((id,min(c.items(), key=lambda x: x[1][0]/x[1][1])[0]))

In [None]:
i=0
for el in res:
    if(el[0]==el[1]):
        i+=1
print("Res {}/{}".format(i,len(res)))
print("Res {}%".format(i/len(res)))

Res 10/250
Res 0.04%


In [None]:
i=0
test_path="data/perfectly_detected_ears/test/"
res=[]
from queue import PriorityQueue
from collections import Counter
with open('data/perfectly_detected_ears/annotations/recognition/ids.csv', 'r') as read_obj:
    csv_reader = reader(read_obj)
    for row in csv_reader:
        img,id=row
        img = img.split("/")
        if(img[0]=="test"):
            img = cv2.imread(test_path+img[1])
            # Transform the image
            img = transform(img)
            # Reshape the image. PyTorch model reads 4-dimensional tensor
            # [batch_size, channels, width, height]
            img = img.reshape(1, 3, 448, 448)
            img = img.to(device)
            # We only extract features, so we don't need gradient
            with torch.no_grad():
                # Extract the feature from the image
                feature = new_model(img)
            # Convert to NumPy Array, Reshape it, and save it to features variable
            f=feature.cpu().detach().numpy().reshape(-1)
            minDist=np.Infinity
            minEl=None
            q = PriorityQueue()
            c = Counter()
            for img in featureWithIdsDict:
                dist=np.linalg.norm(f - featureWithIdsDict[img]["features"])
                id=featureWithIdsDict[img]["id"]
                q.put((dist,id))
            for i in range(1,100):
                first=q.get()
                c.update((first[1],))
            res.append((id,c.most_common()[0][0],c))

In [None]:
i=0
for el in res:
    if(el[0]==el[1]):
        i+=1
print("Res {}/{}".format(i,len(res)))
print("Res {}%".format(i/len(res)))

Res 25/250
Res 0.1%
