## Note: You may need to adjust some paths in the following code. I moved some stuff around last minute!

In [206]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os

from overlapped_classes import overlapped_classes
import json
from PIL import Image
import scipy

In [65]:
with open("../mappings/objectnet_to_imagenet_1k.json") as f:
    data = json.load(f)

with open("../mappings/pytorch_to_imagenet_2012_id.json") as f:
    idxMap = json.load(f)
        
with open("../mappings/folder_to_objectnet_label.json") as f:
    folderMap = json.load(f)

with open('imagenet_classes.txt') as f:
# with open('../mappings/imagenet_to_label_2012_v2.txt') as f:    
    labels = [line.strip() for line in f.readlines()]    

In [203]:
# inverting the folderMap
dirMap = {}
for u, v in folderMap.items():
    dirMap[v]= u

In [198]:
from torchvision import models
dir(models)

# models.googlenet(pretrained=True)] #,

models_ = [models.googlenet(pretrained=True), models.alexnet(pretrained=True), models.vgg19(pretrained=True), 
           models.resnet152(pretrained=True), models.inception_v3(pretrained=True), 
           models.mnasnet1_0(pretrained=True)] #, models.resnext101_32x8d(pretrained=True), models.densenet161(pretrained=True)]



In [74]:

# model = models.alexnet(pretrained=True)
# model = models.resnet152(pretrained=True)
# model = models.inception_v3(pretrained=True)
# model = models.googlenet(pretrained=True)

# for model_name in models_:
#     model = eval(model_name)
#     model.eval()

In [204]:
def predict_image(image_path, box, draw=False, kind='Box'):
    # Define transformations for the image, should (note that imagenet models are trained with image size 224)
    transform = transforms.Compose([
#         transforms.CenterCrop(224),
        transforms.ToTensor(),
#         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))    
    ])
    
    
    if kind == 'Crop':
        transform = transforms.Compose([
            transforms.CenterCrop(224),
            transforms.ToTensor(),
    #         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))    
        ])
    
    
    img = Image.open(image_path).convert("RGB")

    if kind == 'Box':
        xl, yl, xr, yr = box
        img = img.crop((xl, yl, xr, yr))   #((left, top, right, bottom)) 
        img = img.resize((224, 224))
        


    img_t = transform(img).float()

    if draw:
        plt.imshow(img) #img_t.permute((2,1,0)) )
        plt.show()

    image_tensor = img_t.unsqueeze_(0)

    if torch.cuda.is_available():
        image_tensor.cuda()

    # Predict the class of the image
    output = model(image_tensor)
    
    confs, indices = torch.sort(output.data, descending=True)
    confs[0] = confs[0]/sum(confs[0])
    return np.abs(confs[0][:5]), indices[0][:5]  # index

In [202]:


for model in models_:
    model_name = model._get_name() #eval(model_name)
    model.eval()

    print(model_name + '\n')
    
    res_top1 = []
    res_top5 = []

    save_file = './outputs/' + model_name + '.txt'

    for n, category in enumerate(data):

        txtfile = '../' + dirMap[category] + '.txt'
        if not os.path.exists(txtfile):
            continue

        with open(txtfile) as f:
            boxes = [line.strip() for line in f.readlines()]    

        # writing to the file
        with open(save_file, 'a') as ff:
            ff.write(f"{category} ---------------------------------------------------------------- \n")


        count = [0, 0]
        lines = 0
        for im in boxes[0:]:
            ss = im.split(' ')
            fName = ss[0]
            if len(ss) > 1:
                lines += 1
                coords = (int(i) for i in ss[1:] if i)
                confs, idx = predict_image(os.path.join('../images/' + dirMap[category] + '/', fName), coords, False, kind='Crop')

                # top 1
                count[0] += 1 if labels[idx[0]] in data[category] else 0  

                # top 5
                flag = False
                for i in idx[0:]:
                      flag = flag or (labels[i] in data[category])
                count[1] += 1 if flag else 0

                # writing to the file
                with open(save_file, 'a') as ff:
                    ff.write(f"{fName} {int(idx[0])} {int(idx[1])} {int(idx[2])} {int(idx[3])} {int(idx[4])}           {confs[0]:.2f} {confs[1]:.2f} {confs[2]:.2f} {confs[3]:.2f} {confs[4]:.2f} \n")

        accs = np.array(count)*100/lines

        print(f"{n} -> {category}: top 1: {accs[0]:.2f}  -  top 5: {accs[1]:.2f}           [num imgs: {lines}]")  

        with open(save_file, 'a') as ff:
            ff.write(f"{n} -> {category}: top 1: {accs[0]:.2f}  -  top 5: {accs[1]:.2f}           [num imgs: {lines}] \n")  

        res_top1.append(accs[0])        
        res_top5.append(accs[1])

    print(sum(res_top1)/len(res_top1))
    print(sum(res_top5)/len(res_top5))

GoogLeNet

0 -> Alarm clock: top 1: 15.97  -  top 5: 29.17           [num imgs: 144]
1 -> Backpack: top 1: 7.66  -  top 5: 20.43           [num imgs: 235]
2 -> Banana: top 1: 38.30  -  top 5: 47.23           [num imgs: 235]
3 -> Band Aid: top 1: 10.66  -  top 5: 28.69           [num imgs: 244]
4 -> Basket: top 1: 4.42  -  top 5: 19.89           [num imgs: 181]
5 -> Bath towel: top 1: 11.41  -  top 5: 28.86           [num imgs: 149]
6 -> Beer bottle: top 1: 5.02  -  top 5: 23.29           [num imgs: 219]
7 -> Bench: top 1: 0.76  -  top 5: 7.58           [num imgs: 132]
8 -> Bicycle: top 1: 6.12  -  top 5: 17.69           [num imgs: 147]
9 -> Binder (closed): top 1: 3.83  -  top 5: 18.58           [num imgs: 183]
10 -> Bottle cap: top 1: 5.24  -  top 5: 10.48           [num imgs: 248]
11 -> Bread loaf: top 1: 4.98  -  top 5: 16.29           [num imgs: 221]
12 -> Broom: top 1: 14.66  -  top 5: 36.21           [num imgs: 232]
13 -> Bucket: top 1: 6.42  -  top 5: 18.49           [num imgs: 

KeyboardInterrupt: 

print(sum(res_top1)/len(res_top1))
print(sum(res_top5)/len(res_top5))

In [157]:
# eval(model)
# int(idx[0])
print(f"{1.911333:.2f}")
# print(f'{val:.2f}')

confs, idx = predict_image(os.path.join('../images/' + dirMap[category] + '/', fName), coords, False)
# confs / sum(confs)

1.91


ValueError: not enough values to unpack (expected 4, got 0)

In [81]:
eval(model_name)

'models.vgg19(pretrained=True)'

In [82]:
models_ = [models.googlenet(pretrained=True), models.alexnet(pretrained=True), models.vgg19(pretrained=True), 
           models.resnet152(pretrained=True), models.inception_v3(pretrained=True), 
           models.mnasnet(pretrained=True), models.resnext101_32x8d(pretrained=True)]

TypeError: 'module' object is not callable

In [87]:
m = models.mnasnet1_0(pretrained=True)

Downloading: "https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth" to /Users/ali/.cache/torch/checkpoints/mnasnet1.0_top1_73.512-f206786ef8.pth
100.0%


In [101]:
# dir(models)
models_[0]._get_name()

'GoogLeNet'