In [2]:
import torchvision
from torchvision import datasets, transforms
import numpy as np
import torch
import torch.nn as nn
import pickle
from PIL import Image
import requests
from io import BytesIO
import urllib.request as url_req
from utils import urltoImg
import json

In [3]:
def get_model(device):
    """Return torchvision's pretrained VGG-11, placed on ``device`` in eval mode.

    Args:
        device: Target passed straight to ``Module.to`` (e.g. a
            ``torch.device``).

    Returns:
        The VGG-11 module after ``.to(device)`` and ``.eval()``.
    """
    vgg = torchvision.models.vgg11(pretrained=True)
    # Module.to() and Module.eval() both return the module itself, so the
    # two calls can be chained into the return expression.
    return vgg.to(device).eval()

In [12]:
class Data(object):
    """Collect image URLs per class index that the model classifies correctly.

    For every class index in ``testClassList`` the class fetches the list of
    candidate image URLs from the image-net.org synset API, downloads the
    images one by one, runs them through ``model`` and keeps the URLs whose
    predicted index equals the requested class index.

    Attributes:
        model: Classifier called as ``model(batch)``; its output is reduced
            with ``torch.max(output, 1)``.
        device: Device the input batch is moved to before inference.
        testClassList: Iterable of class indices to process.
        id_label_map: Mapping unpickled from ``pickled_id_label_imagenet_map``;
            indexed by class index, each entry is read via its ``'id'`` and
            ``'label'`` keys.
        file_key: Suffix used to build the output file name
            ``valid_urls<file_key>.txt``.
    """

    def __init__(self,model,device,testClassList,key):
        self.model = model
        self.device = device
        self.testClassList = testClassList
        # NOTE(review): pickle.load can execute arbitrary code; only use a
        # mapping file from a trusted source.
        with open('pickled_id_label_imagenet_map', 'rb') as map_file:
            self.id_label_map = pickle.load(map_file)  # load mapping as dict
        self.file_key = key

    def query_class_data(self,idx):
        """Return the candidate image URLs for class index ``idx``.

        The WordNet id is built as ``'n'`` followed by the part of the
        entry's ``'id'`` before the first ``'-'``.

        Args:
            idx: Key into ``self.id_label_map``.

        Returns:
            list[str]: The API response body split on ``'\\r\\n'``. Entries are
            not validated here and may include empty strings.
        """
        idx_id = self.id_label_map[idx]['id']
        wnid = 'n' + str(idx_id.split('-')[0])
        synset_url = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid='+str(wnid)
        # NOTE(review): no timeout is passed, so a stalled server blocks
        # indefinitely - consider adding one.
        return requests.get(synset_url).text.split('\r\n')

    def preprocess_data(self,img):
        """Turn an image into a normalized 3x224x224 tensor.

        Applies Resize(256) -> CenterCrop(224) -> ToTensor -> Normalize with
        the mean/std constants below.

        Args:
            img: Image accepted by the torchvision transforms (presumably a
                PIL image, as produced by ``urltoImg`` - confirm).

        Returns:
            The transformed tensor (no batch dimension).
        """
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        preprocess = transforms.Compose([transforms.Resize(256),
                                         transforms.CenterCrop(224),
                                         transforms.ToTensor(),
                                         normalize])
        return preprocess(img)

    def testModel(self,img,label):
        """Classify one image and report the expected vs. predicted index.

        Prints the batch size, the original label and the predicted
        index/class name.

        Args:
            img: Image to classify (see ``preprocess_data``).
            label: Expected class index.

        Returns:
            tuple[float, float]: ``(original_label, predicted_index)``.
        """
        batch = self.preprocess_data(img).unsqueeze(0).to(self.device)
        print(batch.size())

        # Inference only: skip building an autograd graph.
        with torch.no_grad():
            output = self.model(batch)
        _,pred = torch.max(output,1)

        # Same numeric conversion as before, without a device round trip.
        org_label = float(torch.tensor(label,requires_grad=False))
        output_idx = float(pred.cpu())
        # A float key that equals an int key hashes identically, so this
        # lookup matches the integer-indexed mapping.
        output_class = self.id_label_map[output_idx]['label']
        print('Original label = ',org_label)
        print('Output idx and class =',output_idx,',',output_class)
        return org_label,output_idx

    def make_valid_url_list(self,max_per_class=5):
        """Collect up to ``max_per_class`` correctly classified URLs per class.

        For each class index the candidate URLs are shuffled and tried in
        order. A URL is skipped when it cannot be opened/decoded or when it
        redirects to a ``photo_unavailable.*`` placeholder. A URL is kept
        when ``testModel`` predicts the requested class index.

        After each class, the cumulative result dict is appended as JSON to
        ``valid_urls<file_key>.txt``.

        Args:
            max_per_class: Number of valid URLs after which a class is
                considered complete. Defaults to 5.

        Returns:
            dict: Class index -> list of accepted URLs.
        """
        valid_urls = {}
        # Use the key stored on the instance; the previous code read a
        # global named `key`, which only existed by accident in the notebook.
        file_name = 'valid_urls' + str(self.file_key) + '.txt'

        for idx in self.testClassList:
            urls = self.query_class_data(idx)
            np.random.shuffle(urls)
            valid_urls[idx] = []

            for i, url in enumerate(urls):
                if len(valid_urls[idx]) >= max_per_class:
                    break
                try:
                    # Close the probe response so connections are not leaked.
                    with url_req.urlopen(url) as response:
                        redirected_url = response.geturl()
                    # Removed photos redirect to a 'photo_unavailable' image.
                    if str(redirected_url.split('/')[-1].split('.')[0]) == 'photo_unavailable':
                        print(url)
                        print('No photo')
                        continue
                    img = urltoImg(url)
                except Exception:
                    # Best-effort crawl: any fetch/decode failure skips the
                    # URL, but KeyboardInterrupt/SystemExit still propagate.
                    print(url)
                    print('Invalid url')
                    continue

                orig_label,pred_label = self.testModel(img,idx)
                if orig_label != pred_label:
                    print('Diff label',i)
                else:
                    print('Valid_url',i)
                    valid_urls[idx].append(url)

            # NOTE(review): append mode writes the whole cumulative dict once
            # per class, so the file is a concatenation of JSON objects rather
            # than one JSON document. Kept as-is in case readers rely on it.
            with open(file_name,'a+') as f:
                f.write(json.dumps(valid_urls))
        return valid_urls
    

In [20]:
# class_id_arr1 = [8,9,21,31,35,49,63,75,84,86]
# class_id_arr2 = [93,100,105,113,121,130,144,148,151,282]
# class_id_arr3 = [293,295,298,309,311,314,360,417,430]
# class_id_arr4 = [438,457,470,480,491,543,546,568,578,587,609]
# class_id_arr5 = [620,629,637,668,696,706,721,773,806,845]

# len(class_id_arr)

In [21]:
# device = torch.device('cuda')

# model = get_model(device)


In [22]:
# key = 4
# dset = Data(model,device,class_id_arr4,key)
# val_urls = dset.make_valid_url_list()



http://html.rincondelvago.com/000286772.png
Invalid url
http://farm5.static.flickr.com/4017/4350136280_cdba608b25.jpg
torch.Size([1, 3, 224, 224])
Original label =  438.0
Output idx and class = 438.0 , beaker
Valid_url 1
http://farm3.static.flickr.com/2039/2037875598_27d4b6cc7b.jpg
torch.Size([1, 3, 224, 224])
Original label =  438.0
Output idx and class = 438.0 , beaker
Valid_url 2
http://farm4.static.flickr.com/3370/3199903331_12d0f604bd.jpg
torch.Size([1, 3, 224, 224])
Original label =  438.0
Output idx and class = 441.0 , beer glass
Diff label 3
http://www.cranberryinstitute.org/images/beakerpour.gif
Invalid url
http://www.inkycircus.com/jargon/images/beaker.jpg
Invalid url
http://farm4.static.flickr.com/3038/2585831990_286fe8a27f.jpg
torch.Size([1, 3, 224, 224])
Original label =  438.0
Output idx and class = 438.0 , beaker
Valid_url 6
http://farm4.static.flickr.com/3607/3445449920_8dbf1069f9.jpg
torch.Size([1, 3, 224, 224])
Original label =  438.0
Output idx and class = 438.0 , be

http://munfitnessblog.com/wp-content/uploads/2007/10/incline-dumbbell-press-for-upper-pectorals-1.jpg
torch.Size([1, 3, 224, 224])
Original label =  543.0
Output idx and class = 422.0 , barbell
Diff label 8
http://farm4.static.flickr.com/3069/2934858672_1b7507d0bf.jpg
torch.Size([1, 3, 224, 224])
Original label =  543.0
Output idx and class = 543.0 , dumbbell
Valid_url 9
http://www.instrumentalley.com/v/vspfiles/photos/DV-ORC-2T.jpg
Invalid url
http://static.howstuffworks.com/gif/eg-body1.jpg
torch.Size([1, 3, 224, 224])
Original label =  546.0
Output idx and class = 546.0 , electric guitar
Valid_url 1
http://static.flickr.com/54/154783464_89ef974050.jpg
torch.Size([1, 3, 224, 224])
Original label =  546.0
Output idx and class = 546.0 , electric guitar
Valid_url 2
http://www.racerxband.com/pictures/vegas-gh/images/082.%20electric-guitar-room4.jpg
Invalid url
http://www.tygerpipes.com/photo/156-0.jpg
Invalid url
http://www.a-violin.com/picImg/Electric-Guitar-Jazz-Series-2-Middle.jpg
Inv

In [23]:
# key = 5
# dset = Data(model,device,class_id_arr5,key)
# val_urls = dset.make_valid_url_list()

http://farm3.static.flickr.com/2056/2124889216_606ee06f19.jpg
No photo
http://farm1.static.flickr.com/22/34347807_8465803995.jpg
torch.Size([1, 3, 224, 224])
Original label =  620.0
Output idx and class = 526.0 , desk
Diff label 1
http://farm2.static.flickr.com/1119/622760513_fae61a4aac.jpg
torch.Size([1, 3, 224, 224])
Original label =  620.0
Output idx and class = 620.0 , laptop, laptop computer
Valid_url 2
http://www.allproducts.com/manufacture97/laptopbag/product4-s.jpg
Couldn't load image cannot identify image file <_io.BytesIO object at 0x7f22842257d8>
http://www.allproducts.com/manufacture97/laptopbag/product4-s.jpg
Invalid url
http://farm1.static.flickr.com/59/188110566_ebdd659f37.jpg
No photo
http://farm1.static.flickr.com/208/467381047_072a888cb0.jpg
torch.Size([1, 3, 224, 224])
Original label =  620.0
Output idx and class = 681.0 , notebook, notebook computer
Diff label 5
http://farm1.static.flickr.com/208/506828990_3b47f6cbc9.jpg
torch.Size([1, 3, 224, 224])
Original label =

http://farm4.static.flickr.com/3280/2725177594_7bc8116c69.jpg
torch.Size([1, 3, 224, 224])
Original label =  721.0
Output idx and class = 721.0 , pillow
Valid_url 0
http://farm4.static.flickr.com/3025/2595242338_6d1fa728bf.jpg
torch.Size([1, 3, 224, 224])
Original label =  721.0
Output idx and class = 721.0 , pillow
Valid_url 1
http://farm3.static.flickr.com/2230/2494808820_b514eb2c39.jpg
torch.Size([1, 3, 224, 224])
Original label =  721.0
Output idx and class = 474.0 , cardigan
Diff label 2
http://www.expodirect.es/images/ALMOHADA%20DE%20LANA.jpg
Invalid url
http://farm4.static.flickr.com/3499/3829042177_49958ef9de.jpg
No photo
http://farm1.static.flickr.com/88/266074805_384623dea0.jpg
No photo
http://farm4.static.flickr.com/3199/3287935704_f4ca778a59.jpg
No photo
http://farm4.static.flickr.com/3405/3185064411_af1aca8145.jpg
torch.Size([1, 3, 224, 224])
Original label =  721.0
Output idx and class = 533.0 , dishrag, dishcloth
Diff label 7
http://farm4.static.flickr.com/3296/304680399