## Import Library

In [1]:
from urllib.request import urlretrieve
import os, time, sys
from os.path import join as ospj
from PIL import Image
from matplotlib import pyplot as plt
import imageio

## Crawling Images

Please replace the API key and secret with your own.  
You can change `label_list` to crawl specific classes, for example:

```
label_list = ['chair', 'sofa', 'diningtable']
```

In [60]:
from flickrapi import FlickrAPI

# API key information.
# Credentials are taken from the environment when available so they do not
# have to be stored in the notebook; the empty-string fallback keeps the
# original placeholders, which can still be edited by hand.
key = os.environ.get("FLICKR_API_KEY", "")
secret = os.environ.get("FLICKR_API_SECRET", "")
wait_time = 1  # not referenced by any cell visible in this notebook
flickr = FlickrAPI(key, secret, format='parsed-json')

# Set Parameter
# Flickr license ids to query; one search request is issued per id.
license_number = [1,2,4,5,9,10]
# Extra per-photo fields requested from the search endpoint.
extras = 'url_c, url_o, license, owner_name'
# Passed to the search call as `per_page`.
search_photo_number = 4000
# Classes to crawl, and terms appended to the query as exclusions.
label_list = ['chair']
label_excluded_list = ['diningtable', 'person', 'sofa']

# License Info
# List of license records returned by the API, plus an id -> url lookup.
license_info = flickr.photos.licenses.getInfo()['licenses']['license']
license_info_print = {l_dict['id']: l_dict['url'] for l_dict in license_info}

# save_path
savedir = "./Dataset/VOC/"
savedir_metadata = "./Dataset/VOCmetadata/"
# Create the output folders up front: the download and XML-writing cells
# write into them and would fail on a fresh checkout otherwise.
os.makedirs(savedir, exist_ok=True)
os.makedirs(savedir_metadata, exist_ok=True)

In [61]:
def photo_search(name, license_number, search_photo_number, excluded_list, safe_search=1):
    """Search Flickr for photos matching ``name`` under each given license.

    Uses the module-level ``flickr`` client and ``extras`` string.

    Args:
        name: Search term (the class label).
        license_number: Iterable of Flickr license ids; one request is made
            per id.
        search_photo_number: Value passed as ``per_page`` to each request.
            NOTE(review): the Flickr API documents a per-page maximum of 500,
            so larger values are presumably capped server-side -- confirm.
        excluded_list: Terms appended to the query as ``" not <term>"``.
            An empty list leaves the query equal to ``name``.
            NOTE(review): the Flickr docs describe exclusion with a leading
            ``-``; verify that ``not`` is honoured by the search endpoint.
        safe_search: Flickr safe-search level (1 safe, 2 moderate,
            3 restricted). Defaults to 1.

    Returns:
        A list with the photo records of all requests concatenated.
    """
    result_list = []
    # Only append the exclusion clause when there is something to exclude;
    # otherwise the query would end with a dangling " not ".
    if excluded_list:
        name = name + " not " + " not ".join(excluded_list)
    print(name)
    for license_id in license_number:
        result = flickr.photos.search(
            text = name,
            per_page = search_photo_number,
            media = 'photos',
            sort = 'relevance',
            # Previously the license id was passed here by mistake; the API
            # only accepts the safe-search levels 1, 2 and 3.
            safe_search = safe_search,
            extras = extras,
            license = license_id,
        )
        result_list.extend(result['photos']['photo'])
    print(f"We find {len(result_list)} photos")
    return result_list

In [None]:
# Download every search hit and record its attribution line per label.
SEARCH_DICT = {}

# License id (as a string) -> license URL. Built locally so the lookup works
# whether the API reports ids as numbers or strings.
license_url_by_id = {str(l_dict['id']): l_dict['url'] for l_dict in license_info}

for label in label_list:
    SEARCH_DICT[label] = {}
    search_results = photo_search(label, license_number, search_photo_number, label_excluded_list)
    for photo_dict in search_results:
        photo_id = str(photo_dict['id'])
        filepath = ospj(savedir, photo_id + '.jpg')

        # Prefer the 'url_c' rendition and fall back to 'url_o'; skip photos
        # that expose neither instead of passing None to urlretrieve.
        url_q = photo_dict.get('url_c') or photo_dict.get('url_o')
        if not url_q:
            continue

        # Files already on disk are kept, so re-running the cell only fetches
        # what is missing.
        if not os.path.exists(filepath):
            try:
                urlretrieve(url_q, filepath)
            except OSError as err:  # URLError / HTTPError are OSError subclasses
                print(f"Skipping {photo_id}: download failed ({err})")
                # Do not leave a partial file that a later run would trust.
                if os.path.exists(filepath):
                    os.remove(filepath)
                continue

        url = "https://www.flickr.com/photos/" + photo_dict['owner'] + "/" + photo_id
        # Look the license up by id rather than by list position; an unknown
        # or missing license yields an empty field.
        license_url = license_url_by_id.get(str(photo_dict.get('license')), '')
        license_name = "{},{},{},{}".format(photo_id,
                                            url,
                                            license_url,
                                            photo_dict.get('ownername', ''))
        SEARCH_DICT[label][photo_id] = license_name

chair not diningtable not person not sofa
We find 2998 photos


In [48]:
# Number of photos recorded for the 'chair' label.
len(SEARCH_DICT['chair'])

2997

## Model

In [28]:
# Cut-off for the entropy-like score; only referenced by the commented-out
# filtering step in the inference cell.
entropy_threshold = 0.5
# A class counts as detected when its sigmoid probability exceeds this value.
prob_threshold = 0.9

In [8]:
import torch
import torch.nn as nn
import importlib
import numpy as np

class TorchvisionNormalize():
    """Callable that scales an image to [0, 1] and standardises its first three channels.

    ``mean`` and ``std`` are per-channel statistics indexed 0..2.
    """

    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        self.mean = mean
        self.std = std

    def __call__(self, img):
        """Return a float32 array shaped like ``img`` with channels 0-2 normalised.

        Channels are addressed on the last axis; any channel beyond the third
        is left as allocated by ``np.empty_like`` (i.e. not written).
        """
        pixels = np.asarray(img)
        normalised = np.empty_like(pixels, np.float32)

        # Same arithmetic for each of the three channels: x / 255, minus mean, over std.
        for channel in range(3):
            scaled = pixels[..., channel] / 255.
            normalised[..., channel] = (scaled - self.mean[channel]) / self.std[channel]

        return normalised
    
def gap2d(x, keepdims=False):
    """Average ``x`` over everything after its first two dimensions.

    Args:
        x: Tensor whose first two dimensions are kept; the remaining ones are
            flattened with ``view`` and averaged.
        keepdims: When true, the result is reshaped to ``(N, C, 1, 1)``
            instead of ``(N, C)``.

    Returns:
        The averaged tensor.
    """
    n, c = x.size(0), x.size(1)
    pooled = x.view(n, c, -1).mean(dim=-1)
    if not keepdims:
        return pooled
    return pooled.view(pooled.size(0), pooled.size(1), 1, 1)

In [9]:
# Class names; a label's position in this list is used as its class index
# when reading the model output.
CAT_LIST = ['aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train',
            'tvmonitor']

# Instantiate the CAM network from the project module and restore its weights.
model = getattr(importlib.import_module("net.resnet50_cam"), 'CAM')()
# map_location='cpu' deserialises the checkpoint on the CPU regardless of the
# device it was saved from; the model is moved to the GPU later, when needed.
cam_weights = torch.load('sess/cam_retrain.pth', map_location='cpu')
model.load_state_dict(cam_weights, strict=True)
model.eval()

In [40]:
from misc import imutils

def transform_img(img, img_normal, resize_long=(320, 640)):
    """Prepare an image and its mirrored copy as one tensor for the model.

    Args:
        img: Input image array, passed to ``imutils.random_resize_long``.
        img_normal: Callable applied to the resized image; its result is
            transposed with ``(2, 0, 1)``, so it is assumed to be
            channel-last -- TODO confirm against the normaliser in use.
        resize_long: Pair forwarded as the two bounds of
            ``imutils.random_resize_long``.

    Returns:
        A tensor holding the transposed image stacked with its flip along the
        last axis, with an extra leading dimension added.
    """
    lower, upper = resize_long[0], resize_long[1]
    resized = imutils.random_resize_long(img, lower, upper)
    channel_first = img_normal(resized).transpose((2, 0, 1))
    mirrored = np.flip(channel_first, -1)
    pair = np.stack([channel_first, mirrored], axis=0)
    return torch.tensor(pair).unsqueeze(0)

def th_delete(tensor, indices):
    """Return ``tensor`` without the elements at ``indices``.

    A boolean marker of length ``tensor.numel()`` flags the positions to
    drop, and the tensor is indexed with its complement.
    """
    drop = torch.zeros(tensor.numel(), dtype=torch.bool)
    drop[indices] = True
    return tensor[~drop]

In [None]:
# Run the classifier over every downloaded image and display those for which
# the searched label is the only class above `prob_threshold`.
resize_long = (320, 640)
img_normal = TorchvisionNormalize()
log_softmax = nn.LogSoftmax(dim=1)

file_list = os.listdir(savedir)
# Set copy of the listing so the per-image existence check is O(1).
downloaded_files = set(file_list)

with torch.no_grad():
    model.cuda()
    for label in label_list:
        image_dict = SEARCH_DICT[label]
        cat_index = CAT_LIST.index(label)
        for image_id in image_dict:
            # Import Image and Target
            filename = image_id + ".jpg"
            if filename not in downloaded_files:
                continue

            img = imageio.imread(ospj(savedir, filename), as_gray=False, pilmode="RGB")
            img_original = img.copy()
            # Pass the configured bounds explicitly so editing `resize_long`
            # above actually takes effect (same values as the default).
            img = transform_img(img, img_normal, resize_long)

            output = model(img[0].cuda(non_blocking=True), return_norelu=True)
            logits = gap2d(output[0].unsqueeze(0)).squeeze(0).cpu().detach()

            # Calculate Probability
            probs = torch.sigmoid(logits)
            # 1 where a class probability exceeds the threshold, else 0.
            logits_threshold = (probs.numpy() > prob_threshold).astype(int)

            # Calculate Entropy
            # NOTE(review): this is the summed negative log-softmax of the
            # remaining class probabilities divided by len(CAT_LIST), not a
            # Shannon entropy, and it is only consumed by the commented-out
            # filter below -- confirm the intended formula before enabling it.
            label_value = probs[cat_index]
            other_probs = th_delete(probs, [cat_index]).unsqueeze(0)
            assert len(other_probs[0]) == len(CAT_LIST)-1
            entropy = torch.neg(log_softmax(other_probs)).sum(dim=1)/len(CAT_LIST)

            nonzero_count = np.count_nonzero(logits_threshold)
#             print(nonzero_count, logits_threshold[cat_index])
            if nonzero_count == 1 and logits_threshold[cat_index] == 1:
                print(filename)
                # Show and close each figure right away; otherwise one open
                # figure per match accumulates for the whole loop.
                fig = plt.figure(figsize=(4,4))
                plt.imshow(img_original)
                plt.show()
                plt.close(fig)

#             nonzero_count = np.count_nonzero(logits_threshold)
#             if entropy > entropy_threshold or nonzero_count > 1:
#                 image_dict.pop(image_id, None)


### XML file

In [28]:
import xml.etree.ElementTree as elemTree

def indent(elem, level=0):
    """Pretty-print an element tree in place by filling in whitespace text/tails.

    Args:
        elem: Element to indent; its ``text``/``tail`` and those of its
            descendants are modified wherever they are empty or whitespace.
        level: Nesting depth of ``elem``; each level adds two spaces.
    """
    pad = "\n" + level*"  "

    # Leaf: only the tail needs attention, and never for the root (level 0).
    if not len(elem):
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = pad
        return

    if not elem.text or not elem.text.strip():
        elem.text = pad + "  "
    if not elem.tail or not elem.tail.strip():
        elem.tail = pad

    last_child = None
    for last_child in elem:
        indent(last_child, level+1)

    # The final child's tail is pulled back so the parent's closing tag lines
    # up with its opening tag.
    if not last_child.tail or not last_child.tail.strip():
        last_child.tail = pad

def make_xml(image_id, label, license, savedir, savedir_metadata):
    """Write an annotation XML file for one downloaded image.

    Args:
        image_id: Image identifier; the image is read from
            ``<savedir>/<image_id>.jpg``.
        label: Class name stored as the single object's ``name``.
        license: Attribution text stored under ``source/license``.
        savedir: Directory containing the image.
        savedir_metadata: Directory receiving ``<image_id>.xml``; it is
            created when missing.

    Raises:
        Whatever ``PIL.Image.open`` raises when the image cannot be read.
    """
    filename = image_id + ".jpg"
    image_path = ospj(savedir, filename)
    metadata_path = ospj(savedir_metadata, image_id + ".xml")

    # Context manager closes the file handle as soon as the header is read.
    with Image.open(image_path) as image:
        width, height = image.size
        # Number of bands (e.g. 3 for RGB) instead of a hard-coded '1'.
        depth = len(image.getbands())

    # The element classes are reached through the `elemTree` module alias;
    # the bare names Element/SubElement/ElementTree are never imported.
    root = elemTree.Element('annotation')
    elemTree.SubElement(root, 'folder').text = 'VOC2012'
    elemTree.SubElement(root, 'filename').text = filename
    elemTree.SubElement(root, 'path').text = image_path

    source = elemTree.SubElement(root, 'source')
    elemTree.SubElement(source, 'license').text = license
    elemTree.SubElement(source, 'image').text = 'flickr'

    size = elemTree.SubElement(root, 'size')
    elemTree.SubElement(size, 'width').text = str(width)
    elemTree.SubElement(size, 'height').text = str(height)
    elemTree.SubElement(size, 'depth').text = str(depth)

    elemTree.SubElement(root, 'segmented').text = '0'

    obj = elemTree.SubElement(root, 'object')
    elemTree.SubElement(obj, 'name').text = label
    elemTree.SubElement(obj, 'pose').text = 'Unspecified'
    elemTree.SubElement(obj, 'truncated').text = '0'
    elemTree.SubElement(obj, 'difficult').text = '0'

    indent(root)

    # Make sure the target folder exists before writing into it.
    if savedir_metadata:
        os.makedirs(savedir_metadata, exist_ok=True)
    tree = elemTree.ElementTree(root)
    tree.write(metadata_path)

In [None]:
# Emit one annotation file per recorded image, label by label.
for label in label_list:
    for image_id, license_text in SEARCH_DICT[label].items():
        make_xml(image_id, label, license_text, savedir, savedir_metadata)