In [1]:
import numpy as np
import pandas as pd
import torch
from PIL import Image
from torchvision import transforms
import os
import matplotlib.pyplot as plt

In [2]:
# Load VGG11 with pretrained weights from the pinned torchvision hub tag.
# force_reload is left at its default (False): the repository cached by
# torch.hub is reused, so a kernel restart does not trigger a fresh download
# and the notebook can be re-run offline once the cache exists.
model = torch.hub.load('pytorch/vision:v0.9.0', 'vgg11', pretrained=True)
# Switch to evaluation mode for inference; as the last expression of the cell
# this also displays the network architecture.
model.eval()

Using cache found in C:\Users\Yi Wen/.cache\torch\hub\pytorch_vision_v0.9.0


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [8]:
# Collect the JPEG file names found in each category folder under `path`.
path='../Image/All_Cropped'

# One sub-folder of `path` per image category.
directory=['Mountain', 'Beach',
           'Mug', 'Banana',
           'Car', 'Plane',
           'Lighthouse', 'Church']

# category name -> file names (not full paths) inside that category's folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of later results reproducible across machines.
# The loop variable is called `category` so the builtin `dir` is not shadowed.
images = {
    category: sorted(
        name
        for name in os.listdir(os.path.join(path, category))
        if name.endswith(('jpeg', 'jpg'))
    )
    for category in directory
}

In [9]:
# Per-channel normalisation statistics passed to transforms.Normalize.
# NOTE(review): these are the values commonly quoted for ImageNet-pretrained
# torchvision models (the model loaded here is VGG11, not AlexNet) - confirm.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Image -> tensor -> channel-wise normalisation. No resize or crop is applied here.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
preprocess = transforms.Compose([to_tensor, normalize])

In [10]:
# Class names: one entry per line of the text file, surrounding whitespace removed.
with open("../imagenet_classes.txt", "r") as class_file:
    categories = [line.strip() for line in class_file]

# Label -> WordNet synset table, transposed after loading; later cells read
# its 'id' column by class index.
ltw = pd.read_json('../imagenet_label_to_wordnet_synset.json').transpose()

# Hand-picked labels; later cells read the 'type', 'id' and 'label' columns.
selected_labels = pd.read_csv('../selected_labels.csv', index_col=0)

In [11]:
# Category name ('type' column) -> WordNet synset id ('id' column) of the
# label selected for that category.
id_mapping = dict(zip(selected_labels['type'], selected_labels['id']))
id_mapping

{'Banana': '07753592-n',
 'Beach': '09428293-n',
 'Car': '02814533-n',
 'Church': '03028079-n',
 'Lighthouse': '02814860-n',
 'Mountain': '09193705-n',
 'Mug': '03063599-n',
 'Plane': '02690373-n'}

In [12]:
# Category name ('type' column) -> human-readable label ('label' column) of
# the label selected for that category.
readable_mapping = dict(zip(selected_labels['type'], selected_labels['label']))
readable_mapping

{'Banana': 'banana',
 'Beach': 'seashore, coast, seacoast, sea-coast',
 'Car': 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
 'Church': 'church, church building',
 'Lighthouse': 'beacon, lighthouse, beacon light, pharos',
 'Mountain': 'alp',
 'Mug': 'coffee mug',
 'Plane': 'airliner'}

Compute the predicted probability of the selected label for every image

In [13]:
# Column-oriented accumulator for the per-image results: one list per column,
# each list receiving one entry per processed image.
RESULT_COLUMNS = ('type', 'dir', 'prob', 'id_label', 'readable_label')
all_img = {column: [] for column in RESULT_COLUMNS}

In [14]:
# Run every image through the model and record the softmax probability that
# the network assigns to the label selected for the image's category.

# Empty the result columns first so that re-running this cell does not append
# a second copy of every row.
for column in all_img.values():
    column.clear()

# Choose the device and move the model once, not once per image.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# WordNet synset id -> class index in the model output, built once.
# `ltw['id'][k]` is the lookup previously performed per image for class index k.
# Assumes `ltw` has one row per output class and unique ids - TODO confirm.
synset_to_class = {ltw['id'][k]: k for k in range(len(ltw))}

for cate, filenames in images.items():
    selected_id_label = id_mapping[cate]
    selected_readable_label = readable_mapping[cate]
    # Position of the selected label in the model output (KeyError if the
    # synset id is not present in `ltw`).
    selected_class = synset_to_class[selected_id_label]

    for f in filenames:
        img_path = os.path.join(path, cate, f)

        # The context manager closes the file handle as soon as the pixels are
        # read. convert('RGB') guarantees three channels, matching the
        # three-value mean/std given to Normalize, even for grayscale, CMYK
        # or RGBA files.
        with Image.open(img_path) as img:
            input_tensor = preprocess(img.convert('RGB'))
        # Add the batch dimension and place the input on the model's device.
        input_batch = input_tensor.unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(input_batch)
            probabilities = torch.nn.functional.softmax(output[0], dim=0)

        # Exactly one entry per column per image, so all columns stay the same
        # length. `cate` is the category name itself, replacing a chain of
        # independent substring checks that appended the same value.
        all_img['type'].append(cate)
        all_img['dir'].append(img_path)
        all_img['prob'].append(probabilities[selected_class].item())
        all_img['id_label'].append(selected_id_label)
        all_img['readable_label'].append(selected_readable_label)

In [15]:
# Turn the column-oriented accumulator into a table: one row per image, one
# column per key of `all_img`.
all_img_df = pd.DataFrame.from_dict(all_img)

In [16]:
# Preview the first five rows of the results table.
all_img_df.head(n=5)

Unnamed: 0,type,dir,prob,id_label,readable_label
0,Mountain,../Image/All_Cropped\Mountain\1.jpg,0.020162,09193705-n,alp
1,Mountain,../Image/All_Cropped\Mountain\10.jpg,0.212414,09193705-n,alp
2,Mountain,../Image/All_Cropped\Mountain\11.jpg,0.695464,09193705-n,alp
3,Mountain,../Image/All_Cropped\Mountain\12.jpg,0.589797,09193705-n,alp
4,Mountain,../Image/All_Cropped\Mountain\13.jpg,0.088032,09193705-n,alp


In [17]:
# all_img_df.to_csv('vgg_final.csv')