<div class="alert alert-info">
    <h1>Imports</h1>
    </div>

In [1]:
import os
import cv2
import tarfile
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from PIL import Image
from PIL import ImageFilter
import torchvision
import torchvision.models as models
from torch.nn import functional as F
from torch.autograd import Variable
from torchvision import transforms as trn
from torchvision.transforms import Lambda, ToTensor, Resize, Normalize, RandomCrop

<div class="alert alert-info">
    <h1>Load ResNet50 model with pretrained weights on Places365 database</h1>
    </div>

In [2]:
arch = 'resnet50'
model_file = '%s_places365.pth.tar' % arch

In [3]:
if not os.access(model_file, os.W_OK):
    weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
    os.system('wget ' + weight_url)

In [4]:
model = models.__dict__[arch](num_classes=365)

In [5]:
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict)

In [6]:
model.fc = nn.Linear(2048, 67)

In [7]:
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

<div class="alert alert-info">
    <h1>Download the target database</h1>
    </div>

In [8]:
if('indoorCVPR_09.tar' not in os.listdir()):
    ! wget http://groups.csail.mit.edu/vision/LabelMe/NewImages/indoorCVPR_09.tar
    ! wget http://web.mit.edu/torralba/www/TrainImages.txt
    ! wget http://web.mit.edu/torralba/www/TestImages.txt
    tar = tarfile.open("indoorCVPR_09.tar", "r:")
    tar.extractall()
    tar.close()

<div class="alert alert-info">
    <h1>Data preparation and preprocessing</h1>
    </div>

In [9]:
dir_name = "Images"
with open("TrainImages.txt","r") as f:
    train_text = f.read().split("\n")
    
train_img = []
train_label = []

for name in train_text:
    img = Image.open(os.path.join(dir_name,name))
    if(img.mode != "RGB"):
        img = img.convert("RGB")
    train_label.append(name.split("/")[0])
    train_img.append(img.copy())
    img.close()
    
with open("TestImages.txt","r") as f:
    test_text = f.read().split("\n")
    
test_img = []
test_label = []
for name in test_text:
    img = Image.open(os.path.join(dir_name,name))
    if(img.mode != "RGB"):
        img = img.convert("RGB")
    test_label.append(name.split("/")[0])
    test_img.append(img.copy())
    img.close()

In [10]:
len(train_img),len(test_img)

(5360, 1340)

In [11]:
np.unique(train_label)

array(['airport_inside', 'artstudio', 'auditorium', 'bakery', 'bar',
       'bathroom', 'bedroom', 'bookstore', 'bowling', 'buffet', 'casino',
       'children_room', 'church_inside', 'classroom', 'cloister',
       'closet', 'clothingstore', 'computerroom', 'concert_hall',
       'corridor', 'deli', 'dentaloffice', 'dining_room', 'elevator',
       'fastfood_restaurant', 'florist', 'gameroom', 'garage',
       'greenhouse', 'grocerystore', 'gym', 'hairsalon', 'hospitalroom',
       'inside_bus', 'inside_subway', 'jewelleryshop', 'kindergarden',
       'kitchen', 'laboratorywet', 'laundromat', 'library', 'livingroom',
       'lobby', 'locker_room', 'mall', 'meeting_room', 'movietheater',
       'museum', 'nursery', 'office', 'operating_room', 'pantry',
       'poolinside', 'prisoncell', 'restaurant', 'restaurant_kitchen',
       'shoeshop', 'stairscase', 'studiomusic', 'subway', 'toystore',
       'trainstation', 'tv_studio', 'videostore', 'waitingroom',
       'warehouse', 'winecellar

In [12]:
prepare_train = trn.Compose([
    Lambda(lambda imgs : [Resize((256,256))(img) for img in imgs]),
    Lambda(lambda imgs : [[Resize((224,224))(img), (RandomCrop(224))(img), \
                        Resize((224,224))(img.filter(ImageFilter.GaussianBlur(radius=np.random.normal(0,2.5)))), \
                        Resize((224,224))(img.transpose(Image.FLIP_LEFT_RIGHT))] for img in imgs]),
    Lambda(lambda imgs : [ToTensor()(img) for l in imgs for img in l]),
    Lambda(lambda imgs : torch.stack([Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])(img) \
                                          for img in imgs]))
])

prepare_test = trn.Compose([
    Lambda(lambda imgs : [ToTensor()(Resize((224,224))(img)) for img in imgs]),
    Lambda(lambda imgs : torch.stack([Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])(img) \
                                          for img in imgs]))
])
y_train = pd.get_dummies(sum([[label]*4 for label in train_label],[])).values

X_train = Variable(prepare_train(train_img))

y_test = pd.get_dummies(sum([[label]*4 for label in test_label],[])).values

X_test = Variable(prepare_test(test_img))

X_train.shape,y_train.shape,X_test.shape,y_test.shape

(torch.Size([21440, 3, 224, 224]),
 (21440, 67),
 torch.Size([1340, 3, 224, 224]),
 (5360, 67))

<div class="alert alert-info">
    <h1>Training the model</h1>
    </div>

In [13]:
# logit = model.forward(X_train)
# h_x = F.softmax(logit, 1).data.squeeze()
# probs, idx = h_x.sort(0, True)