<div class="alert alert-info">
    <h1>Imports</h1>
    </div>

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import cv2
import tarfile
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from collections import OrderedDict
import torch
import torch.nn as nn
from PIL import Image
from PIL import ImageFilter
from pytorchtools import EarlyStopping
import torchvision
import torchvision.models as models
from torch.nn import functional as F
from torch.autograd import Variable
from torchvision import transforms as trn
from torchvision.transforms import Lambda, ToTensor, Resize, Normalize, RandomCrop, CenterCrop

<div class="alert alert-info">
    <h1>Load ResNet50 model with pretrained weights on Places365 database</h1>
    </div>

In [59]:
arch = 'resnet50'
model_file = '%s_places365.pth.tar' % arch

In [60]:
if not os.access(model_file, os.W_OK):
    weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
    os.system('wget ' + weight_url)

In [61]:
model = models.__dict__[arch](num_classes=365)

In [62]:
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict)

In [63]:
# for i,param in enumerate(model.parameters()):
#     if(i < 10):
#         param.requires_grad = False
model.fc = nn.Sequential(OrderedDict([
            ('fc', nn.Linear(2048,67)),
            ('output', nn.LogSoftmax(dim=1))
]))



In [52]:
ref_model = models.__dict__[arch](num_classes=365)
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
ref_model.load_state_dict(state_dict)
ref_model.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

<div class="alert alert-info">
    <h1>Download the target database</h1>
    </div>

In [8]:
if('indoorCVPR_09.tar' not in os.listdir()):
    ! wget http://groups.csail.mit.edu/vision/LabelMe/NewImages/indoorCVPR_09.tar
    ! wget http://web.mit.edu/torralba/www/TrainImages.txt
    ! wget http://web.mit.edu/torralba/www/TestImages.txt
    tar = tarfile.open("indoorCVPR_09.tar", "r:")
    tar.extractall()
    tar.close()

<div class="alert alert-info">
    <h1>Data preparation and preprocessing</h1>
    </div>

In [9]:
# create a validation set of size 30*67

with open("TrainImages.txt","r") as f:
    train_text = f.read().split("\n")
    
with open("TestImages.txt","r") as f:
    test_text = f.read().split("\n")
    
valid_text = []
for dire in os.listdir("Images/"):
    nb = 0
    for img in os.listdir("Images/"+dire):
        if(os.path.join(dire,img) not in train_text and os.path.join(dire,img) not in test_text and nb < 30):
            valid_text.append(os.path.join(dire,img))
            nb +=1

In [10]:
dir_name = "Images"

def create_set(text):

    imgs = []
    labels = []

    for name in text:
        img = Image.open(os.path.join(dir_name,name))
        if(img.mode != "RGB"):
            img = img.convert("RGB")
        labels.append(name.split("/")[0])
        imgs.append(img.copy())
        img.close()
        
    return imgs,labels
    
train_img,train_label = create_set(train_text)
test_img,test_label = create_set(test_text)
valid_img,valid_label = create_set(valid_text)

In [11]:
len(train_img),len(test_img),len(valid_img)

(5360, 1340, 1506)

In [12]:
np.unique(train_label)

array(['airport_inside', 'artstudio', 'auditorium', 'bakery', 'bar',
       'bathroom', 'bedroom', 'bookstore', 'bowling', 'buffet', 'casino',
       'children_room', 'church_inside', 'classroom', 'cloister',
       'closet', 'clothingstore', 'computerroom', 'concert_hall',
       'corridor', 'deli', 'dentaloffice', 'dining_room', 'elevator',
       'fastfood_restaurant', 'florist', 'gameroom', 'garage',
       'greenhouse', 'grocerystore', 'gym', 'hairsalon', 'hospitalroom',
       'inside_bus', 'inside_subway', 'jewelleryshop', 'kindergarden',
       'kitchen', 'laboratorywet', 'laundromat', 'library', 'livingroom',
       'lobby', 'locker_room', 'mall', 'meeting_room', 'movietheater',
       'museum', 'nursery', 'office', 'operating_room', 'pantry',
       'poolinside', 'prisoncell', 'restaurant', 'restaurant_kitchen',
       'shoeshop', 'stairscase', 'studiomusic', 'subway', 'toystore',
       'trainstation', 'tv_studio', 'videostore', 'waitingroom',
       'warehouse', 'winecellar

In [13]:
prepare_train = trn.Compose([
    Lambda(lambda imgs : [Resize((256,256))(img) for img in imgs]),
    Lambda(lambda imgs : [[Resize((224,224))(img), (RandomCrop(224))(img), \
                        Resize((224,224))(img.filter(ImageFilter.GaussianBlur(radius=np.random.normal(0,2.5)))), \
                        Resize((224,224))(img.transpose(Image.FLIP_LEFT_RIGHT))] for img in imgs]),
    Lambda(lambda imgs : [ToTensor()(img) for l in imgs for img in l]),
    Lambda(lambda imgs : torch.stack([Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])(img) \
                                          for img in imgs]))
])

prepare_test = trn.Compose([
    Lambda(lambda imgs : [ToTensor()(CenterCrop(224)(img)) for img in imgs]),
    Lambda(lambda imgs : torch.stack([Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])(img) \
                                          for img in imgs]))
])

label2index = {}

for i,label in enumerate(np.unique(train_label)):
    label2index[label] = i
    
tmp = sum([[label]*4 for label in train_label],[])

y_train = torch.LongTensor([label2index[label] for label in tmp])

X_train = prepare_train(train_img)

y_test = torch.LongTensor([label2index[label] for label in test_label])

X_test = prepare_test(test_img)

y_val = torch.LongTensor([label2index[label] for label in valid_label])

X_val = prepare_test(valid_img)

X_train.shape,y_train.shape,X_test.shape,y_test.shape,X_val.shape,y_val.shape

(torch.Size([21440, 3, 224, 224]),
 torch.Size([21440]),
 torch.Size([1340, 3, 224, 224]),
 torch.Size([1340]),
 torch.Size([1506, 3, 224, 224]),
 torch.Size([1506]))

<div class="alert alert-info">
    <h1>Training the model</h1>
    </div>

In [64]:
def train(data, model, optimizer,loss_func,checkpoint,patience, batch_size, iterations):
    
    early_stopping = EarlyStopping(checkpoint,patience)
    
    X_train,y_train,X_val, y_val = data
    
    i = 0
    j = 0

    train_losses = []

    valid_losses = []

    for e in range(iterations):

        model.train()

        ypred = model.forward(Variable(X_train[i:i+batch_size].cuda()))

        loss = loss_func(ypred, y_train[i:i+batch_size].cuda())

        i = (i+batch_size)%X_train.shape[0]

        if(e == 6*int(iterations/10)):
            for param_group in optimizer.param_groups:
                param_group['lr'] /= 10
        
        
#         loss = norm_l_SP(model,loss,2)
        
        optimizer.zero_grad()
        
        loss.backward()

        optimizer.step()

        train_losses.append(loss.item())

        model.eval()

        ypred_val = model.forward(Variable(X_val[j:j+int(batch_size/4)].cuda()))

        loss = loss_func(ypred_val, y_val[j:j+int(batch_size/4)].cuda())

        j = (j+int(batch_size/4))%X_val.shape[0]

        valid_losses.append(loss.item())

        early_stopping(loss.item(), model)

        if early_stopping.early_stop:
                print("Early stopping")
                break

        print("iteration {}".format(e))
        
    model.load_state_dict(torch.load(checkpoint))
    
    return model, train_losses, valid_losses


def norm_l(model,loss, norm):
    
    alpha = 0.001
    reg_loss = None
   
    for param in model.parameters():
        if reg_loss is None:
            if(norm == 2):
                reg_loss = param.data.norm(norm)**2
            else:
                reg_loss = param.data.norm(norm)
        else:
            if(norm == 2):
                reg_loss += param.data.norm(norm)**2
            else:
                reg_loss += param.data.norm(norm)

    return loss + alpha * reg_loss.data

def norm_l_SP(model,loss, norm): 
    
    alpha = 0.001
    beta = 0.001
    reg_loss = None
    
    for (name,ref_param), param in zip(ref_model.named_parameters(), model.parameters()):
        
        if reg_loss is None:
            if(norm == 2):
                reg_loss = (ref_param.data - param.data).norm(norm)**2
            else:
                reg_loss = (ref_param.data - param.data).norm(norm)
        
        if(name.startswith("fc")):
            if(norm == 2):
                reg_loss += beta * param.data.norm(norm)**2
            else:
                reg_loss += beta * param.data.norm(norm)
            
        else:
            if(norm == 2):
                reg_loss += (ref_param.data - param.data).norm(norm)**2
            else:
                reg_loss += (ref_param.data - param.data).norm(norm)

    return loss + alpha * reg_loss.data

In [65]:
idx = np.arange(X_train.shape[0])
np.random.shuffle(idx)

X_train = X_train[idx]
y_train = y_train[idx]

idx = np.arange(X_val.shape[0])
np.random.shuffle(idx)

X_val = X_val[idx]
y_val = y_val[idx]

data = (X_train,y_train,X_val, y_val)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
loss_func = nn.CrossEntropyLoss()
model, train_loss, valid_loss = train(data, model.cuda(), optimizer, loss_func,"checkpoint_nothing.pt",100, 64, 9000)

iteration 0
iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
iteration 10
iteration 11
iteration 12
iteration 13
iteration 14
iteration 15
iteration 16
iteration 17
iteration 18
iteration 19
iteration 20
iteration 21
iteration 22
iteration 23
iteration 24
iteration 25
iteration 26
iteration 27
iteration 28
iteration 29
iteration 30
iteration 31
iteration 32
iteration 33
iteration 34
iteration 35
iteration 36
iteration 37
iteration 38
iteration 39
iteration 40
iteration 41
iteration 42
iteration 43
iteration 44
iteration 45
iteration 46
iteration 47
iteration 48
iteration 49
iteration 50
iteration 51
iteration 52
iteration 53
iteration 54
iteration 55
iteration 56
iteration 57
iteration 58
iteration 59
iteration 60
iteration 61
iteration 62
iteration 63
iteration 64
iteration 65
iteration 66
iteration 67
iteration 68
iteration 69
iteration 70
iteration 71
iteration 72
iteration 73
iteration 74
iteration 75
iteration 76
iteration

iteration 594
iteration 595
iteration 596
Early stopping


In [68]:
ypred = []

model.eval()
i = 0
batch_size = 8

with torch.no_grad():
    
    for e in range(int(X_test.shape[0]/batch_size)+1):
        ypred.append(model.forward(Variable(X_test[i:i+batch_size]).cuda()))
        i+=batch_size


In [69]:
pred = torch.stack([p for pred in ypred for p in pred])
pred.shape, y_test.shape

(torch.Size([1340, 67]), torch.Size([1340]))

In [70]:
print("accuracy is {0:.2f}%.".format(accuracy_score(y_test.data, torch.max(pred.data, 1)[1])*100))

accuracy is 68.06%.


In [71]:
prec,rec,f1,_ = precision_recall_fscore_support(y_test.data, torch.max(pred.data, 1)[1], average="macro")
print("precision: {0:.2f}%\nrecall: {0:.2f}%\nf1 score: {0:.2f}%".format(prec*100,rec*100,f1*100))

precision: 72.16%
recall: 72.16%
f1 score: 72.16%
