Import Libraries

In [1]:
import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url
from torch.utils.model_zoo import load_url as load_state_dict_from_url
from typing import Type, Any, Callable, Union, List, Dict, Optional, cast
from torch import Tensor
from collections import OrderedDict 

Build New Model

In [2]:
import torchvision.models as models
rnext = models.resnext101_32x8d(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth" to /root/.cache/torch/hub/checkpoints/resnext101_32x8d-8ba56ff5.pth


  0%|          | 0.00/340M [00:00<?, ?B/s]

In [3]:
children_counter = 0
for n,c in rnext.named_children():
    print("Children Counter: ",children_counter," Layer Name: ",n,)
    children_counter+=1

Children Counter:  0  Layer Name:  conv1
Children Counter:  1  Layer Name:  bn1
Children Counter:  2  Layer Name:  relu
Children Counter:  3  Layer Name:  maxpool
Children Counter:  4  Layer Name:  layer1
Children Counter:  5  Layer Name:  layer2
Children Counter:  6  Layer Name:  layer3
Children Counter:  7  Layer Name:  layer4
Children Counter:  8  Layer Name:  avgpool
Children Counter:  9  Layer Name:  fc


In [4]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [5]:
from PIL import Image
import cv2
import torchvision.transforms as transforms
import collections
import random
from random import seed, choice, sample
import os
import json
from google.colab.patches import cv2_imshow
import sys
import time
import matplotlib
import matplotlib.pylab as plt
plt.rcParams["axes.grid"] = False
from os.path import exists, join, basename, splitext
import csv
import glob

In [6]:
%matplotlib inline
import os
import sys
import json
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import skimage.transform
from PIL import Image
from collections import Counter
from tqdm.notebook import tqdm

import torch
torch.cuda.empty_cache()
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence
from nltk.translate.bleu_score import corpus_bleu

import warnings
warnings.filterwarnings("ignore")

In [7]:
import pandas as pd
import numpy as np
import re
from tqdm import tqdm

from gensim.models.fasttext import FastText
 
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.pyplot as plt

In [8]:
os.chdir('/content/drive/My Drive/Final Year Project/')

In [9]:
print(os.path.abspath('.'))

/content/drive/.shortcut-targets-by-id/1sz5w7NJWd5pUCRXsoinjyyZ-EUMh1tbM/Final Year Project


In [10]:
filename = '/content/drive/My Drive/Final Year Project/input_images_train.txt'
read_file = open(filename,"r") 

train_image_paths = []
train_image_names = []
#print("Output of Read function is ")
data_read = read_file.read()
data_into_list = data_read.split("\n")
#print(data_into_list)
for i in range(len(data_into_list)):
    if(len(data_into_list[i])>1):
        train_image_paths.append(data_into_list[i])
        path = data_into_list[i]
        name_list = path.split("/")
        name = name_list[len(name_list)-1]
        #print(path)
        train_image_names.append(name)
print("No. of images: ",len(train_image_paths), len(train_image_names))

No. of images:  23431 23431


In [11]:
filename = '/content/drive/My Drive/Final Year Project/input_images_val.txt'
read_file = open(filename,"r") 

val_image_paths = []
val_image_names = []
#print("Output of Read function is ")
data_read = read_file.read()
data_into_list = data_read.split("\n")
#print(data_into_list)
for i in range(len(data_into_list)):
    if(len(data_into_list[i])>1):
        val_image_paths.append(data_into_list[i])
        path = data_into_list[i]
        name_list = path.split("/")
        name = name_list[len(name_list)-1]
        #print(path)
        val_image_names.append(name)
print("No. of images: ",len(val_image_paths), len(val_image_names))

No. of images:  7750 7750


In [12]:
image_paths = []
image_names = []

for i in range(5):
  image_paths.append(train_image_paths[i])
  image_names.append(train_image_names[i])

print(len(image_paths), len(image_names))

5 5


In [13]:
# sets device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
class EncoderCNN(nn.Module):
    def __init__(self, enc_image_size=14):
        super(EncoderCNN, self).__init__()
        self.enc_image_size = enc_image_size

        # pretrained ImageNet ResNeXt-101
        resnext = models.resnext101_32x8d(pretrained=True)

        # remove linear and pool layers
        modules = list(resnext.children())[:-2]
        #print(modules)
        self.resnext = nn.Sequential(*modules)

        # resize image to fixed size using adaptive pool to allow input images of variable size
        self.adaptive_pool = nn.AvgPool2d((enc_image_size, enc_image_size))

        self.fine_tune()

    def fine_tune(self, fine_tune=True):
        """
        Allow or prevent computation of the gradients for convolutional blocks 2 through 4 of the image encoder.
        :param fine_tune: boolean
        """
        for param in self.resnext.parameters():
            param.requires_grad = False
        # if fine-tuning, fine-tune convolutional blocks 2 through 4
        for child in list(self.resnext.children())[5:]:
            for param in child.parameters():
                param.requires_grad = fine_tune
    
    def forward(self, images):
        """
        Forward propagation.
        :param images: images, a tensor of dim (batch_size, 3, image_size, image_size)
        :return enc_images: encoded repr of images, a tensor of dim (batch_size, enc_image_size, enc_image_size, 2048)
        """
        out = self.resnext(images)       # (batch_size, 2048, image_size/32, image_size/32)
        #out = self.adaptive_pool(out)   # (batch_size, 2048, enc_image_size, enc_image_size)
        #out = out.permute(0, 2, 3, 1)   # (batch_size, enc_image_size, enc_image_size, 2048)
        print(out.size())
        out = out.detach().to("cpu").numpy()
        #print(out.shape) 
        return out

In [16]:
fine_tune_encoder = False  # fine-tune encoder?
encoder_lr = 1e-4  # learning rate for encoder if fine-tuning

encoder = EncoderCNN()
encoder.fine_tune(fine_tune_encoder)
encoder_optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()), lr=encoder_lr) if fine_tune_encoder else None

In [None]:
path = '/content/drive/My Drive/Final Year Project/cnn_features/'
for i in range(len(image_paths)):
    #read and pre-process image
    #print("CNN Encoding for: ",image_names[i])
    name = image_names[i]
    name = name[:len(name)-4]
    print(name)
    img = Image.open(val_image_paths[i]).convert('RGB')
    img = np.array(img)
    img = cv2.resize(img, (256, 256))

    # sanity check
    assert img.shape == (256, 256, 3)
    assert np.max(img) <= 255

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    img = transform(img)    # (3, 256, 256)
    
    np_path = path + name
    #print(np_path)
    #print(img.shape)
    out = encoder(img.unsqueeze(0))
    #print(out.dtype, out.shape)
    np.save(np_path, out)
    #data['cnn_vector'][i] = out

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
VizWiz_train_00018433
VizWiz_train_00018434
VizWiz_train_00018435
VizWiz_train_00018436
VizWiz_train_00018437
VizWiz_train_00018438
VizWiz_train_00018439
VizWiz_train_00018440
VizWiz_train_00018441
VizWiz_train_00018442
VizWiz_train_00018443
VizWiz_train_00018444
VizWiz_train_00018445
VizWiz_train_00018446
VizWiz_train_00018447
VizWiz_train_00018448
VizWiz_train_00018449
VizWiz_train_00018450
VizWiz_train_00018451
VizWiz_train_00018452
VizWiz_train_00018453
VizWiz_train_00018454
VizWiz_train_00018455
VizWiz_train_00018456
VizWiz_train_00018457
VizWiz_train_00018458
VizWiz_train_00018459
VizWiz_train_00018460
VizWiz_train_00018461
VizWiz_train_00018462
VizWiz_train_00018463
VizWiz_train_00018464
VizWiz_train_00018465
VizWiz_train_00018466
VizWiz_train_00018467
VizWiz_train_00018468
VizWiz_train_00018469
VizWiz_train_00018470
VizWiz_train_00018471
VizWiz_train_00018472
VizWiz_train_00018473
VizWiz_train_00018474
VizWiz_trai