In [None]:
# Mount Google Drive at /content/drive; later cells read the checkpoint and the
# query images from /content/drive/MyDrive and write the extracted features there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

from PIL import Image
import math
import numpy as np
import os
import json
from sklearn.model_selection import train_test_split
import cv2

In [None]:
import torch 
import torchvision.models as models
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

import torchvision

## L2 normalization

In [None]:
def l2n(x, eps=1e-6):
    """Scale `x` to unit L2 norm along dimension 1.

    The norm is computed over `dim=1` with `keepdim=True`, `eps` is added to it,
    and `x` is divided by the result, so an all-zero slice yields zeros instead
    of a division by zero.
    """
    norms = torch.norm(x, p=2, dim=1, keepdim=True)
    denominator = (norms + eps).expand_as(x)
    return x / denominator

In [None]:
class L2N(nn.Module):
    """Module wrapper around `l2n` that remembers the `eps` to use."""

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps

    def forward(self, x):
        """Return `l2n(x)` computed with this module's `eps`."""
        return l2n(x, eps=self.eps)

    def __repr__(self):
        return '{}(eps={})'.format(type(self).__name__, str(self.eps))

## GeM (generalized-mean pooling)

In [None]:
import torch.nn.functional as F

def gem(x, p=3, eps=1e-6):
    """Generalized-mean (GeM) pooling over the last two dimensions of `x`.

    Values are clamped from below at `eps`, raised to the power `p`, averaged
    with one pooling window that spans the full extent of the last two
    dimensions, and the `1/p` root of that average is returned.
    """
    window = (x.size(-2), x.size(-1))
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, window)
    return pooled.pow(1. / p)

In [None]:
from torch.nn.parameter import Parameter

class GeM(nn.Module):
    """GeM pooling layer whose exponent `p` is stored as a learnable parameter."""

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        # One-element tensor holding the initial exponent, registered as a Parameter.
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Apply `gem` to `x` with the current exponent and `eps`."""
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return '{}(p={:.4f}, eps={})'.format(type(self).__name__, exponent, str(self.eps))

## Kiến trúc của mạng VGG16 có chỉnh sửa

In [None]:
import torch.nn as nn

class ImageRetrievalNet(nn.Module):
    """Retrieval network: features -> optional local whitening -> pooling ->
    L2 norm -> optional whitening -> L2 norm.

    `features` is an iterable of layers that is wrapped in `nn.Sequential`;
    `lwhiten` and `whiten` may be `None` to skip the corresponding step;
    `meta` is stored on the module unchanged.
    """

    def __init__(self, features, lwhiten, pool, whiten, meta):
        super().__init__()
        self.features = nn.Sequential(*features)
        self.lwhiten = lwhiten
        self.pool = pool
        self.whiten = whiten
        self.norm = L2N()
        self.meta = meta

    def forward(self, x):
        """Return the descriptors of `x` as columns (D x N for N images)."""
        feature_map = self.features(x)

        # TODO: properly test with an L2-norm before and/or after local whitening
        # (neither is applied at the moment).
        if self.lwhiten is not None:
            dims = feature_map.size()
            # Move channels last and flatten, so the linear layer sees one row per location.
            per_location = feature_map.permute(0, 2, 3, 1).contiguous().view(-1, dims[1])
            whitened = self.lwhiten(per_location)
            # Restore the (N, C', H, W) layout expected by the pooling layer.
            feature_map = whitened.view(
                dims[0], dims[2], dims[3], self.lwhiten.out_features
            ).permute(0, 3, 1, 2)

        # Pool, normalise, then drop the two trailing singleton dimensions.
        pooled = self.pool(feature_map)
        descriptor = self.norm(pooled).squeeze(-1).squeeze(-1)

        if self.whiten is not None:
            descriptor = self.norm(self.whiten(descriptor))

        # Transpose so each image is one column.
        return descriptor.permute(1, 0)

In [None]:
from torchsummary import summary

# Number of channels in the last feature map of each supported torchvision
# architecture; it is the descriptor size used for the whitening layers.
_OUTPUT_DIM = {
    'alexnet': 256,
    'vgg11': 512, 'vgg13': 512, 'vgg16': 512, 'vgg19': 512,
    'resnet18': 512, 'resnet34': 512,
    'resnet50': 2048, 'resnet101': 2048, 'resnet152': 2048,
    'densenet121': 1024, 'densenet161': 2208, 'densenet169': 1664, 'densenet201': 1920,
    'squeezenet1_0': 512, 'squeezenet1_1': 512,
}
# Used for architectures missing from the table (the value that was previously hard-coded).
_DEFAULT_OUTPUT_DIM = 512


def _log_prefix():
    """Return the name shown at the start of log lines.

    `__file__` is not defined inside a notebook kernel, so fall back to a fixed label
    instead of raising NameError.
    """
    return os.path.basename(globals().get('__file__', 'notebook'))


def _url_registry(name):
    """Return the module-level dict called `name`, or an empty dict if it is not defined."""
    registry = globals().get(name)
    return registry if registry is not None else {}


def _load_precomputed_whitening(layer, key, registry_name, kind):
    """Load precomputed weights into `layer` when `key` is listed in the named registry.

    `kind` is the human-readable label used in the log line ('local whitening',
    'regional whitening' or 'whitening'). When no entry exists the layer keeps its
    random initialisation.
    """
    urls = _url_registry(registry_name)
    if key in urls:
        print(">> {}: for '{}' custom computed {} '{}' is used"
            .format(_log_prefix(), key, kind, os.path.basename(urls[key])))
        # NOTE(review): get_data_root is not defined in the visible notebook; it must
        # be provided together with the registry for this branch to work.
        whiten_dir = os.path.join(get_data_root(), 'whiten')
        layer.load_state_dict(model_zoo.load_url(urls[key], model_dir=whiten_dir))
    else:
        print(">> {}: for '{}' there is no {} computed, random weights are used"
            .format(_log_prefix(), key, kind))


def init_network(params):
    """Build an `ImageRetrievalNet` from a dictionary of options.

    Recognised keys of `params` (all optional):
        architecture    -- name of a constructor in `torchvision.models` (default 'vgg16')
        local_whitening -- add a linear layer applied to the feature map before pooling
        pooling         -- pooling name (default 'gem'); it is stored in the meta data and
                           used in whitening registry keys, but the pooling layer that is
                           built is always `GeM`
        regional        -- wrap the pooling layer in `Rpool`
        whitening       -- add a linear layer applied to the pooled descriptor
        mean, std       -- per-channel input statistics, stored in the meta data only
        pretrained      -- look up precomputed whitening / feature weights in the
                           module-level registries L_WHITENING, R_WHITENING, WHITENING
                           and FEATURES (default True). A registry that is not defined
                           is treated as empty. The torchvision backbone itself is always
                           created with `pretrained=False`.

    Returns:
        The assembled `ImageRetrievalNet`; its `meta` dict records the options and
        the descriptor size under 'outputdim'.

    Raises:
        ValueError: if `architecture` is not one of the alexnet / vgg / resnet /
            densenet / squeezenet families.

    Side effect: prints a torchsummary table of the raw torchvision network.

    NOTE(review): `Rpool` is not defined in the visible notebook, so `regional=True`
    only works if it is provided elsewhere.
    """
    # parse params with default values
    architecture = params.get('architecture', 'vgg16')
    local_whitening = params.get('local_whitening', False)
    pooling = params.get('pooling', 'gem')
    regional = params.get('regional', False)
    whitening = params.get('whitening', False)
    mean = params.get('mean', [0.485, 0.456, 0.406])
    std = params.get('std', [0.229, 0.224, 0.225])
    pretrained = params.get('pretrained', True)

    # descriptor size depends on the backbone (it was a fixed 512 before)
    dim = _OUTPUT_DIM.get(architecture, _DEFAULT_OUTPUT_DIM)

    # backbone from torchvision, randomly initialised
    net_in = getattr(torchvision.models, architecture)(pretrained=False)
    # summary() prints the table itself; wrapping it in print() only added a stray "None"
    summary(net_in, (3, 224, 224))

    # take only the convolutional part as features; it always ends with a ReLU so
    # that the last activations are non-negative
    if architecture.startswith(('alexnet', 'vgg')):
        features = list(net_in.features.children())[:-1]
    elif architecture.startswith('resnet'):
        features = list(net_in.children())[:-2]
    elif architecture.startswith('densenet'):
        features = list(net_in.features.children())
        features.append(nn.ReLU(inplace=True))
    elif architecture.startswith('squeezenet'):
        features = list(net_in.features.children())
    else:
        raise ValueError('Unsupported or unknown architecture: {}!'.format(architecture))

    # local whitening
    # TODO: lwhiten with possible dimensionality reduce
    lwhiten = None
    if local_whitening:
        lwhiten = nn.Linear(dim, dim, bias=True)
        if pretrained:
            _load_precomputed_whitening(lwhiten, architecture, 'L_WHITENING', 'local whitening')

    # pooling (always GeM, whatever `pooling` says)
    pool = GeM()

    # regional pooling
    # TODO: rwhiten with possible dimensionality reduce
    if regional:
        rpool = pool
        rwhiten = nn.Linear(dim, dim, bias=True)
        if pretrained:
            regional_key = '{}-{}-r'.format(architecture, pooling)
            _load_precomputed_whitening(rwhiten, regional_key, 'R_WHITENING', 'regional whitening')
        pool = Rpool(rpool, rwhiten)

    # whitening of the pooled descriptor
    # TODO: whiten with possible dimensionality reduce
    whiten = None
    if whitening:
        whiten = nn.Linear(dim, dim, bias=True)
        if pretrained:
            whiten_key = architecture
            if local_whitening:
                whiten_key += '-lw'
            whiten_key += '-' + pooling
            if regional:
                whiten_key += '-r'
            _load_precomputed_whitening(whiten, whiten_key, 'WHITENING', 'whitening')

    # meta information stored in the network
    meta = {
        'architecture' : architecture,
        'local_whitening' : local_whitening,
        'pooling' : pooling,
        'regional' : regional,
        'whitening' : whitening,
        'mean' : mean,
        'std' : std,
        'outputdim' : dim,
    }

    # generic image retrieval network
    net = ImageRetrievalNet(features, lwhiten, pool, whiten, meta)

    # fill the features with custom pretrained weights when a URL is registered
    feature_urls = _url_registry('FEATURES')
    if pretrained and architecture in feature_urls:
        print(">> {}: for '{}' custom pretrained features '{}' are used"
            .format(_log_prefix(), architecture, os.path.basename(feature_urls[architecture])))
        model_dir = os.path.join(get_data_root(), 'networks')
        net.features.load_state_dict(model_zoo.load_url(feature_urls[architecture], model_dir=model_dir))

    return net

In [None]:
# Checkpoint of the retrieval network; the keys read here are 'meta' and 'state_dict'.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a CPU-only runtime.
state = torch.load(
    '/content/drive/MyDrive/Yonin-IR/data/retrievalSfM120k-vgg16-gem-b4dcdc6.pth',
    map_location='cpu',
)

# Rebuild the architecture described by the checkpoint metadata. 'pretrained' is False
# because every weight is restored from the checkpoint's state_dict below.
checkpoint_meta = state['meta']
net_params = {
    'architecture': checkpoint_meta['architecture'],
    'pooling': checkpoint_meta['pooling'],
    'local_whitening': checkpoint_meta.get('local_whitening', False),
    'regional': checkpoint_meta.get('regional', False),
    'whitening': checkpoint_meta.get('whitening', False),
    'mean': checkpoint_meta['mean'],
    'std': checkpoint_meta['std'],
    'pretrained': False,
}

net = init_network(net_params)
# The network is only used for inference in this notebook, so put it in evaluation mode.
net.eval()
net.load_state_dict(state['state_dict'])
## Original VGG16 network (the layer table in this cell's output is printed by init_network)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

<All keys matched successfully>

In [None]:
# Print a layer-by-layer summary of the retrieval network `net` for a 3x224x224 input.
summary(net, (3, 224, 224))
# Modified VGG16 network

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

## Tiền xử lý ảnh đầu vào

In [None]:
def image_preprocess(img, size=(362, 362), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Turn a PIL image into a normalised `(1, 3, H, W)` float32 tensor.

    Args:
        img: PIL image (anything providing `resize`, `convert` and array conversion).
        size: `(width, height)` passed to `img.resize`; the aspect ratio is not preserved.
        mean, std: per-channel statistics applied after scaling the pixels to [0, 1].
            The defaults equal the defaults `init_network` stores in the network's
            meta data. Pass `None` for either one to skip the normalisation.

    Returns:
        A `torch.float32` tensor with a leading batch dimension of 1 and channels first.

    Fixes compared to the previous version:
      * `x.T` reversed every axis, turning (H, W, C) into (C, W, H), so the network
        saw each image transposed. The axes are now reordered to (C, H, W).
      * Pixels were passed as raw 0..255 values. They are now scaled to [0, 1] and
        normalised with `mean` / `std`.

    NOTE: descriptors extracted with the old preprocessing are not comparable with
    the ones produced now; re-extract any stored database with this function.
    """
    img = img.resize(tuple(size))
    img = img.convert("RGB")

    # (H, W, 3) float32 in [0, 1]
    pixels = np.asarray(img, dtype=np.float32) / 255.0

    if mean is not None and std is not None:
        # The statistics broadcast over the trailing channel axis.
        pixels = (pixels - np.asarray(mean, dtype=np.float32)) / np.asarray(std, dtype=np.float32)

    # Channels first; the copy makes the memory contiguous for torch.from_numpy.
    chw = np.ascontiguousarray(pixels.transpose(2, 0, 1), dtype=np.float32)
    return torch.from_numpy(chw).unsqueeze(0)

## Extract features

In [None]:
def extract_vector(model, img_path):
    """Return the descriptor `model` produces for the image stored at `img_path`.

    The path is printed as a progress message, the image is opened with PIL and
    converted by `image_preprocess`, and the model output is moved to the CPU and
    squeezed before being returned.

    Raises whatever `Image.open`, `image_preprocess` or the model raise; callers
    that want to skip unreadable files have to catch those themselves.
    """
    print("Xu ly: ", img_path)

    # The context manager closes the file handle once the pixels have been read.
    with Image.open(img_path) as img:
        img_tensor = image_preprocess(img)

    # Inference only: no autograd graph is needed, which saves memory and time.
    with torch.no_grad():
        vector = model(img_tensor).cpu().squeeze()

    return vector

In [None]:
# Alternative dataset root: "/content/oxbuild/"
dataset_path = '/content/drive/MyDrive/Yonin-IR/data/query_image_paris/'

# Parallel lists: entry i of each list describes the same image.
data = {
        "paths": [],
        "names": [],
        "vectors": []
}

# Sorted so that the order of the stored entries does not depend on the filesystem.
listdir = sorted(os.listdir(dataset_path))

for file_paris in listdir:

    full_path = os.path.join(dataset_path, file_paris)

    try:
        image_vector = extract_vector(net, full_path)
    except Exception as err:
        # Skip files that cannot be processed, but report why instead of hiding the
        # error (a bare `except:` would also swallow KeyboardInterrupt).
        print("Skipped {}: {!r}".format(full_path, err))
        continue

    data["vectors"].append(image_vector.tolist())
    data["paths"].append(full_path)
    # splitext removes only the final extension, so names containing dots stay intact.
    data['names'].append(os.path.splitext(file_paris)[0])

Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/defense_1.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/defense_4.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/defense_3.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/defense_5.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/defense_2.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/eiffel_4.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/eiffel_3.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/eiffel_1.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/eiffel_2.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/invalides_1.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/eiffel_5.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/invalides_4.jpg
Xu ly:  /content/drive/MyDrive/Yonin-IR/data/query_image_paris/invalides_3.jp

In [None]:
# Number of images for which a descriptor was extracted and stored
len(data['vectors'])

55

## Lưu file extract

In [None]:
# Write the collected paths, names and descriptor vectors to Drive as indented JSON.
json_path = "/content/drive/MyDrive/Yonin-IR/data/data_paris_vgg16_pretrain_query_362.json"
with open(json_path, "w") as out_file:
    out_file.write(json.dumps(data, indent=4))