In [None]:
import os
%matplotlib inline
from pytorch_metric_learning import losses, miners, samplers, trainers, testers, distances
from pytorch_metric_learning.utils import common_functions
import pytorch_metric_learning.utils.logging_presets as logging_presets
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator
import numpy as np
import torchvision
from torchvision import datasets, transforms
import torch
import torch.nn as nn
from PIL import Image
import logging
import matplotlib.pyplot as plt
import umap.umap_ as umap
from cycler import cycler
import record_keeper
import pytorch_metric_learning
import pandas as pd
from tqdm import tqdm_notebook
from pytorch_metric_learning.utils.inference import MatchFinder, InferenceModel
from pytorch_metric_learning.distances import CosineSimilarity
from pytorch_metric_learning.utils import common_functions as c_f
from random import randint
from os.path import expanduser

# Let INFO-level records through the root logger, then log which
# pytorch_metric_learning version this run uses.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
logging.info("VERSION %s", pytorch_metric_learning.__version__)

# Veri setini hazırlama

In [None]:
# Read the Shopee product-matching training table and preview its first rows.
shopee_train_csv = '/home/twoaday/research/data-sets/shopee/shopee-product-matching/train.csv'
df_shopee = pd.read_csv(shopee_train_csv)
df_shopee.head()

In [None]:
# Walk the Stanford Online Products tree and collect the path of every JPEG.
stanford_file_list = []
for parent_path, _, filenames in tqdm_notebook(os.walk(
    '/home/twoaday/research/data-sets/stanford/Stanford_Online_Products')):
    for f in filenames:
        # Check the real extension (case-insensitively) instead of searching for
        # '.jpg' anywhere in the name, so files such as 'x.jpg.bak' are skipped.
        if os.path.splitext(f)[1].lower() == '.jpg':
            stanford_file_list.append(os.path.join(parent_path, f))

# Fail with a readable message instead of a bare IndexError when nothing was found.
if not stanford_file_list:
    raise FileNotFoundError('No .jpg files found under the Stanford Online Products directory')

# One record per image: the label is the part of the file name before the
# first underscore, prefixed with 'st_'.
df_stanford = [
    {'label': f"st_{os.path.basename(path).split('_')[0]}", 'image_path': path}
    for path in tqdm_notebook(stanford_file_list)
]
df = pd.DataFrame(df_stanford)
df.sample()

In [None]:
# Replace the string labels with integer category codes (one code per distinct label).
df['label'] = df['label'].astype('category').cat.codes

In [None]:
# Preview the first rows of the image frame (columns: label, image_path).
df.head()

# Modeller

In [None]:
class MLP(nn.Module):
    """Stack of linear layers whose output is passed through a softmax over dim 1.

    `layer_sizes[0]` is the input dimension and `layer_sizes[-1]` the output
    dimension. A ReLU is placed *in front of* each linear layer; the last linear
    layer only gets one when `final_relu` is true.
    """

    def __init__(self, layer_sizes, final_relu=False):
        super().__init__()
        sizes = [int(size) for size in layer_sizes]
        n_linear = len(sizes) - 1
        # Linear layers with an index below this limit are preceded by a ReLU.
        relu_limit = n_linear if final_relu else n_linear - 1

        modules = []
        for position, (in_dim, out_dim) in enumerate(zip(sizes[:-1], sizes[1:])):
            if position < relu_limit:
                modules.append(nn.ReLU(inplace=False))
            modules.append(nn.Linear(in_dim, out_dim))

        self.net = nn.Sequential(*modules)
        # Alias to the final linear layer of the stack.
        self.last_linear = self.net[-1]
        self._softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run `x` through the layer stack and return the softmax of the result."""
        return self._softmax(self.net(x))

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embedding_size = 256

# Trunk: pretrained Wide ResNet-101-2 whose final `fc` layer is replaced by an
# identity, so the trunk outputs the features that used to feed `fc`.
trunk = torchvision.models.wide_resnet101_2(pretrained=True)
trunk_output_size = trunk.fc.in_features
trunk.fc = common_functions.Identity()
trunk = torch.nn.DataParallel(trunk.to(device))

# Embedder: maps the trunk output to `embedding_size`-dimensional embeddings.
embedder = MLP([trunk_output_size, embedding_size])
embedder = torch.nn.DataParallel(embedder.to(device))

# One Adam optimizer per model; the trunk uses the smaller learning rate.
trunk_optimizer = torch.optim.Adam(
    trunk.parameters(), lr=0.00001, weight_decay=0.0001)
embedder_optimizer = torch.optim.Adam(
    embedder.parameters(), lr=0.0001, weight_decay=0.0001)



# Augmentation & Transformation

In [None]:
# Image transforms. Both pipelines resize to 64x64, convert to a tensor and
# normalize with the same per-channel mean/std; only training adds a random
# horizontal flip. (RandomRotation and RandomVerticalFlip are left disabled.)
image_size = (64, 64)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

train_steps = [
    transforms.Resize(image_size),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
train_transform = transforms.Compose(train_steps)

val_steps = [
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
val_transform = transforms.Compose(val_steps)

## Veriyi train ve validation olarak ikiye ayırıyoruz

In [None]:
# Hold out whole classes for validation so that no label appears in both splits.
pids = df.label.unique()

# Draw WITHOUT replacement: with the default (replace=True) the same label can be
# picked several times, silently yielding fewer than 2000 distinct validation
# classes. The size is capped so the draw stays valid when fewer labels exist.
# NOTE(review): the draw is unseeded, so the split changes on every run.
val_set = np.random.choice(pids, min(2000, len(pids)), replace=False)
is_val = df.label.isin(val_set)
df_test = df.loc[is_val]
df_train = df.loc[~is_val]

logging.info(f'Train size: {len(df_train)} Validation size: {len(df_test)}')

# Veriyi modele beslemek için data seti oluşturuyoruz

In [None]:
class ProductImageDataSet(torch.utils.data.Dataset):
    """Image dataset backed by an object with `image_path` and `label` columns.

    Integer indexing returns an `(image, label)` pair. Slice indexing returns
    only the images addressed by the slice, stacked into one batch tensor and
    moved to the module-level `device`; no labels are returned for slices.
    """

    def __init__(self, data_set, train, transform):
        """
        Args:
            data_set: object exposing `image_path` and `label` attributes whose
                `.values` are indexable (a pandas DataFrame in this notebook).
            train: accepted for call compatibility; not used by this class.
            transform: callable applied to each loaded RGB image, or None.
        """
        self.data = data_set.image_path.values
        self.targets = data_set.label.values
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.data)

    def _load_image(self, position):
        """Open the image at `position`, convert it to RGB and apply the transform."""
        # The context manager releases the file handle once the pixels are read.
        with Image.open(self.data[position]) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img

    def __getitem__(self, index):
        if isinstance(index, slice):
            # Load every element the slice addresses. Previously only the item at
            # `index.start` was returned, so any slice longer than one element
            # silently dropped images. A one-element slice still yields a batch
            # of size 1, exactly as before.
            positions = range(*index.indices(len(self)))
            batch = torch.stack([self._load_image(i) for i in positions])
            return batch.to(device)
        return self._load_image(index), self.targets[index]

In [None]:
# Wrap each split in a dataset: the training split uses the augmenting
# transform, the validation split the transform without random steps.
train_dataset = ProductImageDataSet(data_set=df_train, train=True, transform=train_transform)
val_dataset = ProductImageDataSet(data_set=df_test, train=False, transform=val_transform)

## Hyperparameters

In [None]:
# Training schedule.
batch_size = 32
num_epochs = 20

# Metric-learning loss and the tuple miner.
loss = losses.CircleLoss(m=0.4)
miner = miners.MultiSimilarityMiner(epsilon=0.1)

# Batch sampler built from the training labels (m=2); one pass is as long as
# the training set.
sampler = samplers.MPerClassSampler(
    train_dataset.targets,
    m=2,
    length_before_new_iter=len(train_dataset),
)

# Bundle everything into the name -> object dictionaries handed to the trainer.
models = dict(trunk=trunk, embedder=embedder)
optimizers = dict(trunk_optimizer=trunk_optimizer, embedder_optimizer=embedder_optimizer)
loss_funcs = dict(metric_loss=loss)
mining_funcs = dict(tuple_miner=miner)

## Görselleştirme ve loglama

In [None]:
# Run name encodes the architecture, loss, batch size and embedding size.
model_name = f'wide_resnet101_2_circle_softmax_{batch_size}_{embedding_size}'

# Checkpoints go under the current user's home directory.
HOME = expanduser("~")
DL_MODELS_PATH = HOME + f'/trained_models/matching/images/{model_name}'
model_folder = DL_MODELS_PATH

# Record keeper plus the hook container built on top of it.
# NOTE(review): `record_keeper` rebinds the name of the imported `record_keeper` module.
tensorboard_folder = f"/home/twoaday/tensorboard_logs/project_kusanagi/images/{model_name}"
record_keeper, _, _ = logging_presets.get_record_keeper("/tmp/", tensorboard_folder)
hooks = logging_presets.get_hook_container(record_keeper)

# Splits handed to the end-of-epoch hook.
dataset_dict = {"val": val_dataset}

def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname, *args):
    """Scatter-plot 2-D UMAP embeddings, one colour per distinct label.

    Args:
        umapper: not used here; part of the hook signature.
        umap_embeddings: array indexed as `[rows, 0]` / `[rows, 1]` for x / y.
        labels: array compared element-wise against each unique label to
            select the rows belonging to that label.
        split_name: split name, used only in the log message.
        keyname: label-set name, used only in the log message.
        *args: ignored.
    """
    logging.info("UMAP plot for the {} split and label set {}".format(split_name, keyname))
    label_set = np.unique(labels)
    num_classes = len(label_set)

    fig, ax = plt.subplots(figsize=(20, 15))
    # Spread the classes evenly over the first 90% of the nipy_spectral colormap.
    colours = [plt.cm.nipy_spectral(i) for i in np.linspace(0, 0.9, num_classes)]
    ax.set_prop_cycle(cycler("color", colours))
    for label in label_set:
        idx = labels == label
        ax.plot(umap_embeddings[idx, 0], umap_embeddings[idx, 1], ".", markersize=1)
    plt.show()
    # The hook may be called many times during training; close the figure
    # explicitly so figures do not pile up on backends that keep them open.
    plt.close(fig)

# Tester: embeds the evaluation splits, scores them with the accuracy
# calculator and passes UMAP projections to `visualizer_hook`.
accuracy_calculator = AccuracyCalculator(k="max_bin_count")
tester = testers.GlobalEmbeddingSpaceTester(
    end_of_testing_hook=hooks.end_of_testing_hook,
    visualizer=umap.UMAP(),
    visualizer_hook=visualizer_hook,
    dataloader_num_workers=8,
    accuracy_calculator=accuracy_calculator,
)

# End-of-epoch hook wired to the tester, the evaluation splits and the model folder.
# NOTE(review): test_interval=1 / patience=1 presumably mean "evaluate every epoch,
# stop after one epoch without improvement" - confirm against logging_presets.
end_of_epoch_hook = hooks.end_of_epoch_hook(
    tester,
    dataset_dict,
    model_folder,
    test_interval=1,
    patience=1,
)

# Training

In [None]:
# Assemble the trainer. Every argument is passed by name so the call does not
# depend on the positional order of the trainer's parameters.
trainer = trainers.MetricLossOnly(
    models=models,
    optimizers=optimizers,
    batch_size=batch_size,
    loss_funcs=loss_funcs,
    mining_funcs=mining_funcs,
    dataset=train_dataset,
    sampler=sampler,
    dataloader_num_workers=16,
    end_of_iteration_hook=hooks.end_of_iteration_hook,
    end_of_epoch_hook=end_of_epoch_hook,
)

In [None]:
# Start training for at most `num_epochs` epochs; the hooks attached to the
# trainer run at the end of every iteration and epoch.
trainer.train(num_epochs=num_epochs)

# Resimleri bastırmak için

In [None]:
# Per-channel statistics used by the Normalize step of the image transforms.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Normalize computes (x - mean) / std; applying it again with mean' = -mean/std
# and std' = 1/std undoes that and recovers the original pixel values.
inv_mean = [-m / s for m, s in zip(mean, std)]
inv_std = [1 / s for s in std]
inv_normalize = transforms.Normalize(mean=inv_mean, std=inv_std)

def imshow(img, figsize=(8, 4)):
    """Display a normalized channel-first image tensor with matplotlib.

    Args:
        img: image tensor laid out as (channels, height, width) that was
            normalized with the module-level `mean` / `std`.
        figsize: matplotlib figure size in inches.
    """
    # detach()/cpu() let tensors that live on the GPU or carry gradients be
    # converted to NumPy; for plain CPU tensors they are no-ops.
    img = inv_normalize(img.detach().cpu())
    # De-normalized values can fall slightly outside [0, 1]; clamp them here
    # instead of letting matplotlib clip them with a warning.
    npimg = img.clamp(0, 1).numpy()
    plt.figure(figsize = figsize)
    # matplotlib expects (height, width, channels).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Eğittiğimiz modeli yüklüyoruz

In [None]:
class CombinedVectorizer(nn.Module):
    """Chain a trunk model and an embedder into a single module."""

    def __init__(self, trunk_model, embedder):
        super().__init__()
        self._trunk = trunk_model
        self._embedder = embedder

    def forward(self, title):
        """Feed `title` through the trunk, then the embedder, and return the result."""
        trunk_features = self._trunk(title)
        embedding = self._embedder(trunk_features)
        return embedding

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embedding_size = 256

# Rebuild the trunk architecture with its `fc` layer replaced by an identity.
# pretrained=False: load_state_dict below is strict by default and overwrites
# every weight, so downloading pretrained weights first would be wasted work.
trunk = torchvision.models.wide_resnet101_2(pretrained=False)
trunk_output_size = trunk.fc.in_features
trunk.fc = common_functions.Identity()

# Embedder: maps the trunk output to `embedding_size`-dimensional embeddings.
embedder = MLP([trunk_output_size, embedding_size]).to(device)

# Read the checkpoints from the folder training saved them to (DL_MODELS_PATH)
# rather than a second, hard-coded home directory that may not match it.
# NOTE(review): the epoch suffix 11 is hard-coded - confirm it is the checkpoint you want.
trunk_path = os.path.join(DL_MODELS_PATH, 'trunk_11.pth')
embedder_path = os.path.join(DL_MODELS_PATH, 'embedder_11.pth')
trunk.load_state_dict(torch.load(trunk_path, map_location=device))
embedder.load_state_dict(torch.load(embedder_path, map_location=device))

# Combine both parts, move them to the device and switch to evaluation mode.
model = CombinedVectorizer(trunk, embedder).to(device)
model.eval()

# Resim arama

In [None]:
# Tensor conversion + normalization with the module-level mean/std (no resize step).
inference_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
transform = transforms.Compose(inference_steps)

# Lookup from each validation label to the dataset positions carrying it.
labels_to_indices = c_f.get_labels_to_indices(val_dataset.targets)

In [None]:
# Match finder based on cosine similarity with a 0.7 threshold.
cosine_similarity = CosineSimilarity()
match_finder = MatchFinder(distance=cosine_similarity, threshold=0.7)

# Inference wrapper around the trained model; the validation set is indexed
# one image at a time (batch_size=1).
inference_model = InferenceModel(model, match_finder=match_finder, batch_size=1)
inference_model.train_indexer(val_dataset, embedding_size)

In [None]:
# random.randint includes BOTH endpoints, so the upper bound has to be the last
# valid index; using len(val_dataset) could raise an IndexError.
index_no = randint(0, len(val_dataset) - 1)
img = val_dataset[index_no][0].unsqueeze(0)
print("query image")
imshow(torchvision.utils.make_grid(img))

# Look up the 10 nearest validation images for the query.
# NOTE(review): `distances` rebinds the name of the `distances` module imported
# from pytorch_metric_learning; kept unchanged in case later cells read it.
indices, distances = inference_model.get_nearest_neighbors(img.to(device), k=10)
nearest_imgs = [val_dataset[i][0] for i in indices[0]]
print("nearest images")
imshow(torchvision.utils.make_grid(nearest_imgs))