# Python и машинное обучение: нейронные сети и компьютерное зрение

## Модуль 6. Поиск похожих изображений, эмбеддинги

Загрузим датасет CalTech101.

In [None]:
!pip install gdown

In [None]:
!mkdir ./datasets
!gdown https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp --output ./datasets/caltech101.tar.gz
!tar -xzf ./datasets/caltech101.tar.gz --directory ./datasets
!mv ./datasets/101_ObjectCategories ./datasets/caltech101
!rm -rf ./datasets/caltech101/BACKGROUND_Google
!echo 'All done!'

In [None]:
import os, shutil

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets,transforms
from torch.nn.functional import normalize

from torchinfo import summary
from torchmetrics import Accuracy, AUROC

from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F

from PIL import Image

import torchvision.models as models

import requests
# Download the ImageNet class-index mapping. The timeout stops a stalled
# connection from hanging the notebook, and raise_for_status() surfaces an
# HTTP error instead of trying to parse an error page as JSON.
_imagenet_response = requests.get(
    'https://files.fast.ai/models/imagenet_class_index.json',
    timeout=30,
)
_imagenet_response.raise_for_status()
imagenet_classes = _imagenet_response.json()

from numpy.linalg import norm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
# MPS is checked with is_available() rather than is_built(): is_built() only
# says PyTorch was compiled with MPS support, not that this machine can
# actually run on it.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device

In [None]:
!pip install torchinfo torchmetrics

In [None]:
# ResNet-50 with torchvision's default pretrained weights, placed on the
# selected device; the printout shows the layer structure.
model_full = models.resnet50(weights='DEFAULT')
model_full = model_full.to(device)
print(model_full)

In [None]:
# Keep every top-level child of the full model except the last one; the
# remaining stack is used as a feature extractor.
modules = list(model_full.children())[:-1]
model_no_fc = nn.Sequential(*modules)

# Freeze all weights: the extractor is only used for inference.
model_no_fc.requires_grad_(False)

print(model_no_fc)

In [None]:
# Per-layer report of input/output shapes and parameter counts for a single
# 3x224x224 input.
summary(
    model_no_fc,
    input_size=(1, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params"],
    device=device,
)

In [None]:
IMAGE_SIZE = (224, 224)
BASE_DIR = './datasets/caltech101'

# Preprocessing applied to every image as it is loaded.
# NOTE(review): the ImageNet mean/std normalisation below is disabled, and the
# tensors are later converted straight back to PIL images for display.
# Pretrained torchvision models are documented to expect normalised input -
# confirm that skipping it here is intentional.
data_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),  # force every picture to a square
    transforms.ToTensor(),  # convert to a tensor
    # transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder whose samples also carry the path of the source file."""

    def __getitem__(self, index):
        """Return ``(image, label, path)`` for the sample at *index*."""
        image, target = super().__getitem__(index)
        # The first element of each ``imgs`` entry is the file path.
        return image, target, self.imgs[index][0]

# Dataset over the extracted CalTech101 folders; each item is
# (image tensor, label, file path).
img_data = ImageFolderWithPaths(
    root=BASE_DIR,
    transform=data_transforms,
    target_transform=None,
)

print(f"Total number of images: {len(img_data)}")

# Fixed seed so the same sample image is picked on every run.
np.random.seed(20231221)
ix_random_image = np.random.choice(len(img_data))

img, label, path = img_data[ix_random_image]
# Print only the path: the raw ``imgs`` entry also carries a second element,
# which made the "filename" line show a whole tuple.
print(f"Image filename: {path}")
display(transforms.ToPILImage()(img))

In [None]:
# Classify the sample image with the full network and list the five most
# probable ImageNet classes.
model_full.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    results = model_full(img.unsqueeze(0).to(device))

# Class probabilities in percent, sorted from most to least likely.
top = torch.sort(F.softmax(results, dim=1)[0] * 100, descending=True)

# Format only the five best entries instead of formatting every class and
# discarding all but five afterwards.
predictions = [
    f"{imagenet_classes[str(ix.item())][1]} - {pct:.2f}%"
    for pct, ix in zip(top.values[:5], top.indices[:5])
]
predictions

А теперь получим эмбеддинг:

In [None]:
# Run the sample image through the network without its final layer.
fc_input = model_no_fc(img.unsqueeze(0).to(device))
print(fc_input.shape, f"Max: {fc_input.max()}, min: {fc_input.min()}")

# Flatten everything after the batch axis and take the only sample.
fc_input = fc_input.flatten(start_dim=1)[0]

# Normalise: divide the vector by its Euclidean length.
embedding = fc_input / fc_input.dot(fc_input).sqrt()
print(embedding)


#### ЗАДАНИЕ 

Напишите функцию, которая будет принимать на вход минибатч из изображений и возвращать pandas DataFrame, содержащий имя файла в качестве индекса и 2048 признаков из эмбеддинга. Названия признаков должны начинаться с префикса ```f```, например, ```f0, f1, ..., f2047```.

In [None]:
def get_embeddings(imgs: torch.Tensor, paths) -> pd.DataFrame:
    """Compute L2-normalised embeddings for a mini-batch of images.

    Args:
        imgs: Batch of image tensors accepted by ``model_no_fc``. The batch is
            moved to ``device`` here, so it may arrive on any device.
        paths: File names of the images, one per batch element, in batch
            order.

    Returns:
        DataFrame indexed by ``paths`` with one row per image and one column
        per embedding component, named ``f0, f1, ...``.
    """
    # Eval mode makes layers such as batch norm use their stored statistics.
    model_no_fc.eval()
    with torch.no_grad():  # inference only
        features = model_no_fc(imgs.to(device))

    # Collapse everything after the batch axis into one feature vector each.
    features = torch.flatten(features, start_dim=1)
    # Scale every row to unit Euclidean length.
    embeddings = F.normalize(features, p=2, dim=1)

    columns = [f"f{i}" for i in range(embeddings.shape[1])]
    return pd.DataFrame(
        embeddings.cpu().numpy(),
        index=list(paths),
        columns=columns,
    )

In [None]:
BATCH_SIZE = 20

# Shuffled mini-batches over the whole dataset.
loader = DataLoader(img_data, batch_size=BATCH_SIZE, shuffle=True)

# Take one batch (the labels are not needed) and try the function on it.
imgs, _, paths = next(iter(loader))

get_embeddings(imgs.to(device), paths)

In [None]:
%%time
# Time one call of get_embeddings on the batch loaded above.
get_embeddings(imgs, paths)

In [None]:
# Embed the whole dataset batch by batch. The per-batch frames are collected
# in a list and concatenated once at the end: calling pd.concat inside the
# loop copies all previously accumulated rows on every iteration.
batch_frames = []
for imgs, _, paths in loader:
    batch_frames.append(get_embeddings(imgs, paths))

# An empty loader leaves df as None, as before.
df = pd.concat(batch_frames) if batch_frames else None

df

In [None]:
!pip install scikit-learn

In [None]:
from sklearn.neighbors import NearestNeighbors

# Exhaustive (brute-force) 10-nearest-neighbour index over the rows of df,
# compared by Euclidean distance.
neighbors = NearestNeighbors(
    n_neighbors=10,
    algorithm='brute',
    metric='euclidean',
)
neighbors = neighbors.fit(df)

In [None]:
# Pick a random row of the embedding table as the query image. The position
# is drawn from len(df) because it indexes df, not the dataset.
np.random.seed(2023122102)
ix_random_image = np.random.choice(len(df))

# The index of df holds the image file paths.
img = Image.open(df.index[ix_random_image])
display(img)

In [None]:
# Query the index with the selected row; both results have one row per query.
distances, indices = neighbors.kneighbors(
    df.iloc[[ix_random_image]]  # double brackets keep a one-row DataFrame
)
print(distances)
print(indices)

# Rows of df for the neighbours that were found.
df.iloc[indices[0]]

In [None]:
# Show the neighbours side by side, each titled with its class, i.e. the
# name of the folder that contains the file.
neighbor_paths = df.index[indices[0]]  # only the file paths are needed

fig = plt.figure(figsize=(20, 2))
for idx, filename in enumerate(neighbor_paths):
    # The column count follows the number of neighbours actually returned.
    ax = fig.add_subplot(1, len(neighbor_paths), idx + 1, xticks=[], yticks=[])
    with Image.open(filename) as neighbor_img:  # close the file after drawing
        ax.imshow(neighbor_img)
    class_ = os.path.basename(os.path.dirname(filename))
    ax.set_title(class_)

plt.show()

### ЗАДАНИЕ

Загрузите в директорию с тетрадью любое изображение из интернета или с жесткого диска. Найдите похожие изображения в датасете CalTech101 и выведите их на экран.

In [None]:
# ваш код здесь

