### Libraries

In [1]:
import json

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import networkx as nx

In [4]:
import os

In [5]:
import time
from dataclasses import dataclass
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from libs.lizi.my_magi import MyMagiModel
from libs.lizi.my_magi.config import MagiConfig
from libs.lizi.my_magi.utils import UnionFind
from libs.lizi.my_magi.utils import read_image_as_np_array as read_image
from numpy.typing import NDArray
from PIL import Image
from rich.pretty import pprint as pp
from torchmetrics.functional.pairwise import pairwise_cosine_similarity
from transformers.modeling_utils import load_state_dict

In [6]:
torch.cuda.is_available()

True

### Uploading images

In [7]:
directory_path = "data/masi_mangas/Oshi no Ko/[Ai's fanclub] Vol. 3 Ch. 28"

In [8]:
images_bw = []
images_color = []

In [9]:
@dataclass
class ImageInfo:
    image: np.ndarray
    full_file_name: str
    
    def get_image_array(self):
        return self.image

In [10]:
# Заглушка для чтания файлов из папки с целой главой
for filename in os.listdir(directory_path):
    if filename.endswith("_bw.png"):
        full_path = os.path.join(directory_path, filename)
        try:
            img = np.asarray(Image.open(full_path).convert("RGB"))
            images_bw.append(ImageInfo(image=img, full_file_name=full_path))
        except Exception as e:
            print(f"Ошибка при открытии {full_path}: {e}")
    elif filename.endswith("_color.png"):
        full_path = os.path.join(directory_path, filename)
        try:
            img = np.asarray(Image.open(full_path).convert("RGB"))
            images_color.append(ImageInfo(image=img, full_file_name=full_path))
        except Exception as e:
            print(f"Ошибка при открытии {full_path}: {e}")

In [None]:
pp(images_bw[0])

In [None]:
# Пока не трогай ячейку
group_size = 10
images_bw_grouped = [images_bw[i : i + group_size] for i in range(0, len(images_bw), group_size)]
images_color_grouped = [
    images_color[i : i + group_size] for i in range(0, len(images_color), group_size)
]

In [None]:
type(images_bw_grouped[0][0])

In [None]:
images_bw_grouped[0][0].shape

### Model initialization

Загружаем pre-train веса из файла в dict

In [11]:
state_dict = load_state_dict(str(Path("models/magi/pytorch_model.bin").resolve()))
state_dict.keys()

dict_keys(['ocr_model.encoder.embeddings.cls_token', 'ocr_model.encoder.embeddings.position_embeddings', 'ocr_model.encoder.embeddings.patch_embeddings.projection.weight', 'ocr_model.encoder.embeddings.patch_embeddings.projection.bias', 'ocr_model.encoder.encoder.layer.0.attention.attention.query.weight', 'ocr_model.encoder.encoder.layer.0.attention.attention.key.weight', 'ocr_model.encoder.encoder.layer.0.attention.attention.value.weight', 'ocr_model.encoder.encoder.layer.0.attention.output.dense.weight', 'ocr_model.encoder.encoder.layer.0.attention.output.dense.bias', 'ocr_model.encoder.encoder.layer.0.intermediate.dense.weight', 'ocr_model.encoder.encoder.layer.0.intermediate.dense.bias', 'ocr_model.encoder.encoder.layer.0.output.dense.weight', 'ocr_model.encoder.encoder.layer.0.output.dense.bias', 'ocr_model.encoder.encoder.layer.0.layernorm_before.weight', 'ocr_model.encoder.encoder.layer.0.layernorm_before.bias', 'ocr_model.encoder.encoder.layer.0.layernorm_after.weight', 'ocr_mo

Считываем конфиг из локальной директории и инициализируем нашу модель с ним

In [12]:
config: MagiConfig = MagiConfig.from_json_file(Path("libs/lizi/my_magi/config.json").resolve())  # type: ignore
model = MyMagiModel(config)

Загружаем pre-train веса в модель

In [13]:
model.load_state_dict(state_dict, strict=False)

_IncompatibleKeys(missing_keys=[], unexpected_keys=['ocr_model.encoder.embeddings.cls_token', 'ocr_model.encoder.embeddings.position_embeddings', 'ocr_model.encoder.embeddings.patch_embeddings.projection.weight', 'ocr_model.encoder.embeddings.patch_embeddings.projection.bias', 'ocr_model.encoder.encoder.layer.0.attention.attention.query.weight', 'ocr_model.encoder.encoder.layer.0.attention.attention.key.weight', 'ocr_model.encoder.encoder.layer.0.attention.attention.value.weight', 'ocr_model.encoder.encoder.layer.0.attention.output.dense.weight', 'ocr_model.encoder.encoder.layer.0.attention.output.dense.bias', 'ocr_model.encoder.encoder.layer.0.intermediate.dense.weight', 'ocr_model.encoder.encoder.layer.0.intermediate.dense.bias', 'ocr_model.encoder.encoder.layer.0.output.dense.weight', 'ocr_model.encoder.encoder.layer.0.output.dense.bias', 'ocr_model.encoder.encoder.layer.0.layernorm_before.weight', 'ocr_model.encoder.encoder.layer.0.layernorm_before.bias', 'ocr_model.encoder.encoder

In [14]:
model.cuda() # type: ignore

MyMagiModel(
  (crop_embedding_model): ViTMAEModel(
    (embeddings): ViTMAEEmbeddings(
      (patch_embeddings): ViTMAEPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
    )
    (encoder): ViTMAEEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTMAELayer(
          (attention): ViTMAEAttention(
            (attention): ViTMAESelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTMAESelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTMAEIntermediate(
            (dense): Linear(in_features=768, out_features=

### Get embeddings

**image_bboxes** - list со вложенными lists, вложенный список - одна страница, где каждый элемент - np.array
**character_scores** - list с тензорами, тензор - одна страница, каждый элемент - для каждого bbox score
**crop_embeddings_for_batch** - list с тензорами, где каждый тензор - страница, а строка - для каждого bbox
**crop_bboxes** - list c тензорами, где каждый тензор - страница, а строка - для каждого bbox

In [None]:
# Сериализация тензора PyTorch в байты
# Требует доработки
pt_tensor_bytes = pt_tensor.numpy().tobytes()

# Сериализация массива NumPy в байты
np_array_bytes = np_array.tobytes()


In [None]:
# Создание списка с сериализованными байтами
# Требует доработки
data_list = [
    {"type": "pytorch", "data": pt_tensor_bytes},
    {"type": "numpy", "data": np_array_bytes}
]

# Сериализация списка в JSON
json_data = json.dumps(data_list)


In [15]:
crop_bboxes_bw = []
crop_embeddings_for_batch_bw = []
image_bboxes_bw = []
character_scores_bw = []
crop_bboxes_color = []
crop_embeddings_for_batch_color = []
image_bboxes_color = []
character_scores_color = []

In [None]:
# Тензор с эмбеддингами примеров окраски
samples_character = torch.stack([
    crop_embeddings_for_batch_color[0][0][7],
    crop_embeddings_for_batch_color[0][0][2],
    crop_embeddings_for_batch_color[0][0][3],
    crop_embeddings_for_batch_color[0][2][0],
    crop_embeddings_for_batch_color[0][2][3],
    crop_embeddings_for_batch_color[0][3][2],
    crop_embeddings_for_batch_color[0][3][3],
    crop_embeddings_for_batch_color[0][5][6],
    crop_embeddings_for_batch_color[0][9][1],
    crop_embeddings_for_batch_color[0][9][2],
])

In [None]:
Image.fromarray(image_bboxes_color[0][2][3])

In [None]:
type(images_color_grouped[0][0])

In [16]:
@dataclass
class CropBbox:
    image_bbox: np.ndarray # картиночка самого bboxa
    character_score: float # не нужен, но есть
    embeddings_for_batch: torch.Tensor # эмбеддинг для сравнения
    crop_bboxes_for: torch.Tensor # 4 координаты
    file_name: str # имя страницы исходной, иначе хрен сравним с раскраской

In [20]:
images_bw_for_everything = []

In [21]:
for batch in images_bw:
    with torch.no_grad():
        # Извлечение массива изображения и названия файла из каждого экземпляра ImageInfo в batch
        page_image = [batch.image]
        page_name = batch.full_file_name

        # Предполагается, что model.get_crops_and_embeddings() может принимать список массивов NumPy
        (
            batch_crop_bboxes,
            batch_crop_embeddings_for_batch,
            batch_image_bboxes,
            batch_character_scores,
        ) = model.get_crops_and_embeddings(page_image)

    num_rows = len(batch_crop_embeddings_for_batch[0])

    for i in range(num_rows):
        images_bw_for_everything.append(
            CropBbox(
                image_bbox=batch_image_bboxes[0][i],
                character_score=batch_character_scores[0][i],
                embeddings_for_batch=batch_crop_embeddings_for_batch[0][i],
                crop_bboxes_for=batch_crop_bboxes[0][i],
                file_name=page_name,
            )
        )
    # crop_bboxes_bw.extend(batch_crop_bboxes)
    # crop_embeddings_for_batch_bw.extend(batch_crop_embeddings_for_batch)
    # image_bboxes_bw.extend(batch_image_bboxes)
    # character_scores_bw.extend(batch_character_scores)

  return F.conv2d(input, weight, bias, self.stride,


In [None]:
pp(file_names_bw)

In [None]:
pp(image_bboxes_bw)

image_bboxes - list со вложенными lists, вложенный список - одна страница, где каждый элемент - np.array
character_scores - list с тензорами, тензор - одна страница, каждый элемент - для каждого bbox score
crop_embeddings_for_batch - list с тензорами, где каждый тензор - страница, а строка - для каждого bbox
crop_bboxes - list c тензорами, где каждый тензор - страница, а строка - для каждого bbox

In [None]:
for batch in images_color_grouped:
    with torch.no_grad():
        (
            batch_crop_bboxes,
            batch_crop_embeddings_for_batch,
            batch_image_bboxes,
            batch_character_scores,
        ) = model.get_crops_and_embeddings(batch)
    crop_bboxes_color.append(batch_crop_bboxes)
    crop_embeddings_for_batch_color.append(batch_crop_embeddings_for_batch)
    image_bboxes_color.append(batch_image_bboxes)
    character_scores_color.append(batch_character_scores)
    

In [None]:
for batch in images_bw_grouped:
    with torch.no_grad():
        (
            batch_crop_bboxes,
            batch_crop_embeddings_for_batch,
            batch_image_bboxes,
            batch_character_scores,
        ) = model.get_crops_and_embeddings(batch)
    crop_bboxes_bw.append(batch_crop_bboxes)
    crop_embeddings_for_batch_bw.append(batch_crop_embeddings_for_batch)
    image_bboxes_bw.append(batch_image_bboxes)
    character_scores_bw.append(batch_character_scores)
    

# Get matrix per page

### Проба с максимумом по главе

In [None]:
# Список для поиска совпадений с раскрашенными персами
compare_list = []

In [None]:
# Перебираем все эмбеддинги, которые упакованы по 10 страниц
for one_pack in range(len(crop_embeddings_for_batch_bw)):
    # Заходим в каждую страницу и объединяем все 10 страниц в батче в один тензор эмбеддингов
    for i in range(len(crop_embeddings_for_batch_bw[one_pack])):
        if i == 0:
            crop_embeds_bw = crop_embeddings_for_batch_bw[one_pack][0]
        else:
            crop_embeds_bw = torch.cat(
                (crop_embeds_bw, crop_embeddings_for_batch_bw[one_pack][i]), dim=0
            )

    # Матрица с косинусными совпадениями все-на-все
    pcs = pairwise_cosine_similarity(crop_embeds_bw, crop_embeds_bw)
    # Меняем единицы в главной диагонали на нули
    pcs = pcs.fill_diagonal_(0.0)
    # Получаем индексы всех максимумов - лучшие совпадения
    new_var = torch.argmax(pcs, dim=1)
    # Объединяем индексы лучших совпадений друг с другом попарно
    char_to = torch.cat(
        (new_var.unsqueeze(1), torch.arange(len(new_var)).cuda().unsqueeze(1)), dim=1
    )
    # Делаем граф из совпадающих вершин
    graphs_chapter_one_max = nx.Graph(char_to.tolist())

    # Объединяем все совпавшие вершины друг с другом  
    indixes_per_chapter = [
        list(c_) for c_ in nx.connected_components(graphs_chapter_one_max)
    ]

    # Создаём список compare_list и добавляем внутри него тензоры
    for c_k in indixes_per_chapter:
        for character_index in range(len(c_k)):
            num = int(c_k[character_index])
            if character_index == 0:
                first_compare_batch = crop_embeds_bw[num].unsqueeze(dim=0)
            else:
                first_compare_batch = torch.cat(
                (first_compare_batch, crop_embeds_bw[num].unsqueeze(dim=0)), dim=0
            )
        # Тензор с эмбендингами, внутри каждого тензора строки с эмбеддингами совпавших персонажей        
        compare_list.append(first_compare_batch)
    

In [None]:
compare_list[0].size()

---

### Сравнение samples со списками

In [None]:
my_dict = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}

if my_dict.get('key1') is not None:
    print("Key exists in the dictionary.")
else:
    print("Key does not exist in the dictionary.")

In [None]:
result_dict = {}

In [None]:
for one_tensor in compare_list:
    # Составляем матрицу и сравниваем с самплами
    pcs_samples = pairwise_cosine_similarity(one_tensor, samples_character)
    # Ищем сумму по всем значениям
    comp = torch.sum(pcs_samples, dim=0)
    # Ищем индекс максимального совпадения
    max_coincidence = int(torch.argmax(comp))
    if result_dict.get(max_coincidence) is not None:
        inter_res = torch.cat(
                (result_dict[max_coincidence], one_tensor), dim=0)
        result_dict[max_coincidence] = inter_res
    else:
        result_dict[max_coincidence] = one_tensor
    

In [None]:
pp(result_dict)

---

In [None]:
Image.fromarray(image_bboxes_bw[1][1][1])


In [None]:
first_inds = torch.argmax(dict_pcs[0], dim=1)
char_to = torch.cat((first_inds.unsqueeze(1), torch.arange(len(first_inds)).cuda().unsqueeze(1)), dim=1)

In [None]:
first_inds

In [None]:
dict_pcs[0]

In [None]:
new_pcs_without_max = pcs.clone().detach()

In [None]:
# first_inds = torch.max(new_pcs_without_, dim=1)
first_inds = torch.argmax(new_pcs_without_max, dim=1)

In [None]:
char_to = torch.cat((first_inds.unsqueeze(1), torch.arange(len(first_inds)).cuda().unsqueeze(1)), dim=1)

In [None]:
pp(char_to)

In [None]:
graphs_chapter_one_max = nx.Graph(char_to.tolist())
indixes_per_chapter = [list(c_) for c_ in nx.connected_components(graphs_chapter_one_max)]

In [None]:
nx.draw_spring(graphs_chapter_one_max, arrows=True, with_labels=True)

### Проба с двумя максимумами по главе

In [None]:
for i in range(len(crop_embeddings_for_batch)):
    if i == 0:
        crop_embeds = crop_embeddings_for_batch[i]
    else:
        crop_embeds = torch.cat((crop_embeds, crop_embeddings_for_batch[i]), dim=0)

In [None]:
crop_embeds.shape  # torch.Size([100, 768])
pcs = pairwise_cosine_similarity(crop_embeds, crop_embeds)
pcs.shape  # torch.Size([100, 100])

In [None]:
new_pcs_without_ = pcs.clone().detach()

In [None]:
new_pcs_without_.fill_diagonal_(0.0)

In [None]:
# first_inds = torch.max(new_pcs_without_, dim=1)
first_inds = torch.argmax(new_pcs_without_, dim=1)

In [None]:
pp(first_inds)

In [None]:
new_pcs_without_.shape

In [None]:
new_pcs_without_[torch.arange(new_pcs_without_.shape[0]), first_inds] = 0.0

In [None]:
new_pcs_without_[1][35]

In [None]:
new_pcs_without_.shape

In [None]:
new_pcs_without_

In [None]:
val, indi = torch.topk(new_pcs_without_, 2, dim=1)

In [None]:
pp(val)

In [None]:
pp(indi)

In [None]:
pp(torch.topk(new_pcs_without_, 2, dim=1))

In [None]:
second_inds = torch.argmax(new_pcs_without_, dim=1)

In [None]:
pp(second_inds)

In [None]:
char_to_char = torch.stack((first_inds, second_inds), dim=1)

In [None]:
char_to_char.shape

In [None]:
char_to_char_last = torch.cat((indi, torch.arange(len(indi)).cuda().unsqueeze(1)), dim=1)

In [None]:
char_to_char_last.shape

In [None]:
pp(char_to_char_last)

In [None]:
type(char_to_char_last)

In [None]:
nx.from_edgelist(char_to_char_last.tolist())

In [None]:
lst_tensor = char_to_char_last.tolist()

In [None]:
graphs_chapter = nx.Graph()

# Добавляем вершины и ребра между ними
for triplet in lst_tensor:
    # Добавляем вершины в граф
    for vertex in triplet:
        graphs_chapter.add_node(vertex)
    
    # Добавляем ребра между всеми тремя вершинами
    graphs_chapter.add_edge(triplet[0], triplet[1])
    graphs_chapter.add_edge(triplet[1], triplet[2])

In [None]:
indixes_per_chapter = [list(c_) for c_ in nx.connected_components(graphs_chapter)]

In [None]:
graphs_chapter = nx.Graph(char_to_char_last.tolist())
indixes_per_chapter = [list(c_) for c_ in nx.connected_components(graphs_chapter)]

In [None]:
class_per_chapter = [0]*len(graphs_chapter.nodes)

In [None]:
for sublist in indixes_per_chapter:
        for item in sublist:
            class_per_chapter[item] = indixes_per_chapter.index(sublist)


In [None]:
nx.draw_spring(graphs_chapter, arrows=True, with_labels=True)

### AgglomerativeClustering и HDBSCAN

In [None]:
from sklearn.cluster import AgglomerativeClustering, HDBSCAN

In [None]:
clustering_agg = AgglomerativeClustering(
    n_clusters=None, metric="precomputed", linkage="complete", distance_threshold=0.9
)

In [None]:
clustering_agg.fit(pcs.cpu().numpy())
len(set(clustering_agg.labels_)), clustering_agg.labels_

In [None]:
clustering = HDBSCAN(metric="precomputed", min_cluster_size=7)
clustering.fit(pcs.cpu().numpy())
len(set(clustering.labels_)), clustering.labels_

In [None]:
plt.imshow(image_bboxes[1][1])

In [None]:
plt.imshow(image_bboxes[2][0])

### Проба с трешхолдом

In [None]:
character_character_matching_threshold = 0.85

In [None]:
indixes = []
in_pr = []

In [None]:
for embedding_per_page in crop_embeddings_for_batch:
    embedding_for_pairwise = embedding_per_page.cuda()
    pcs =  pairwise_cosine_similarity(embedding_for_pairwise, embedding_for_pairwise)
    char_i, char_j = torch.where(pcs > character_character_matching_threshold)
    character_character_associations = torch.stack([char_i, char_j], dim=1)
    graphs = nx.Graph(character_character_associations.tolist())
    indixes_per_image = [list(c) for c in nx.connected_components(graphs)]
    in_pr.append(indixes_per_image)
    class_per_image = [0]*len(graphs.nodes)
    for sublist in indixes_per_image:
        for item in sublist:
            class_per_image[item] = indixes_per_image.index(sublist)
    indixes.append(class_per_image)


In [None]:
nx.draw_spring(graphs, arrows=True, with_labels=True)

In [None]:
len(graphs.nodes)

In [None]:
plt.imshow(image_bboxes[5][5])