In [None]:
# Установка дополнительных зависимостей
# !pip install multidict -q

In [None]:
# Импорт необходимых библиотек
import pandas as pd
import numpy as np
import os
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image

import albumentations as A
from albumentations.pytorch import ToTensorV2

from torch.utils.data import Dataset, DataLoader, Sampler
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.models as models
from sklearn.model_selection import train_test_split

import cv2

from tqdm.auto import tqdm

from numba import jit
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from multidict import MultiDict

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Fix every random seed up front so that runs are reproducible
import random

DEFAULT_RANDOM_SEED = 42


def set_all_seeds(seed=DEFAULT_RANDOM_SEED):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports ``PYTHONHASHSEED`` and sets the cuDNN flags
    (``deterministic=True``, ``benchmark=False``).

    Args:
        seed: Integer seed handed to every generator.
    """
    # cuDNN flags
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

    # Interpreter-level state
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python / NumPy generators first, then the torch CPU and CUDA generators
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)


set_all_seeds(seed=DEFAULT_RANDOM_SEED)

In [None]:
# Builds the train / test splits for the Kylberg and Loz image folders

from typing import Dict, List, Optional, Tuple

class FullDataset():
    """Index the Kylberg and Loz image folders and split them into train / test frames.

    Every produced frame has one row per image file with the columns
    ``class_name``, ``image_name``, ``class`` (integer label) and
    ``dataset_type`` (``'kylberg'`` or ``'loz'``). Loz frames additionally
    carry ``subfolder_name``.
    """

    # Offset added to the Loz labels in the combined frames so that they do not
    # collide with the Kylberg labels (presumably the number of Kylberg classes).
    LOZ_CLASS_OFFSET = 28

    def __init__(self, path_kylberg: str, path_loz: str, classes_from_loz: Optional[List[str]] = None) -> None:
        """Store the dataset locations.

        Args:
            path_kylberg: Root folder with one sub-folder per Kylberg class.
            path_loz: Root folder of the Loz data (``<class>/<subfolder>/<image>``).
            classes_from_loz: Stored on the instance; the filter that used it is
                disabled, so it currently has no effect on the splits.
        """
        self.path_kylberg = path_kylberg
        self.path_loz = path_loz
        self.classes_from_loz = classes_from_loz

    def create_kylberg(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Scan the Kylberg folder and return a shuffled 70/30 ``(train, test)`` split."""
        # NOTE: ids follow the os.listdir() order, which Python documents as
        # arbitrary; the mapping may therefore differ between machines.
        folder_names = [name for name in os.listdir(self.path_kylberg) if os.path.isdir(os.path.join(self.path_kylberg, name))]
        label2id_kylberg = {name: ind for ind, name in enumerate(folder_names)}

        root_path = Path(self.path_kylberg)
        data = {'class_name': [], 'image_name': []}

        for class_folder in tqdm(root_path.iterdir()):
            if class_folder.is_dir():
                for image_file in class_folder.iterdir():
                    if image_file.is_file():
                        data['class_name'].append(class_folder.name)
                        data['image_name'].append(image_file.name)

        df = pd.DataFrame(data)
        df['class'] = df['class_name'].map(label2id_kylberg)
        df['dataset_type'] = 'kylberg'
        train_kylberg, test_kylberg = train_test_split(df, random_state=42, shuffle=True, test_size=.3)

        train_kylberg = train_kylberg.reset_index(drop=True)
        test_kylberg = test_kylberg.reset_index(drop=True)

        return train_kylberg, test_kylberg

    def create_loz(self) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, int]]:
        """Scan the Loz folder and return ``(train, test, label2id)``.

        The label of an image is looked up from the name of its sub-folder;
        the 70/30 split is stratified by that label.
        """
        label2id_lozz = {
            'GP': 0, 'G': 1, 'M': 2, 'T': 3, 'clear': 4
        }

        root_path = Path(self.path_loz)
        data = {'class_name': [], 'subfolder_name': [], 'image_name': []}

        for class_folder in root_path.iterdir():
            if class_folder.is_dir():
                for subfolder in class_folder.iterdir():
                    if subfolder.is_dir():
                        for image_file in subfolder.iterdir():
                            if image_file.is_file():
                                data['class_name'].append(class_folder.name)
                                data['subfolder_name'].append(subfolder.name)
                                data['image_name'].append(image_file.name)

        df = pd.DataFrame(data)
        df['class'] = df['subfolder_name'].map(label2id_lozz)
        df['dataset_type'] = 'loz'

        train_loz, test_loz = train_test_split(df, random_state=42, shuffle=True, test_size=.3, stratify=df['class'])

        train_loz = train_loz.reset_index(drop=True)
        test_loz = test_loz.reset_index(drop=True)

        return train_loz, test_loz, label2id_lozz

    def create_full_df(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Return ``(train, test_kylberg, test_loz)`` with both datasets combined.

        ``train`` is the concatenation of both training splits. Loz labels in
        ``train`` and ``test_loz`` are shifted by ``LOZ_CLASS_OFFSET``.
        """
        train_kylberg, test_kylberg = self.create_kylberg()
        # create_loz() also returns its label mapping, which is not needed here.
        train_loz, test_loz, _ = self.create_loz()

        train = pd.concat([train_kylberg, train_loz])
        train = train.reset_index(drop=True)
        train.loc[train['dataset_type'] == 'loz', 'class'] += self.LOZ_CLASS_OFFSET
        test_loz.loc[test_loz['dataset_type'] == 'loz', 'class'] += self.LOZ_CLASS_OFFSET

        return train, test_kylberg, test_loz

    def split_data(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Alias of :meth:`create_full_df`."""
        return self.create_full_df()

In [None]:
# Cuts an image into square fragments; two strategies are supported

def cut_fragments(image, mode, n, size):
    """Cut ``size`` x ``size`` fragments out of ``image``.

    The function is intentionally plain Python: it only takes array slices, and
    inside a Numba ``nopython`` function ``random.randint`` would draw from
    Numba's own generator, which ``random.seed`` (see ``set_all_seeds``) does
    not seed. Without the JIT wrapper the ``'random'`` mode is reproducible.

    Args:
        image: Array whose first two axes are (height, width).
        mode: ``'central'`` returns ``n * n`` fragments whose top-left corners
            lie on a regular ``n`` x ``n`` grid; ``'random'`` returns ``n``
            fragments at uniformly drawn positions.
        n: Grid side (``'central'``) or number of fragments (``'random'``).
        size: Side length of a fragment in pixels.

    Returns:
        List of array slices of ``image``. In ``'central'`` mode a fragment
        that would cross the image border is truncated by the slicing.

    Raises:
        ValueError: If ``mode`` is unknown, or if ``size`` exceeds the image in
            ``'random'`` mode.
    """
    height, width = image.shape[:2]
    fragments = []
    if mode == 'central':
        # Outer loop walks the columns, inner loop the rows (column-major order).
        for i in range(n):
            left = (width / n) * i
            for j in range(n):
                upper = (height / n) * j
                fragments.append(image[int(upper):int(upper + size), int(left):int(left + size)])
    elif mode == 'random':
        if size > width or size > height:
            raise ValueError(
                f"fragment size {size} does not fit into an image of {width}x{height} pixels"
            )
        for _ in range(n):
            # randint is inclusive on both ends; x is drawn before y.
            left = random.randint(0, width - size)
            upper = random.randint(0, height - size)
            fragments.append(image[upper:upper + size, left:left + size])
    else:
        raise ValueError(f"unknown mode {mode!r}; expected 'central' or 'random'")
    return fragments

In [None]:
# Dataset wrapper used to iterate over the samples

class TorchDataset(Dataset):
    """Map-style dataset over a frame of Kylberg / Loz image rows.

    A ``'kylberg'`` row yields one item (the whole image). A ``'loz'`` row
    yields one item per fragment produced by ``cut_fragments``. Each item is
    ``(image_tensor, image_name, label)``.
    """

    def __init__(self, df:pd.DataFrame, transform_kylberg = None, transform_loz = None,
                 mode: str = 'central', n: int = 3, size: int = 224,
                 kylberg_path: str = '/kaggle/input/kylberg-texture-dataset',
                 loz_path: str = '/kaggle/input/lozzzz/dataset-loz2/dataset2'
            ):
        """
        Args:
            df: Frame with ``dataset_type``, ``class_name``, ``image_name`` and
                ``class`` columns (plus ``subfolder_name`` for Loz rows).
            transform_kylberg: Optional callable applied as
                ``transform(image=array)['image']`` to Kylberg images.
            transform_loz: Same, applied to Loz fragments.
            mode: Fragment strategy forwarded to ``cut_fragments``.
            n: Fragment count parameter forwarded to ``cut_fragments``.
            size: Fragment side length forwarded to ``cut_fragments``.
            kylberg_path: Root folder of the Kylberg images.
            loz_path: Root folder of the Loz images.
        """
        # Work on a private copy: the bookkeeping columns added below must not
        # leak into (or be overwritten through) the caller's frame.
        self.df = df.copy()
        self.mode = mode
        self.n = n
        self.size = size
        self.kylberg_path = kylberg_path
        self.loz_path = loz_path
        self.transform_kylberg = transform_kylberg
        self.transform_loz = transform_loz
        self.prepare_dataset()

    def prepare_dataset(self):
        """Compute, per row, how many items it yields and at which index it starts."""
        if self.mode == 'central':
            items_per_loz = self.n * self.n
        elif self.mode == 'random':
            items_per_loz = self.n
        else:
            items_per_loz = 1

        is_loz = (self.df['dataset_type'] == 'loz').to_numpy()
        total_items = np.where(is_loz, items_per_loz, 1).astype(np.int64)

        self.df['total_items'] = total_items
        # Exclusive cumulative sum: global index of the first item of each row.
        self.df['cumsum'] = np.cumsum(total_items) - total_items

        self._starts = self.df['cumsum'].to_numpy()
        self._length = int(total_items.sum())

    def __len__(self):
        """Total number of items (whole images plus fragments)."""
        return self._length

    def __getitem__(self, idx):
        """Return ``(image_tensor, image_name, label)`` for the global index ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        idx = int(idx)
        if not 0 <= idx < self._length:
            raise IndexError(f'index {idx} is out of range for a dataset of {self._length} items')

        # The start offsets are strictly increasing, so a binary search finds
        # the last row whose first item index is <= idx.
        row_pos = int(np.searchsorted(self._starts, idx, side='right')) - 1
        row = self.df.iloc[row_pos]
        local_idx = idx - int(row['cumsum'])

        if row['dataset_type'] == 'kylberg':
            image_path = os.path.join(self.kylberg_path, row['class_name'], row['image_name'])
            with Image.open(image_path) as source:
                pixels = np.array(source.convert('L'))
        else:  # 'loz'
            img_path = os.path.join(self.loz_path, row['class_name'], row.get('subfolder_name', ''), row['image_name'])
            with Image.open(img_path) as source:
                full_image = np.array(source.convert('L'))
            fragments = cut_fragments(full_image, self.mode, self.n, self.size)
            pixels = fragments[local_idx]

        # Scale the grayscale values from [0, 255] to [0, 1].
        image_np = np.array(pixels, dtype=np.float32) / 255.0
        if row['dataset_type'] == 'kylberg' and self.transform_kylberg:
            image_np = self.transform_kylberg(image=image_np)['image']
        elif row['dataset_type'] == 'loz' and self.transform_loz:
            image_np = self.transform_loz(image=image_np)['image']

        image_t = torch.as_tensor(image_np, dtype=torch.float32)
        label = row['class']

        return image_t, row['image_name'], label

In [None]:
# Salt-and-pepper noise augmentation

class SaltAndPepper(A.ImageOnlyTransform):
    """Albumentations transform that overwrites random pixels with white / black.

    Args:
        salt_ratio: Share of the noisy pixels that become "salt" (white); the
            remainder becomes "pepper" (black).
        amount: Fraction of ``image.size`` that is overwritten with noise.
        p: Probability of applying the transform.
    """

    def __init__(self, salt_ratio=0.5, amount=0.0008, p=0.5):
        super().__init__(p=p)
        self.salt_ratio = salt_ratio
        self.amount = amount

    def apply(self, image, **params):
        """Return a noisy copy of ``image``; the input array is left untouched."""
        noisy = np.copy(image)
        if noisy.size == 0:
            return noisy

        height, width = noisy.shape[:2]
        # "White" is 1 for float images (the [0, 1] range used in this notebook)
        # and the dtype maximum for integer images such as uint8.
        if np.issubdtype(noisy.dtype, np.integer):
            salt_value = np.iinfo(noisy.dtype).max
        else:
            salt_value = 1

        # np.random.randint excludes the upper bound, so `height` / `width`
        # (not `height - 1`) are needed for the last row / column to be reachable.
        num_salt = int(np.ceil(self.amount * image.size * self.salt_ratio))
        salt_rows = np.random.randint(0, height, num_salt)
        salt_cols = np.random.randint(0, width, num_salt)
        noisy[salt_rows, salt_cols] = salt_value

        num_pepper = int(np.ceil(self.amount * image.size * (1.0 - self.salt_ratio)))
        pepper_rows = np.random.randint(0, height, num_pepper)
        pepper_cols = np.random.randint(0, width, num_pepper)
        noisy[pepper_rows, pepper_cols] = 0

        return noisy

In [None]:
# Augmentations applied to the Loz fragments
# (passed to TorchDataset as `transform_loz` for the training split only).

transform_loz = A.Compose([
    A.HorizontalFlip(p=.3),  # applied with probability 0.3
    A.Rotate(limit=30, p=.3),  # rotation with limit=30, applied with probability 0.3
    SaltAndPepper(salt_ratio=0.4, p=1.),  # always applied; 40% of the noisy pixels are salt
#     A.Resize(224, 224, p=1.)
])

In [None]:
# Index both image folders (Kaggle input paths).
full_dataset = FullDataset(
                path_kylberg = '/kaggle/input/kylberg-texture-dataset',
                path_loz = '/kaggle/input/lozzzz/dataset-loz2/dataset2',
            )

# Only the Loz data is used below: stratified 70/30 split plus the
# sub-folder-name -> class-id mapping returned by create_loz().
train_loz, valid_loz, label2id_loz = full_dataset.create_loz()

In [None]:
# Datasets use the TorchDataset defaults (mode='central', n=3, size=224).
# Augmentations are applied to the training split only.
train_dataset_loz = TorchDataset(train_loz, transform_loz=transform_loz)
valid_dataset_loz = TorchDataset(valid_loz)
# Training batches are reshuffled; validation keeps a fixed order.
train_dataloader_loz = DataLoader(train_dataset_loz, batch_size=32, shuffle=True, drop_last=False)
valid_dataloader_loz = DataLoader(valid_dataset_loz, batch_size=32, shuffle=False, drop_last=False)

In [None]:
from collections import OrderedDict

# Number of output classes = number of entries in the Loz label mapping.
NUM_CLASSES = len(label2id_loz)

# Hugging Face Hub location of the pre-trained weights.
# NOTE(review): these two names are not referenced again in this notebook
# section; the same literals are repeated when LozzModel is instantiated.
repo_id = 'danzzzll/mobilenet-v2-textures'
filename = 'mob-v2_trainKylberg_finetuneLoz-2class.pth'

class LozzModel():
    """Factory for single-channel (grayscale) image classifiers.

    ``load_model`` builds a torchvision backbone with ImageNet weights,
    ``load_kylberg_weights`` builds a MobileNetV2 from a checkpoint stored on
    the Hugging Face Hub. In both cases the first convolution takes one input
    channel and the classification head has ``num_classes`` outputs.
    """

    SUPPORTED_MODELS = ('mobilenet-v2', 'efficientnet-b0', 'efficientnet-b1', 'efficientnet_v2_l', 'resnet-50')
    # Output size of the head stored in the Kylberg checkpoint.
    KYLBERG_NUM_CLASSES = 28

    def __init__(self, model_name: str, kylberg_on: bool = False, num_classes: int = 5, repo_id: str = None, filename: str = None):
        """
        Args:
            model_name: One of ``SUPPORTED_MODELS``.
            kylberg_on: Stored on the instance; not read by the methods below.
            num_classes: Number of outputs of the new classification head.
            repo_id: Hugging Face Hub repository holding the Kylberg checkpoint.
            filename: Checkpoint file name inside ``repo_id``.
        """
        self.model_name = model_name
        self.kylberg_on = kylberg_on
        self.num_classes = num_classes
        self.repo_id = repo_id
        self.filename = filename

    @staticmethod
    def _single_channel_conv(conv):
        """Return a fresh 1-input-channel copy of ``conv`` (same geometry, no bias)."""
        return nn.Conv2d(1, conv.out_channels, kernel_size=conv.kernel_size,
                         stride=conv.stride, padding=conv.padding, bias=False)

    def load_model(self):
        """Build ``model_name`` with ImageNet weights, adapted to grayscale input.

        The replaced first convolution and the new head are randomly
        initialised; every other layer keeps its ImageNet weights.

        Raises:
            ValueError: If ``model_name`` is not in ``SUPPORTED_MODELS``.
        """
        if self.model_name not in self.SUPPORTED_MODELS:
            raise ValueError(
                f'unknown model_name {self.model_name!r}; expected one of {self.SUPPORTED_MODELS}'
            )

        # `weights='IMAGENET1K_V1'` is the non-deprecated spelling of `pretrained=True`.
        if self.model_name == 'resnet-50':
            model = models.resnet50(weights='IMAGENET1K_V1')
            model.conv1 = self._single_channel_conv(model.conv1)
            model.fc = nn.Linear(model.fc.in_features, self.num_classes)
        else:
            builders = {
                'mobilenet-v2': models.mobilenet_v2,
                'efficientnet-b0': models.efficientnet_b0,
                'efficientnet-b1': models.efficientnet_b1,
                'efficientnet_v2_l': models.efficientnet_v2_l,
            }
            model = builders[self.model_name](weights='IMAGENET1K_V1')
            # These backbones share the layout features[0][0] (stem conv) and
            # classifier[1] (final linear layer).
            model.features[0][0] = self._single_channel_conv(model.features[0][0])
            model.classifier[1] = nn.Linear(model.classifier[1].in_features, self.num_classes)

        print(f'load {self.model_name} model with ImageNet weights and {self.num_classes} classes')
        return model

    def load_kylberg_weights(self):
        """Build a MobileNetV2 initialised from the Kylberg checkpoint on the Hub.

        The checkpoint is loaded into a 28-output network; afterwards the head
        is replaced by a new ``num_classes``-output layer.

        Raises:
            ValueError: If ``model_name`` is not ``'mobilenet-v2'`` or if
                ``repo_id`` / ``filename`` were not provided.
        """
        if self.model_name != 'mobilenet-v2':
            raise ValueError(
                f"Kylberg weights are only available for 'mobilenet-v2', got {self.model_name!r}"
            )
        if self.repo_id is None or self.filename is None:
            raise ValueError('repo_id and filename are required to download the Kylberg weights')

        # hf_hub_download is used here but is not imported at the top of the
        # notebook, so it is imported lazily where it is needed.
        from huggingface_hub import hf_hub_download

        weights_path = hf_hub_download(repo_id=self.repo_id, filename=self.filename)
        # SECURITY: torch.load unpickles the file; only use trusted repositories.
        # map_location='cpu' lets a checkpoint saved on a GPU load on any host.
        state_dict = torch.load(weights_path, map_location='cpu')

        model = models.mobilenet_v2(weights=None)
        model.features[0][0] = self._single_channel_conv(model.features[0][0])
        model.classifier[1] = nn.Linear(model.last_channel, self.KYLBERG_NUM_CLASSES)
        model.load_state_dict(state_dict)
        # Swap the Kylberg head for a new one sized for the target task.
        model.classifier[1] = nn.Linear(model.last_channel, self.num_classes)
        print(f'load {self.model_name} model with Kylberg weights')
        return model
    
# Build the factory, then immediately replace `model` with the network it
# creates (MobileNetV2 with ImageNet weights and NUM_CLASSES outputs).
# repo_id / filename are only read by LozzModel.load_kylberg_weights(),
# which is not called here.
model = LozzModel(model_name='mobilenet-v2', repo_id='danzzzll/mobilenet-v2-textures', 
                  filename='mob-v2_trainKylberg_finetuneLoz-2class.pth', 
                  num_classes=NUM_CLASSES
                )
model = model.load_model()

# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
# Loss for the classification head.
criterion = nn.CrossEntropyLoss()

# AdamW with the library's default hyper-parameters, over all model parameters.
optimizer = torch.optim.AdamW(model.parameters())

# Step schedule: the learning rate is multiplied by `gamma` at each milestone.
# The scheduler is stepped once per call of train_and_validate_lozz, so the
# milestones count epochs.
milestones = [12, 15, 26]
gamma = 0.3
exp_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=gamma)

In [None]:
# Start a Weights & Biases run; `config` is a free-form description of the experiment.
# NOTE(review): the values below are typed by hand and duplicate settings made
# elsewhere in the notebook (epoch count, batch size, fragment count, class
# count) - keep them in sync when changing those.
# NOTE(review): "augmentations_lozz" lists a normalisation step, but
# `transform_loz` contains none; the only scaling visible in this notebook is
# the division by 255 in TorchDataset.__getitem__ - confirm.
import wandb
wandb.init(
    project="kylberg with lozz",
    name='experiment 10.4',
    config={
        "architecture": "mobilenet",
        "dataset": "lozz",
        "epochs": 30,
        "fragments": 9,
        "central": True,
        "batch_size": 32,
        "classes": 5,
        "size": 224,
#         "augmentations_kylberg": """
#             1. Horizontal flip
#             2. Rotation
#             3. Vertical flip
#             4. Random brightness and contrast change
#             5. Gaussian noise
#             6. Salt and pepper, salt=0.4
#             7. Normalisation
#         """,
        "augmentations_lozz": """
           1. Горизонтальное отображение
           2. Поворот
           3. Соль и перец
           4. Нормализация
           5. Разделение на 9 фрагментов по 224x224
        """,
#         "augmentation_lozz": "9 fragments, nothing else",
        "notes": "5 классов Loz ",
    }
)

In [None]:
# Training and evaluation routine (one epoch per call)

# Per-epoch history; train_and_validate_lozz appends one value to each list per call.
train_loss = []
train_acc = []
train_full = []
test_loss = []
test_acc = []
test_full = []

def train_and_validate_lozz(epoch, model, train_dataloader_lozz, valid_dataloader_lozz):
    """Train ``model`` for one epoch, then evaluate it on the validation loader.

    Relies on the notebook-level ``optimizer``, ``criterion``,
    ``exp_lr_scheduler``, ``device``, ``train_loz``, ``valid_loz`` and
    ``label2id_loz``. Appends the epoch metrics to the module-level history
    lists, plots a row-normalised confusion matrix and logs to wandb.

    Loss and fragment accuracy are averaged over batches. The "full image"
    accuracy is a majority vote over the fragment predictions of each image,
    in percent.

    Args:
        epoch: Zero-based epoch index (used for printing only).
        model: Network to train; updated in place.
        train_dataloader_lozz: Loader yielding ``(data, image_name, target)``.
        valid_dataloader_lozz: Loader yielding ``(data, image_name, target)``.

    Returns:
        The same ``model`` object.
    """
    print(f'EPOCH: {epoch + 1}')

    ### train
    running_loss = 0.0
    running_acc = 0.0
    fragments_train = MultiDict()
    model.train()
    for data, image_name, target in tqdm(train_dataloader_lozz):
        # Add the channel axis expected by the convolutional backbone.
        data = data.unsqueeze(1).to(device).float()
        target = target.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        preds = outputs.argmax(dim=1)
        running_acc += (preds == target).float().mean().item()

        # Collect the fragment predictions per source image.
        for name, pred in zip(image_name, preds.tolist()):
            fragments_train.add(name, pred)

    result_train = most_common_class_per_key(fragments_train)
    train_full.append(accuracy_full(result_train, train_loz))
    train_loss.append(running_loss / len(train_dataloader_lozz))
    train_acc.append(running_acc / len(train_dataloader_lozz))

    print(f"Epoch {epoch+1}, Train Loss: {train_loss[-1]:.3f}, Train Acc: {train_acc[-1]:.3f}, Train Full Images Acc: {train_full[-1]:.3f}")
    exp_lr_scheduler.step()

    ### validate
    model.eval()
    all_preds = []
    all_targets = []
    fragments_test = MultiDict()
    id2label = {v: k for k, v in label2id_loz.items()}
    # Start both accumulators from zero: the validation loss must not contain
    # the loss summed up during training.
    running_loss = 0.0
    running_acc = 0.0
    with torch.no_grad():
        for data, image_name, target in valid_dataloader_lozz:
            data = data.unsqueeze(1).to(device).float()
            target = target.to(device=device, dtype=torch.long)

            outputs = model(data)
            loss = criterion(outputs, target)
            running_loss += loss.item()
            preds = outputs.argmax(dim=1)
            running_acc += (preds == target).float().mean().item()

            batch_preds = preds.tolist()
            all_targets.extend(target.tolist())
            all_preds.extend(batch_preds)

            for name, pred in zip(image_name, batch_preds):
                fragments_test.add(name, pred)

    result_test = most_common_class_per_key(fragments_test)
    test_full.append(accuracy_full(result_test, valid_loz))
    test_loss.append(running_loss / len(valid_dataloader_lozz))
    test_acc.append(running_acc / len(valid_dataloader_lozz))

    print(f"Epoch {epoch+1}, Valid Loss: {test_loss[-1]:.3f}, Valid Acc: {test_acc[-1]:.3f}, Valid Full Images Acc: {test_full[-1]:.3f}")

    labels = list(id2label.values())
    targets_labels = np.array([id2label[class_id] for class_id in all_targets])
    preds_labels = np.array([id2label[class_id] for class_id in all_preds])
    cm = confusion_matrix(targets_labels, preds_labels, labels=labels)
    # Row-normalise; rows of classes absent from the targets stay at zero
    # instead of producing a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_normalized, display_labels=labels)
    disp.plot()
    plt.title('Normalized Confusion Matrix')
    plt.show()

    # The full-image accuracies are stored in percent; log them as fractions.
    wandb.log({
        "train_acc": train_acc[-1],
        "train_loss": train_loss[-1],
        "train_full": train_full[-1] / 100,
        "valid_acc": test_acc[-1],
        "valid_loss": test_loss[-1],
        "valid_full": test_full[-1] / 100
    })

    return model

In [None]:
def get_predictions(dataloader, model=model):
    """Predict a class for every fragment served by ``dataloader``.

    Args:
        dataloader: Loader yielding ``(data, image_name, target)`` batches; the
            targets are ignored.
        model: Network used for inference. The default is the ``model`` object
            that existed when this function was defined.

    Returns:
        A ``MultiDict`` mapping each image name to the predicted class ids of
        its fragments (one entry per fragment of that image).
    """
    fragments = MultiDict()
    # Move the model once instead of once per batch.
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        for data, name, _ in dataloader:
            # Add the channel axis expected by the convolutional backbone.
            data = data.unsqueeze(1).to(device).float()
            preds = model(data).argmax(dim=1)

            # One host transfer per batch rather than one `.item()` per fragment.
            for image_name, pred in zip(name, preds.tolist()):
                fragments.add(image_name, pred)
    return fragments

from collections import Counter

def most_common_class_per_key(multidict):
    """Reduce the per-fragment predictions of each image to a single class.

    Args:
        multidict: ``MultiDict``-like object (``keys()`` and ``getall(key)``)
            holding, per image name, the classes predicted for its fragments.

    Returns:
        Dict mapping every image name to its most frequently predicted class.
    """
    return {
        image_key: Counter(multidict.getall(image_key)).most_common(1)[0][0]
        for image_key in set(multidict.keys())
    }

def accuracy_full(result: dict, df:pd.DataFrame):
    """Accuracy (in percent) of per-image predictions against the labels in ``df``.

    Args:
        result: Dict mapping an image name to its predicted class.
        df: Frame with ``image_name`` and ``class`` columns holding the truth.

    Returns:
        Percentage of images in ``result`` whose prediction equals the true
        class, rounded to 3 decimals; ``0.0`` when ``result`` is empty.

    Raises:
        ValueError: If an image name from ``result`` is missing from ``df`` or
            occurs there more than once.
    """
    if not result:
        return 0.0

    # Build the name -> label lookup once instead of scanning `df` per image.
    relevant = df.loc[df['image_name'].isin(list(result)), ['image_name', 'class']]
    if relevant['image_name'].duplicated().any():
        raise ValueError('image names in `df` are not unique for the evaluated images')
    true_by_name = dict(zip(relevant['image_name'].tolist(), relevant['class'].tolist()))

    missing = [name for name in result if name not in true_by_name]
    if missing:
        raise ValueError(f'no label found in `df` for images: {missing[:5]}')

    hits = sum(1 for name, y_pred in result.items() if true_by_name[name] == y_pred)
    accuracy = round(hits / len(result) * 100, 3)
    return accuracy

In [None]:
# Training loop with early stopping on the validation loss.
NUM_EPOCHS = 30
best_loss = float('inf')
best_weights = None
epochs_without_improvement = 0
early_stopping_threshold = 4

for epoch in range(NUM_EPOCHS):
    model = train_and_validate_lozz(epoch, model, train_dataloader_loz, valid_dataloader_loz)

    if test_loss[-1] < best_loss:
        best_loss = test_loss[-1]
        # state_dict() hands out references to the live parameter tensors, so
        # they must be cloned; otherwise later epochs overwrite the "best"
        # snapshot and restoring it after the loop would be a no-op.
        best_weights = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
        print(f'Best weights on {epoch + 1} EPOCH')
        print()
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_threshold:
        print("Early stopping triggered after {} epochs without improvement.".format(epochs_without_improvement))
        break

# Restore the best snapshot (absent only if no epoch ever improved on the
# initial value, e.g. when every validation loss was NaN).
if best_weights is not None:
    model.load_state_dict(best_weights)

In [None]:
# Close the Weights & Biases run started by wandb.init() above.
wandb.finish()