# Библиотеки

In [1]:
import os
import gc
import io
import time
import json
import glob
import timm

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from PIL import Image
from tqdm.notebook import tqdm
from datetime import datetime

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt


# Данные

In [6]:
# Make sure the output folders for checkpoints and submission files exist.
for output_dir in ("models", "submits"):
    os.makedirs(output_dir, exist_ok=True)

In [7]:
# Colab-only: attach Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# Train/test pair tables are stored as parquet, the submission template as CSV.
df_train = pd.read_parquet("/content/drive/MyDrive/CV/train.parquet")
df_test = pd.read_parquet("/content/drive/MyDrive/CV/test.parquet")
sample_submission = pd.read_csv("/content/drive/MyDrive/CV/sample_submission.csv")

In [78]:
# Report the size of every loaded table, then preview the training frame.
for caption, frame in (
    ("train shape:", df_train),
    ("test shape :", df_test),
    ("sample_submission shape:", sample_submission),
):
    print(caption, frame.shape)

df_train.head(2)

In [79]:
# Display one training pair side by side together with its label.
idx = 10
row = df_train.iloc[idx]
print(f"is_image1_better = {row.get('is_image1_better', None)}")

# Image payloads may be memoryview objects; normalise them to bytes.
img1_bytes = row["image_1"]
if isinstance(img1_bytes, memoryview):
    img1_bytes = img1_bytes.tobytes()
img2_bytes = row["image_2"]
if isinstance(img2_bytes, memoryview):
    img2_bytes = img2_bytes.tobytes()

img1 = Image.open(io.BytesIO(img1_bytes)).convert("RGB")
img2 = Image.open(io.BytesIO(img2_bytes)).convert("RGB")

fig, ax = plt.subplots(1, 2, figsize=(10, 4))
for panel, picture, caption in zip(ax, (img1, img2), ("image_1", "image_2")):
    panel.imshow(picture)
    panel.axis("off")
    panel.set_title(caption)

plt.tight_layout()
plt.show()

# Создание класса Dataset для картинок

In [None]:
class IPDataset(Dataset):
    """Dataset of image pairs stored as encoded bytes inside a DataFrame.

    Every row must provide ``image_1`` and ``image_2`` (encoded image bytes,
    or a ``memoryview`` over them). When the frame has an
    ``is_image1_better`` column an item is ``(img1, img2, label)`` with the
    label as a float32 tensor; otherwise it is ``(img1, img2)``.
    """

    def __init__(self, df, transform=None):
        self.transform = transform
        # Work on a re-indexed copy so positional lookups are always 0..n-1.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return self.df.shape[0]

    @staticmethod
    def _decode_rgb(payload):
        """Decode an encoded image (bytes or memoryview) into an RGB PIL image."""
        if isinstance(payload, memoryview):
            payload = payload.tobytes()
        return Image.open(io.BytesIO(payload)).convert("RGB")

    def __getitem__(self, idx):
        record = self.df.iloc[idx]

        first = self._decode_rgb(record["image_1"])
        second = self._decode_rgb(record["image_2"])

        if self.transform is not None:
            # The transform is applied to each image independently.
            first = self.transform(first)
            second = self.transform(second)

        if "is_image1_better" not in self.df.columns:
            return first, second

        target = torch.tensor(record["is_image1_better"], dtype=torch.float32)
        return first, second, target

# Архитектура модели

In [17]:
class Mymodel(nn.Module):
    """Siamese image-pair model built on a pretrained Swin-Tiny encoder.

    Two heads share the encoder:

    * ``rank_head`` - a single linear layer mapping 768-d features to a
      scalar score (applied by the training/evaluation loops, not by
      ``forward``).
    * ``classifier_head`` - an MLP over the concatenation
      ``[f1, f2, f1 - f2, f1 * f2]`` of the pooled features of both images,
      producing one logit per pair.

    Args:
        freeze_encoder: when True, every encoder parameter gets
            ``requires_grad = False`` so only the heads can be trained.
    """

    def __init__(self, freeze_encoder=True):
        super().__init__()
        self.encoder = timm.create_model('swin_tiny_patch4_window7_224', pretrained=True)

        if freeze_encoder:
            for p in self.encoder.parameters():
                p.requires_grad = False

        # Drop timm's classification head so the encoder returns raw features.
        self.encoder.head = nn.Identity()
        self.rank_head = nn.Linear(768, 1)

        self.classifier_head = nn.Sequential(
            nn.Linear(768 * 4, 2048),
            nn.BatchNorm1d(2048),
            nn.GELU(),
            nn.Dropout(0.4),

            nn.Linear(2048, 1024),
            nn.BatchNorm1d(1024),
            nn.GELU(),
            nn.Dropout(0.3),

            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(512, 1)
        )

    def forward(self, img1, img2, return_embeddings=False):
        """Encode both images and either return the features or a pair logit.

        Args:
            img1: batch of first images.
            img2: batch of second images.
            return_embeddings: when True, return the un-pooled encoder
                outputs ``(features1, features2)`` instead of a logit.

        Returns:
            ``(features1, features2)`` if ``return_embeddings`` is True,
            otherwise the ``classifier_head`` output (one logit per pair).
        """
        features1 = self.encoder(img1)
        features2 = self.encoder(img2)

        if return_embeddings:
            # Reuse the features computed above; running the encoder a second
            # time would double the compute and the activation memory.
            return features1, features2

        # Average over dims 1 and 2 before the MLP.
        # NOTE(review): assumes the encoder output is channels-last
        # (batch, H, W, 768) once the head is an Identity - confirm against
        # the installed timm version.
        features1 = features1.mean(dim=(1, 2))
        features2 = features2.mean(dim=(1, 2))

        diff = features1 - features2
        mul = features1 * features2
        combined = torch.cat((features1, features2, diff, mul), dim=1)

        output = self.classifier_head(combined)
        return output

# Обучение модели

In [56]:
def train_model(model, dataloader, criterion, optimizer, device):
    """Run one training epoch over ``dataloader``.

    The objective is selected by ``criterion``:

    * ``criterion is None`` - pairwise ranking. Each image is scored with
      ``model.rank_head`` applied to the encoder features, and the score
      difference is trained with binary cross-entropy on logits.
    * otherwise - classification. ``model(img1, img2)`` yields one logit per
      pair and ``criterion(logits, labels)`` is minimised, so the classifier
      head actually receives gradients.

    Args:
        model: network exposing ``rank_head`` and a
            ``forward(img1, img2, return_embeddings=False)`` method.
        dataloader: yields ``(images1, images2, labels)`` batches.
        criterion: loss callable taking ``(logits, labels)``, or None for
            the ranking objective.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.

    Returns:
        ``(model, epoch_loss, epoch_acc)`` where the loss is the
        sample-weighted mean and the accuracy is measured at logit > 0.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images1, images2, labels in tqdm(dataloader, desc="Training"):
        images1 = images1.to(device)
        images2 = images2.to(device)
        # Flatten rather than squeeze(): squeeze() turns a one-sample batch
        # into a 0-d tensor, which no longer matches the (1,) logits.
        labels = labels.to(device).float().reshape(-1)

        # Gradients accumulate by default; clear them before every batch.
        optimizer.zero_grad()

        if criterion is None:
            features1, features2 = model(images1, images2, return_embeddings=True)
            score1 = model.rank_head(features1).mean(dim=(1, 2, 3))
            score2 = model.rank_head(features2).mean(dim=(1, 2, 3))
            logits = score1 - score2
            loss = F.binary_cross_entropy_with_logits(logits, labels)
        else:
            logits = model(images1, images2).reshape(-1)
            loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        preds = (logits > 0).float()
        correct_predictions += (preds == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("train_model received an empty dataloader")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return model, epoch_loss, epoch_acc



# Eval модели

In [57]:
def evaluate_model(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    The scored head is selected by ``criterion``:

    * ``criterion is None`` - ranking head: the logit is the difference of
      the ``model.rank_head`` scores of the two images, scored with binary
      cross-entropy on logits.
    * otherwise - classifier head: the logit is ``model(img1, img2)`` and
      the loss is ``criterion(logits, labels)``.

    Args:
        model: network exposing ``rank_head`` and a
            ``forward(img1, img2, return_embeddings=False)`` method.
        dataloader: yields ``(images1, images2, labels)`` batches.
        criterion: loss callable taking ``(logits, labels)``, or None for
            the ranking objective.
        device: device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc, auc_score, all_preds_logits, all_labels)``;
        the last two are 1-D CPU tensors covering the whole loader.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    all_preds_logits = []
    all_labels = []

    with torch.no_grad():
        for images1, images2, labels in tqdm(dataloader, desc="Eval"):
            images1 = images1.to(device)
            images2 = images2.to(device)
            # Flatten rather than squeeze(): squeeze() turns a one-sample
            # batch into a 0-d tensor that no longer matches the logits.
            labels = labels.to(device).float().reshape(-1)

            if criterion is None:
                features1, features2 = model(images1, images2, return_embeddings=True)
                score1 = model.rank_head(features1).mean(dim=(1, 2, 3))
                score2 = model.rank_head(features2).mean(dim=(1, 2, 3))
                logits = score1 - score2
                loss = F.binary_cross_entropy_with_logits(logits, labels)
            else:
                logits = model(images1, images2).reshape(-1)
                loss = criterion(logits, labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            preds = (logits > 0).float()
            correct_predictions += (preds == labels).sum().item()
            total_samples += batch_size

            all_preds_logits.append(logits.cpu())
            all_labels.append(labels.cpu())

    if total_samples == 0:
        raise ValueError("evaluate_model received an empty dataloader")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    all_preds_logits = torch.cat(all_preds_logits)
    all_labels = torch.cat(all_labels)

    preds_proba = torch.sigmoid(all_preds_logits).numpy()
    auc_score = roc_auc_score(all_labels.numpy(), preds_proba)

    return epoch_loss, epoch_acc, auc_score, all_preds_logits, all_labels


# EarlyStopping для kfold валидации

In [None]:
class EarlyStopping:
    """Stop training when a higher-is-better score stops improving.

    A score counts as an improvement unless it is below
    ``best_score + delta``. Every improvement (and the very first score)
    writes ``model.state_dict()`` to ``path``; ``patience`` consecutive
    non-improvements set ``early_stop`` to True.
    """

    def __init__(self, patience=5, verbose=False, delta=0.0, path='checkpoint.pth'):
        self.patience, self.verbose = patience, verbose
        self.delta, self.path = delta, path

        self.early_stop = False
        self.counter = 0
        self.best_score = None

    def __call__(self, score, model):
        is_first = self.best_score is None

        # Written as "not (a < b)" on purpose to keep the comparison
        # semantics of the original condition for every input.
        if is_first or not (score < self.best_score + self.delta):
            self.best_score = score
            if not is_first:
                self.counter = 0
            self.save_checkpoint(model)
            return

        self.counter += 1
        if self.verbose:
            print(f"EarlyStopping counter: {self.counter} из {self.patience}")
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, model):
        """Persist the model weights to ``self.path``."""
        weights = model.state_dict()
        torch.save(weights, self.path)

# Kfold cross validation

In [59]:
def _run_stage(model, train_loader, val_loader, criterion, optimizer,
               early_stopping, best_model_path, num_epochs, stage):
    """Train one stage with early stopping and return its best validation AUC.

    The weights of the epoch with the highest AUC are written to
    ``best_model_path``. ``stage`` (1 or 2) only selects the log format.
    """
    # Start below any reachable AUC so the first epoch is always saved and
    # ``best_model_path`` is guaranteed to exist after the loop.
    best_auc = float("-inf")
    for epoch in range(num_epochs):
        model, train_loss, train_acc = train_model(
            model, train_loader, criterion, optimizer, device
        )
        val_loss, val_acc, auc_score, _, _ = evaluate_model(
            model, val_loader, criterion, device
        )
        if stage == 1:
            print(f"Stage1 Epoch {epoch+1} - Val AUC: {auc_score:.4f}")
        else:
            print(f"Stage2 Epoch {epoch+1}")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, AUC: {auc_score:.4f}")

        early_stopping(auc_score, model)
        if auc_score > best_auc:
            best_auc = auc_score
            torch.save(model.state_dict(), best_model_path)
        if early_stopping.early_stop:
            break
    return best_auc


def kfold_cross_validation(df, dataset_class,
                          num_folds=5, num_epochs=50, batch_size=16,
                          patience=5, min_delta=0.001, pretrain_with_ranking=True, exp_time=''):
    """Two-stage stratified K-fold training; returns the mean stage-2 AUC.

    For every fold:

    1. Stage 1 trains a fresh ``Mymodel`` with an unfrozen encoder
       (``criterion=None`` is passed to the train/eval loops).
    2. Stage 2 reloads the best stage-1 weights into a model with a frozen
       encoder and optimises only ``classifier_head`` with
       ``nn.BCEWithLogitsLoss``.

    Checkpoints are written under ``./models/exp_{exp_time}/stage1`` and
    ``.../stage2``. The function reads the module-level names
    ``train_transforms``, ``val_transforms``, ``device`` and
    ``LEARNING_RATE``.

    Args:
        df: training frame with an ``is_image1_better`` column. A ``fold``
            column holding the validation-fold id of each row is added to it
            in place.
        dataset_class: dataset type called as ``dataset_class(df, transform=...)``.
        num_folds: number of stratified folds.
        num_epochs: maximum epochs per stage (at least 1).
        batch_size: batch size for both loaders.
        patience: early-stopping patience, in epochs.
        min_delta: minimum AUC gain early stopping counts as an improvement.
        pretrain_with_ranking: accepted for interface compatibility; this
            function does not currently consult it.
        exp_time: experiment tag used in directory and file names.

    Returns:
        Mean of the best stage-2 validation AUC over all folds.

    Raises:
        ValueError: if ``num_epochs`` is smaller than 1.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
    # ``split`` yields positional indices, so assign folds positionally; going
    # through ``df.loc`` would mislabel rows when the index is not 0..n-1.
    fold_ids = np.full(len(df), -1, dtype=int)
    for fold, (_, val_idx) in enumerate(skf.split(df, df['is_image1_better'])):
        fold_ids[val_idx] = fold
    df['fold'] = fold_ids

    results = []

    experiment_dir = f'./models/exp_{exp_time}'
    stage1_dir = os.path.join(experiment_dir, 'stage1')
    stage2_dir = os.path.join(experiment_dir, 'stage2')
    os.makedirs(stage1_dir, exist_ok=True)
    os.makedirs(stage2_dir, exist_ok=True)

    for fold in range(num_folds):
        print(f"\n--- Fold {fold+1}/{num_folds} {datetime.now().strftime('%y-%m-%d %H:%M:%S')} ---")

        train_df = df[df['fold'] != fold]
        val_df = df[df['fold'] == fold]
        train_dataset = dataset_class(train_df.reset_index(drop=True), transform=train_transforms)
        val_dataset = dataset_class(val_df.reset_index(drop=True), transform=val_transforms)

        # A trailing batch holding a single sample cannot be trained on
        # (BatchNorm1d needs more than one value per channel in train mode),
        # so drop it in the one case where it would occur.
        n_train = len(train_dataset)
        drop_single_tail = n_train > batch_size and n_train % batch_size == 1

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            drop_last=drop_single_tail,
            persistent_workers=False,
            num_workers=6,
            pin_memory=True
        )
        val_loader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=False,
            persistent_workers=False,
            num_workers=6,
            pin_memory=True
        )

        print("Stage 1: Pretraining encoder")
        model = Mymodel(freeze_encoder=False).to(device)
        model_name = model.encoder._get_name()

        optimizer_stage1 = optim.AdamW(
            model.parameters(),
            lr=LEARNING_RATE,
            weight_decay=0.01
        )
        early_stopping_stage1 = EarlyStopping(
            patience=patience,
            verbose=True,
            delta=min_delta,
            path=os.path.join(stage1_dir, f"stage1_fold{fold}_{exp_time}.pth")
        )
        stage1_model_path = os.path.join(
            stage1_dir,
            f"{model_name}_fold{fold}_{exp_time}.pth"
        )
        _run_stage(
            model, train_loader, val_loader, None, optimizer_stage1,
            early_stopping_stage1, stage1_model_path, num_epochs, stage=1
        )

        print("Stage 2: Training classifier head")
        model = Mymodel(freeze_encoder=True).to(device)
        model.load_state_dict(torch.load(stage1_model_path, map_location=device))
        model.encoder.requires_grad_(False)

        criterion_stage2 = nn.BCEWithLogitsLoss()
        optimizer_stage2 = optim.AdamW(
            model.classifier_head.parameters(),
            lr=LEARNING_RATE,
            weight_decay=0.01
        )
        early_stopping_stage2 = EarlyStopping(
            patience=patience,
            verbose=True,
            delta=min_delta,
            path=os.path.join(stage2_dir, f"stage2_fold{fold}_{exp_time}.pth")
        )
        stage2_model_path = os.path.join(
            stage2_dir,
            f"{model_name}_fold{fold}_{exp_time}.pth"
        )
        best_auc_stage2 = _run_stage(
            model, train_loader, val_loader, criterion_stage2, optimizer_stage2,
            early_stopping_stage2, stage2_model_path, num_epochs, stage=2
        )

        results.append(best_auc_stage2)
        print(f"Best AUC on Fold {fold+1}: {best_auc_stage2:.4f}\n")

        # Release the fold's model and loader workers before the next fold.
        del model
        del train_loader
        del val_loader
        time.sleep(1)
        gc.collect()
        torch.cuda.empty_cache()

    print("Результаты по фолдам:", results)
    print("Среднее AUC:", np.mean(results))
    return np.mean(results)



# Функции для предсказания

In [22]:
def load_model_for_inference_ranknet(model_path, device):
    """Build a frozen-encoder ``Mymodel`` on ``device`` with the weights stored at ``model_path``.

    The returned model is already switched to evaluation mode.
    """
    net = Mymodel(freeze_encoder=True)
    net = net.to(device)
    weights = torch.load(model_path, map_location=device)
    net.load_state_dict(weights)
    # ``eval()`` returns the module itself.
    return net.eval()


@torch.no_grad()
def predict_ranknet_probs(model, test_loader, device):
    """Return sigmoid probabilities of ``model(img1, img2)`` for every pair in ``test_loader``.

    The loader must yield ``(images1, images2)`` batches. Per-sample outputs
    are gathered in loader order and returned as one NumPy array.
    """
    collected = []
    for left, right in tqdm(test_loader, desc="Predicting"):
        pair_logits = model(left.to(device), right.to(device))
        batch_probs = torch.sigmoid(pair_logits).cpu().numpy()
        # Iterating the batch array adds one entry per sample.
        collected.extend(batch_probs)
    return np.array(collected)

In [23]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Используемое устройство: {device}")

Используемое устройство: cuda


# Аугментации

In [25]:
import torchvision.transforms as transforms

# Settings shared by the training and validation pipelines.
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]  # ImageNet per-channel mean
NORM_STD = [0.229, 0.224, 0.225]   # ImageNet per-channel std

# Training: resize, convert to tensor, light colour jitter and random
# horizontal flip, then normalise.
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Validation / test: deterministic resize + normalise only.
val_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Кросс валидация

In [64]:
# Hyper-parameters shared by the cells below.
BATCH_SIZE = 16
LEARNING_RATE = 1e-4
EPOCHS = 8

# Loader settings common to the training and test loaders.
loader_options = dict(
    batch_size=BATCH_SIZE,
    persistent_workers=False,
    num_workers=6,
    pin_memory=True,
)

train_dataset = IPDataset(df_train, transform=train_transforms)
test_dataset = IPDataset(df_test, transform=val_transforms)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Timestamp tag that names this experiment's checkpoint and submission files.
experiment_time = datetime.now().strftime("%y%m%d%H%M%S")

In [80]:
# Run the two-stage 5-fold cross-validation on the training frame.
cross_val_score = kfold_cross_validation(
    df_train,
    IPDataset,
    num_folds=5,
    num_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    patience=2,
    pretrain_with_ranking=True,
    exp_time=experiment_time,
)

# Предсказания test

In [81]:
# Build the test loader (no shuffling, so predictions keep the row order).
test_dataset = IPDataset(df_test, transform=val_transforms)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    persistent_workers=False,
    num_workers=6,
    pin_memory=True
)

# Collect every stage-2 checkpoint of the current experiment.
# NOTE(review): kfold_cross_validation writes two files per fold into this
# folder (the EarlyStopping checkpoint and the best-AUC checkpoint), so each
# fold may be averaged twice - confirm this is intended.
ranknet_dir = f"models/exp_{experiment_time}/stage2"
model_paths = sorted(glob.glob(os.path.join(ranknet_dir, "*.pth")))

# Fail fast: averaging an empty list would silently produce NaN and a
# one-row NaN submission instead of an error.
if not model_paths:
    raise FileNotFoundError(
        f"No stage-2 checkpoints (*.pth) found in {ranknet_dir!r}; "
        "run the cross-validation cell first."
    )

all_fold_probs = []
for p in model_paths:
    model = load_model_for_inference_ranknet(p, device)
    probs = predict_ranknet_probs(model, test_loader, device)
    all_fold_probs.append(probs)

# Ensemble by averaging the per-checkpoint probabilities.
final_probs = np.mean(all_fold_probs, axis=0)
final_preds = (final_probs > 0.5).astype(int)

os.makedirs(f"models/exp_{experiment_time}", exist_ok=True)

os.makedirs("submits", exist_ok=True)

# Формирование сабмита

In [82]:
# Write the averaged probabilities as a CSV whose index column is named "index".
submit_path = f"submits/submission_{experiment_time}.csv"
submission = pd.DataFrame(
    {"is_image1_better": final_probs.reshape(-1)}
).rename_axis("index")
submission.to_csv(submit_path, index=True)