In [1]:
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torchvision
import matplotlib.pyplot as plt
from IPython.display import clear_output
import torchvision.transforms as tt

from PIL import Image
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.models import ResNet18_Weights
from tqdm.notebook import tqdm
from torchvision.utils import make_grid
from torchvision.io import decode_image
from pathlib import Path
import torchvision.transforms.functional as F
from torchvision import models as vision_models
from torch.optim.lr_scheduler import StepLR

from transformers import SwinForImageClassification, SwinConfig
import torch.nn as nn
import torch

import timm


def set_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, plus all CUDA devices
    when CUDA is available) and switches cuDNN to deterministic mode with
    benchmarking turned off.

    Args:
        seed: Integer seed passed to each generator.
    """
    # CPU-side generators, seeded in the same order as before.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    cuda_usable = torch.cuda.is_available()
    if cuda_usable:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(1001)

In [3]:
import os
import shutil

src_dir = "data/test"
dest_dir = "data/train"

# Create the destination folder when it is missing.
if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)


def _count_files(folder):
    """Return how many regular files sit directly inside ``folder``."""
    return sum(1 for name in os.listdir(folder) if os.path.isfile(os.path.join(folder, name)))


src_files = _count_files(src_dir)
print(f"Исходная папка содержит {src_files} файлов.")

# Copy the files: only regular files are copied, sub-directories are skipped.
for entry in os.listdir(src_dir):
    source_path = os.path.join(src_dir, entry)
    if not os.path.isfile(source_path):
        continue
    shutil.copy2(source_path, os.path.join(dest_dir, entry))

dest_files = _count_files(dest_dir)
print(f"Целевая папка содержит {dest_files} файлов.")

Исходная папка содержит 16342 файлов.
Целевая папка содержит 56500 файлов.


In [4]:
# Class names; the position in the list is used as the numeric class id below.
# NOTE(review): this name shadows the built-in `map` for the rest of the notebook;
# it is left unchanged because other cells may read it.
map = [
    'Бурый медведь', 'Гималайский медведь', 'Кабан', 'Изюбрь', 'Пятнистый олень',
    'Марал', 'Сибирская косуля', 'Азиатский барсук', 'Соболь', 'Амурский лесной кот',
    'Манул', 'Рысь', 'Тигр', 'Ирбис', 'Аргали', 'Козерог', 'Волк', 'Лиса',
    'Енотовидная собака', 'Заяц', 'Сурок',
]
data = pd.read_csv('train.csv')

# Earlier submission whose predictions are reused as labels (the author calls this
# "stemming" - presumably pseudo-labelling of the test images; confirm).
data2 = pd.read_csv('best_subm.csv')
data2['unified_class'] = [map[class_idx] for class_idx in data2['predicted_class']]
data2 = data2.rename(columns={'predicted_class': 'class_id'})
# Reverse the column order (presumably to line up with the layout of train.csv - confirm).
data2 = data2.iloc[:, ::-1]
data2

Unnamed: 0,unified_class,class_id,image_name
0,Пятнистый олень,4,7fa5443d968d45948ef2f4cc0272153e.JPG
1,Пятнистый олень,4,1c8de42ff3230ea94ff56495f24beebd.JPG
2,Пятнистый олень,4,68d9318e9252782b3e56367e676f7c0a.JPG
3,Кабан,2,225765269b9bdf06066534be55ea48b5.JPG
4,Пятнистый олень,4,2ddb2f9912fcc5180b3f85bf4475ab29.JPG
...,...,...,...
16337,Сурок,20,861e57dbae6b44611b703b8e149b14ca.JPG
16338,Пятнистый олень,4,0931f74251973aed69c6ed501f7f90e1.JPG
16339,Ирбис,13,dc674e40c4a4a850a8d6ae45469dacae.JPG
16340,Кабан,2,1367e0d473118ce9ebc36949a15907b9.JPG


In [5]:
# Stack the rows of `data` on top of the rows of `data2`, renumber them from 0
# and persist the combined table.
resss = pd.concat([data, data2], ignore_index=True)
resss.to_csv('data_stem.csv', index=False)

In [6]:
# Display the concatenated frame.
resss

Unnamed: 0,unified_class,class_id,image_name
0,Кабан,2,0fa5f1cd824ca3705b654ada33437337.JPG
1,Изюбрь,3,da2969de9de9ca78eee5ba249e52ce89.JPG
2,Пятнистый олень,4,5b5d9c79cec42ea630d9f29ae59d2d6d.JPG
3,Рысь,11,e5dfc9f687530959332a2f8aa0594140.JPG
4,Сибирская косуля,6,50de65ad44e8dc71ed7f122eff3f2164.JPG
...,...,...,...
56495,Сурок,20,861e57dbae6b44611b703b8e149b14ca.JPG
56496,Пятнистый олень,4,0931f74251973aed69c6ed501f7f90e1.JPG
56497,Ирбис,13,dc674e40c4a4a850a8d6ae45469dacae.JPG
56498,Кабан,2,1367e0d473118ce9ebc36949a15907b9.JPG


In [7]:
base_path = Path("data/")
images_path = base_path / "train"
data = pd.read_csv('data_stem.csv')

# Hold out 10% of the rows for validation; the fixed random_state keeps the split repeatable.
# NOTE(review): the split is not stratified by class - confirm that this is intended.
train, val = (
    part.reset_index(drop=True)
    for part in train_test_split(data, test_size=0.1, random_state=1)
)

print(train.shape, val.shape)

(50850, 3) (5650, 3)


In [8]:
# Display the frame currently bound to `data`.
data

Unnamed: 0,unified_class,class_id,image_name
0,Кабан,2,0fa5f1cd824ca3705b654ada33437337.JPG
1,Изюбрь,3,da2969de9de9ca78eee5ba249e52ce89.JPG
2,Пятнистый олень,4,5b5d9c79cec42ea630d9f29ae59d2d6d.JPG
3,Рысь,11,e5dfc9f687530959332a2f8aa0594140.JPG
4,Сибирская косуля,6,50de65ad44e8dc71ed7f122eff3f2164.JPG
...,...,...,...
56495,Сурок,20,861e57dbae6b44611b703b8e149b14ca.JPG
56496,Пятнистый олень,4,0931f74251973aed69c6ed501f7f90e1.JPG
56497,Ирбис,13,dc674e40c4a4a850a8d6ae45469dacae.JPG
56498,Кабан,2,1367e0d473118ce9ebc36949a15907b9.JPG


In [10]:
class Model(nn.Module):
    """Swin image classifier with a replacement classification head.

    The backbone and its configuration are loaded from the
    ``microsoft/swin-base-patch4-window12-384-in22k`` checkpoint; the
    checkpoint's classifier is then swapped for a new linear layer.

    Args:
        num_classes: Number of logits produced by the new linear head.
    """

    def __init__(self, num_classes: int):
        super().__init__()
        config = SwinConfig.from_pretrained("microsoft/swin-base-patch4-window12-384-in22k")
        self.model = SwinForImageClassification.from_pretrained(
            "microsoft/swin-base-patch4-window12-384-in22k", config=config
        )

        # Keep the head's input width, replace its output width with num_classes.
        num_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(num_features, num_classes)

    def forward(self, batch):
        """Return the class logits for a batch of images.

        Args:
            batch: Either the image tensor itself, or an ``(images, labels)``
                pair as yielded by the data loaders. The second element is
                ignored, so ``(images, None)`` keeps working.

        Returns:
            The ``logits`` attribute of the wrapped model's output.
        """
        if isinstance(batch, torch.Tensor):
            # Bare tensor: callers no longer have to wrap it in a dummy pair.
            inputs = batch
        else:
            inputs, _ = batch
        outputs = self.model(pixel_values=inputs)
        return outputs.logits

In [13]:
class CustomDataset(Dataset):
    """Dataset yielding ``(image, class_id)`` pairs described by DataFrame rows.

    Args:
        dataframe: Table with an ``image_name`` and a ``class_id`` column.
        path_to_images: Directory that the ``image_name`` values are joined onto.
        transforms: Optional callable applied to the RGB image; with ``None``
            the converted image is returned as is.
    """

    def __init__(self, dataframe: pd.DataFrame, path_to_images: Path, transforms: "tt.Compose | None" = None) -> None:
        self.df = dataframe
        self.path_to_images = path_to_images
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` and return it together with its ``class_id``."""
        row = self.df.iloc[idx]
        # Image.open leaves the file handle open; convert() produces an independent
        # image, so the handle can be closed as soon as the conversion is done.
        with Image.open(self.path_to_images / row["image_name"]) as raw_image:
            image = raw_image.convert('RGB')
        if self.transforms is not None:
            image = self.transforms(image)
        return image, row["class_id"]

In [16]:
def _tensor_and_normalize():
    """Return the ToTensor + Normalize tail shared by the train and validation pipelines."""
    return [
        tt.ToTensor(),
        tt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: random augmentations first, then tensor conversion and normalisation.
train_transform = tt.Compose([
    tt.RandomGrayscale(p=0.5),
    tt.RandomResizedCrop(384),
    tt.RandomHorizontalFlip(),
    tt.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.12),
    tt.RandomRotation(15),
    *_tensor_and_normalize(),
])

# Validation pipeline: fixed 384x384 resize, no randomness.
val_transform = tt.Compose([
    tt.Resize((384, 384)),
    *_tensor_and_normalize(),
])

train_dataset = CustomDataset(dataframe=train, path_to_images=images_path, transforms=train_transform)
val_dataset = CustomDataset(dataframe=val, path_to_images=images_path, transforms=val_transform)

# Only the training loader shuffles; both load in the main process (num_workers=0).
loader_options = dict(batch_size=32, num_workers=0)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_options)
valid_dataloader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [17]:
# Use the GPU whenever CUDA is usable, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("Для обучения выбран девайс {}".format(device))

Для обучения выбран девайс cuda


In [20]:
# One output logit per distinct class name found in the data.
# NOTE(review): labels fed to the loss come from `class_id`; if a class id is missing
# from the data, nunique() would be smaller than max(class_id) + 1 - confirm they agree.
num_classes = data["unified_class"].nunique()

model = Model(num_classes=num_classes)
model = model.to(device)

optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

In [21]:
# Metric history; the training loop appends one value per epoch to each list.
train_losses, val_losses = [], []
train_f1_scores, val_f1_scores = [], []

num_epochs = 10

# Best validation F1 seen so far and the file the corresponding weights are written to.
best_val_f1 = 0.0
best_model_path = 'best.pth'

# Multiply the learning rate by 0.1 after every 3 scheduler steps.
scheduler = StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [None]:
# Folder for the checkpoints. exist_ok=True makes the cell safe to re-run and
# removes the gap between the existence check and the creation.
os.makedirs('models', exist_ok=True)

In [3]:
def _run_batches(loader, training):
    """Feed every batch of ``loader`` through the model once.

    The caller is responsible for putting the model into train/eval mode and
    for disabling gradients around a validation pass.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        training: When true, back-propagate the loss, clip the gradient norm
            and step the optimizer for every batch.

    Returns:
        Tuple ``(summed batch losses, true labels, predicted labels)``.
    """
    loss_sum = 0.0
    y_true, y_pred = [], []

    for inputs, labels in tqdm(loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        if training:
            optimizer.zero_grad()

        logits = model((inputs, labels))
        batch_loss = criterion(logits, labels)

        if training:
            batch_loss.backward()
            # Cap the total gradient norm at 1.0 before the weight update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

        loss_sum += batch_loss.item()
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(logits.argmax(dim=1).cpu().numpy())

    return loss_sum, y_true, y_pred


for epoch in range(num_epochs):
    # Training pass.
    model.train()
    running_loss, train_true, train_pred = _run_batches(train_dataloader, training=True)

    train_f1 = f1_score(train_true, train_pred, average='macro')
    train_losses.append(running_loss / len(train_dataloader))
    train_f1_scores.append(train_f1)

    # The scheduler advances once per epoch, after the training pass.
    scheduler.step()

    # Validation pass, without gradient tracking.
    model.eval()
    with torch.no_grad():
        val_running_loss, val_true, val_pred = _run_batches(valid_dataloader, training=False)

    val_f1 = f1_score(val_true, val_pred, average='macro')
    val_losses.append(val_running_loss / len(valid_dataloader))
    val_f1_scores.append(val_f1)

    # Keep a separate copy of the weights with the highest validation macro-F1.
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), best_model_path)
        print(f'New best model saved with F1: {best_val_f1:.4f}')

    # A checkpoint is also written after every epoch, regardless of the score.
    torch.save(model.state_dict(), f'models/epoch_93_{epoch+1}.pth')

    print(f'Epoch [{epoch+1}/{num_epochs}], '
          f'Train Loss: {train_losses[-1]:.4f}, Train F1: {train_f1:.4f}, '
          f'Val Loss: {val_losses[-1]:.4f}, Val F1: {val_f1:.4f}')

Epoch [1/10], Train Loss: 0.6534, Train F1: 0.7042, Val Loss: 0.1435, Val F1: 0.9003
Epoch [2/10], Train Loss: 0.4281, Train F1: 0.8256, Val Loss: 0.1008, Val F1: 0.9664
Epoch [3/10], Train Loss: 0.3714, Train F1: 0.8487, Val Loss: 0.0973, Val F1: 0.9301
Epoch [4/10], Train Loss: 0.3212, Train F1: 0.9056, Val Loss: 0.0575, Val F1: 0.9522
Epoch [5/10], Train Loss: 0.3055, Train F1: 0.9234, Val Loss: 0.0575, Val F1: 0.9578
Epoch [6/10], Train Loss: 0.2711, Train F1: 0.9256, Val Loss: 0.0571, Val F1: 0.9581
Epoch [7/10], Train Loss: 0.1934, Train F1: 0.9273, Val Loss: 0.0573, Val F1: 0.9577
Epoch [8/10], Train Loss: 0.1736, Train F1: 0.9294, Val Loss: 0.0569, Val F1: 0.9635
Epoch [9/10], Train Loss: 0.1398, Train F1: 0.9260, Val Loss: 0.0562, Val F1: 0.9684
Epoch [10/10], Train Loss: 0.1235, Train F1: 0.9264, Val Loss: 0.0567, Val F1: 0.9664


In [None]:
class InferenceDataset(Dataset):
    """Dataset over a list of image files that yields ``(image, file name)`` pairs.

    Args:
        image_paths: Indexable collection of path objects; the ``.name`` of
            each entry is returned alongside its image.
        transforms: Optional callable applied to the RGB image; with ``None``
            the converted image is returned as is.
    """

    def __init__(self, image_paths, transforms=None):
        self.image_paths = image_paths
        self.transforms = transforms

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it with the file's base name."""
        image_path = self.image_paths[idx]
        # Image.open leaves the file handle open; convert() produces an independent
        # image, so the handle can be closed as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transforms is not None:
            image = self.transforms(image)
        return image, image_path.name

In [None]:
# Inference preprocessing: fixed 384x384 resize, tensor conversion and
# per-channel normalisation; no random augmentation.
infer_transform = tt.Compose(
    transforms=[
        tt.Resize(size=(384, 384)),
        tt.ToTensor(),
        tt.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# glob() yields files in an arbitrary, filesystem-dependent order. Sorting makes the
# order of the prediction rows reproducible between runs and machines.
test_image_paths = sorted(Path('data/test').glob("*.JPG"))

infer_dataset = InferenceDataset(test_image_paths, transforms=infer_transform)
infer_dataloader = DataLoader(infer_dataset, batch_size=96, shuffle=False)

# NOTE(review): this rebinds best_model_path to the checkpoint written after epoch 10,
# not to 'best.pth' (the weights with the highest validation F1) - confirm this is intended.
best_model_path = 'models/epoch_93_10.pth'

In [None]:
# map_location lets a checkpoint saved on a GPU load on whatever `device` is in use here.
# weights_only=True restricts unpickling to tensors and plain containers, which is all a
# state dict needs, and avoids the warning torch.load emits when the flag is left unset.
state_dict = torch.load(best_model_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

In [None]:
model.eval()

results = []
# No gradients are needed for prediction, so the whole loop runs under no_grad.
with torch.no_grad():
    for images, image_names in tqdm(infer_dataloader):
        # The model expects an (inputs, labels) pair; there are no labels at inference time.
        outputs = model((images.to(device), None))
        preds = outputs.argmax(dim=1).cpu().numpy()

        # One record per image: file name plus the index of the highest logit.
        results.extend(
            {"image_name": file_name, "predicted_class": class_idx}
            for file_name, class_idx in zip(image_names, preds)
        )

In [25]:
# Turn the per-image prediction records into a table and print its first five rows.
df_results = pd.DataFrame(data=results)
print(df_results.head(n=5))

  model.load_state_dict(torch.load(best_model_path))


  0%|          | 0/171 [00:00<?, ?it/s]

                             image_name  predicted_class
0  000028438ed4f212bfcf4f1c3b4b1aab.JPG                0
1  000a25d7a14f15f2bb6349640653cdbc.JPG               12
2  000eb8747b0cd8f848c2f98048b5445d.JPG               11
3  0010bd149e23b169ffc16e5627ece2bf.JPG               15
4  0011a31e779ec92689aad0e0139bec7c.JPG                4


In [26]:
# Write the predictions to disk without the DataFrame index column.
df_results.to_csv(path_or_buf='go95pls.csv', index=False)