# Исследование влияния аугментаций на работу CNN

In [1]:
import torch
import torch.nn as nn
from torch.functional import F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from tqdm import tqdm
from torchvision import transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import time
import matplotlib.pyplot as plt
import os
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')
plt.style.use('dark_background')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  data = fetch_version_info()


## Скачиваем данные Intel Image Classification с Kaggle

In [2]:
# Классы датасета Intel Image classification
classes = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

train_path = '/kaggle/input/intel-image-classification/seg_train/seg_train'
val_path = '/kaggle/input/intel-image-classification/seg_test/seg_test'

train_img_path = []
train_img_label = []
for folder in os.listdir(train_path):
    for img in os.listdir(os.path.join(train_path, folder)):
        train_img_path.append(os.path.join(train_path, folder, img))
        train_img_label.append(classes.index(folder))

val_img_path = []
val_img_label = []
for folder in os.listdir(val_path):
    for img in os.listdir(os.path.join(val_path, folder)):
        val_img_path.append(os.path.join(val_path, folder, img))
        val_img_label.append(classes.index(folder))

# Подготовка датасета (до добавления аугментаций)

In [3]:
# Для работы с библиотекой albumentations придется написать кастомный датасет
class MyDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.labels[idx]

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image, label

In [4]:
# Трансформации
train_transform = A.Compose([
    A.Resize(150, 150), # Выдавало ошибку, видимо некоторые изображения отличаются по размеру
    A.Normalize(mean=(0.485, 0.456, 0.406), 
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

val_transform = A.Compose([
    A.Resize(150, 150),
    A.Normalize(mean=(0.485, 0.456, 0.406), 
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Dataloaders
train_dataset = MyDataset(image_paths=train_img_path, labels=train_img_label, transform=train_transform)
train_loader = DataLoader(train_dataset,
                          batch_size=128,
                          shuffle=True,
                          num_workers=0, # Тоже проблему выдавало, оказывается из-за многопоточности
                          pin_memory=True)

val_dataset = MyDataset(image_paths=val_img_path, labels=val_img_label, transform=val_transform)
val_loader = DataLoader(val_dataset,
                        batch_size=128,
                        shuffle=False,
                        num_workers=0,
                        pin_memory=True)

print(f'Обучающих примеров: {len(train_dataset)}')
print(f'Валидационных примеров: {len(val_dataset)}')
print(f'Количество классов: {len(classes)}')

Обучающих примеров: 14034
Валидационных примеров: 3000
Количество классов: 6


# Создание базовой модели (Baseline)

In [5]:
class BaseCNN(nn.Module):
    def __init__(self, num_classes=6):
        super(BaseCNN, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)

        # Batch Normalization layers
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)

        # Pooling
        self.pool = nn.MaxPool2d(2, 2)

        # Dropout
        self.dropout = nn.Dropout(0.5)

        # Full connected layers
        self.fc1 = nn.Linear(256 * 9 * 9, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        # Convolution
        x = self.pool(F.relu(self.bn1(self.conv1(x)))) # Conv1 -> BatchNorm1 -> ReLU -> MaxPooling
        x = self.pool(F.relu(self.bn2(self.conv2(x)))) # Conv2 -> BatchNorm2 -> ReLU -> MaxPooling
        x = self.pool(F.relu(self.bn3(self.conv3(x)))) # Conv3 -> BatchNorm3 -> ReLU -> MaxPooling
        x = self.pool(F.relu(self.bn4(self.conv4(x)))) # Conv4 -> BatchNorm4 -> ReLU -> MaxPooling

        # Flatten
        x = x.view(x.size(0), -1)

        # Full connection
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        return x

In [6]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(train_loader, desc='Training')
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        pred = output.argmax(dim=1, keepdim=True)
        total += target.size(0)
        correct += pred.eq(target.view_as(pred)).sum().item()

        # Обновление прогресс-бара
        pbar.set_postfix({
            'Loss': f'{running_loss/total:.4f}',
            'Acc': f'{100.*correct/total:.2f}%'
        })

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    return epoch_loss, epoch_acc

def val_epoch(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(val_loader, desc='Validation')
    
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
    
            running_loss += loss.item()
            pred = output.argmax(dim=1, keepdim=True)
            total += target.size(0)
            correct += pred.eq(target.view_as(pred)).sum().item()
    
            # Обновление прогресс-бара
            pbar.set_postfix({
                'Loss': f'{running_loss/total:.4f}',
                'Acc': f'{100.*correct/total:.2f}%'
            })

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    return epoch_loss, epoch_acc

In [7]:
# Создадим модель сверточной нейросети
model = BaseCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-6)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

In [None]:
# Основной цикл обучения
def train_model(model, criterion, optimizer, scheduler, epochs=20):
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    best_val_acc = 0.0
    start_time = time.time()

    print("Начинаем обучение...")
    print("=" * 60)

    for epoch in range(epochs):
        epoch_start = time.time()

        # Обучение
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

        # Валидация
        val_loss, val_acc = val_epoch(model, val_loader, criterion, device)

        # Обновление планировщика
        scheduler.step(val_loss)

        # Сохранение метрик
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        # Время эпохи
        epoch_time = time.time() - epoch_start

        # Вывод результатов
        print(f'\nEpoch {epoch+1}/{epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc*100.:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100.:.2f}%')
        print(f'Time: {epoch_time:.1f}s, LR: {scheduler.get_last_lr()[0]:.6f}')

        # Сохранение лучшей модели
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print(f'★ Новая лучшая модель! Accuracy: {best_val_acc*100.:.2f}%')

        print("-" * 60)

    total_time = time.time() - start_time
    print(f'\nОбучение завершено!')
    print(f'Общее время: {total_time/60:.1f} минут')
    print(f'Лучшая валидационная точность: {best_val_acc*100.:.2f}%')

    return train_losses, train_accuracies, val_losses, val_accuracies

# Запуск обучения
history = train_model(model, criterion, optimizer, scheduler)

Начинаем обучение...


Training: 100%|██████████| 110/110 [01:55<00:00,  1.05s/it, Loss=0.0100, Acc=52.62%]
Validation: 100%|██████████| 24/24 [00:23<00:00,  1.01it/s, Loss=0.0075, Acc=64.90%]



Epoch 1/20:
Train Loss: 0.0100, Train Acc: 52.62%
Val Loss: 0.0075, Val Acc: 64.90%
Time: 139.0s, LR: 0.001000
★ Новая лучшая модель! Accuracy: 64.90%
------------------------------------------------------------


Training:  79%|███████▉  | 87/110 [00:30<00:08,  2.62it/s, Loss=0.0063, Acc=70.56%]

# Анализ результатов

In [None]:
def plot_training_history(history):
    train_losses, train_accuracies, val_losses, val_accuracies = history
    epochs = range(1, len(train_losses) + 1)
    
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))

    # Loss plot
    ax[0].plot(epochs, train_losses, 'bo-', label='Train loss', lw=2)
    ax[0].plot(epochs, val_losses, 'ro-', label='Validation loss', lw=2)
    ax[0].set_title('Training and Validation Loss')
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('Loss')
    ax[0].legend()
    ax[0].grid(True, alpha=0.3)

    # Accuracy plot
    ax[1].plot(epochs, train_accuracies, 'bo-', label='Train accuracy', lw=2)
    ax[1].plot(epochs, val_accuracies, 'ro-', label='Validation accuracy', lw=2)
    ax[1].set_title('Training and Validation Accuracy')
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('Accuracy')
    ax[1].legend()
    ax[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    best_train_acc = max(train_accuracies)
    best_val_acc = max(val_accuracies)
    final_train_loss = train_losses[-1]
    final_val_loss = val_losses[-1]

    print('Final results')
    print(f'Best training accuracy: {best_train_acc*100.:.2f}%')
    print(f'Best validation accuracy: {best_val_acc*100.:.2f}%')
    print(f'Final train loss: {final_train_loss:.4f}')
    print(f'Final validation loss: {final_val_loss:.4f}')

plot_training_history(history)

In [None]:
# Детальная оценка на тестовом наборе
def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []
    y_proba = []
    
    with torch.no_grad():
        for data, target in tqdm(test_loader, desc='Evaluation'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            proba = F.softmax(output, dim=1)
            
            y_true.extend(target.cpu().numpy())
            y_pred.extend(pred.cpu().numpy())
            y_proba.extend(proba.cpu().numpy())
    
    return np.array(y_true), np.array(y_pred), np.array(y_proba)

y_true, y_pred, y_proba = evaluate_model(model, val_loader)

# Матрица ошибок
def plot_confusion_matrix(y_true, y_pred, classes):
    cm = confusion_matrix(y_true, y_pred)
    
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=classes, yticklabels=classes)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.tight_layout()
    plt.show()
    
    # Accuracy по классам
    class_accuracy = cm.diagonal() / cm.sum(axis=1)
    
    print("\nТочность по классам:")
    for i, acc in enumerate(class_accuracy):
        print(f"{classes[i]}: {acc:.3f}")

plot_confusion_matrix(y_true, y_pred, classes)

# Отчет по классификации
print("\nДетальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

In [None]:
# Визуализация предсказаний
def visualize_predictions(model, test_loader, classes, num_images=20):
    model.eval()
    images_shown = 0
    
    fig, axes = plt.subplots(5, 4, figsize=(15, 15))
    axes = axes.ravel()
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            proba = F.softmax(output, dim=1)
            
            for i in range(data.size(0)):
                if images_shown >= num_images:
                    break
                
                img = data[i].cpu()
                img = torch.clamp(img, 0, 1)
                
                # Отображение
                axes[images_shown].imshow(img.permute(1, 2, 0))
                
                true_label = classes[target[i]]
                pred_label = classes[pred[i]]
                confidence = proba[i][pred[i]].item()
                
                # Цвет: зеленый для правильных, красный для неправильных
                color = 'green' if target[i] == pred[i] else 'red'
                
                axes[images_shown].set_title(
                    f'True: {true_label}\nPred: {pred_label} ({confidence:.2f})',
                    color=color, fontsize=10
                )
                axes[images_shown].axis('off')
                
                images_shown += 1
            
            if images_shown >= num_images:
                break
    
    plt.tight_layout()
    plt.show()

visualize_predictions(model, val_loader, classes)

# Добавление аугментаций

## 1. Базовые геометрические преобразования
* Горизонтальные отражения (horizontal flip)
* Повороты (rotation) ±15°


In [None]:
def run_experiment(ratio):
    # Создадим модель сверточной нейросети
    model = BaseCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-6)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

    # Training
    train_transform = A.Compose([
        A.Resize(150, 150),
        A.HorizontalFlip(p=ratio),
        A.Rotate(limit=(-15, 15), p=ratio),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_dataset = MyDataset(image_paths=train_img_path, 
                              labels=train_img_label, 
                              transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=128,
                              shuffle=True,
                              num_workers=0,
                              pin_memory=True)

    # Validation
    val_transform = A.Compose([
        A.Resize(150, 150),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    val_dataset = MyDataset(image_paths=val_img_path, 
                            labels=val_img_label, 
                            transform=val_transform)
    val_loader = DataLoader(val_dataset,
                            batch_size=128,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
    
    # Запуск обучения
    history = train_model(model, criterion, optimizer, scheduler)

### 25% аугментированных данных

In [None]:
run_experiment(ratio=0.25)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 50% аугментированных данных

In [None]:
run_experiment(ratio=0.5)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 75% аугментированных данных

In [None]:
run_experiment(ratio=0.75)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 100% аугментированных данных

In [None]:
run_experiment(ratio=1.0)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

## 2. Добавление искажений
* Предыдущие аугментации +
* Случайный зум (random zoom) ±10%
* Сдвиги (translation) ±10%

In [None]:
def run_experiment(ratio):
    # Создадим модель сверточной нейросети
    model = BaseCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-6)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

    # Training
    train_transform = A.Compose([
        A.Resize(150, 150),
        A.HorizontalFlip(p=ratio),
        A.Rotate(limit=(-15, 15), p=ratio),
        A.RandomScale(scale_limit=(0.9, 1.1), p=ratio),
        A.ShiftScaleRotate(shift_limit=(0.1, 0.1), scale_limit=0, 
                           rotate_limit=0, border_mode=cv2.BORDER_REFLECT, p=ratio),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_dataset = MyDataset(image_paths=train_img_path, 
                              labels=train_img_label, 
                              transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=128,
                              shuffle=True,
                              num_workers=0,
                              pin_memory=True)

    # Validation
    val_transform = A.Compose([
        A.Resize(150, 150),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    val_dataset = MyDataset(image_paths=val_img_path, 
                            labels=val_img_label, 
                            transform=val_transform)
    val_loader = DataLoader(val_dataset,
                            batch_size=128,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
    
    # Запуск обучения
    history = train_model(model, criterion, optimizer, scheduler)

### 25% аугментированных данных

In [None]:
run_experiment(ratio=0.25)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 50% аугментированных данных

In [None]:
run_experiment(ratio=0.5)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 75% аугментированных данных

In [None]:
run_experiment(ratio=0.75)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 100% аугментированных данных

In [None]:
run_experiment(ratio=1.0)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

## 3. Цветовые и яркостные преобразования
* Предыдущие аугментации +
* Изменение яркости (brightness) ±20%
* Изменение контраста (contrast) ±20%

In [None]:
def run_experiment(ratio):
    # Создадим модель сверточной нейросети
    model = BaseCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-6)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

    # Training
    train_transform = A.Compose([
        A.Resize(150, 150),
        A.HorizontalFlip(p=ratio),
        A.Rotate(limit=(-15, 15), p=ratio),
        A.RandomScale(scale_limit=(0.9, 1.1), p=ratio),
        A.ShiftScaleRotate(shift_limit=(0.1, 0.1), scale_limit=0, 
                           rotate_limit=0, border_mode=cv2.BORDER_REFLECT, p=ratio),
        A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=ratio),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_dataset = MyDataset(image_paths=train_img_path, 
                              labels=train_img_label, 
                              transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=128,
                              shuffle=True,
                              num_workers=0,
                              pin_memory=True)

    # Validation
    val_transform = A.Compose([
        A.Resize(150, 150),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    val_dataset = MyDataset(image_paths=val_img_path, 
                            labels=val_img_label, 
                            transform=val_transform)
    val_loader = DataLoader(val_dataset,
                            batch_size=128,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
    
    # Запуск обучения
    history = train_model(model, criterion, optimizer, scheduler)

### 25% аугментированных данных

In [None]:
run_experiment(ratio=0.25)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 50% аугментированных данных

In [None]:
run_experiment(ratio=0.5)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 75% аугментированных данных

In [None]:
run_experiment(ratio=0.75)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 100% аугментированных данных

In [None]:
run_experiment(ratio=1.0)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

## 4. Продвинутые техники
* Предыдущие аугментации +
* Размытие (Gaussian blur)
* Добавление шума (noise injection)

In [None]:
def run_experiment(ratio):
    # Создадим модель сверточной нейросети
    model = BaseCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-6)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

    # Training
    train_transform = A.Compose([
        A.Resize(150, 150),
        A.HorizontalFlip(p=ratio),
        A.Rotate(limit=(-15, 15), p=ratio),
        A.RandomScale(scale_limit=(0.9, 1.1), p=ratio),
        A.ShiftScaleRotate(shift_limit=(0.1, 0.1), scale_limit=0, 
                           rotate_limit=0, border_mode=cv2.BORDER_REFLECT, p=ratio),
        A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=ratio),
        A.GaussianBlur(sigma_limit=(0.1, 0.2), p=ratio),
        A.GaussNoise(p=ratio),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_dataset = MyDataset(image_paths=train_img_path, 
                              labels=train_img_label, 
                              transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=128,
                              shuffle=True,
                              num_workers=0,
                              pin_memory=True)

    # Validation
    val_transform = A.Compose([
        A.Resize(150, 150),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    val_dataset = MyDataset(image_paths=val_img_path, 
                            labels=val_img_label, 
                            transform=val_transform)
    val_loader = DataLoader(val_dataset,
                            batch_size=128,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
    
    # Запуск обучения
    history = train_model(model, criterion, optimizer, scheduler)

### 25% аугментированных данных

In [None]:
run_experiment(ratio=0.25)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 50% аугментированных данных

In [None]:
run_experiment(ratio=0.5)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 75% аугментированных данных

In [None]:
run_experiment(ratio=0.75)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 100% аугментированных данных

In [None]:
run_experiment(ratio=1.0)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

## 5. Современные аугментации
* Предыдущие аугментации +
* Random Erasing
* CutMix

In [None]:
# CutMix в Albumentations нет, придется писать кастомный
class CutMix(A.DualTransform):
    def __init__(self, always_apply=False, p=0.5):
        super().__init__(always_apply, p)

    def apply(self, image, **params):
        return image

    def get_params_dependent_on_targets(self, params):
        image = params["image"]
        h, w, _ = image.shape
        cut_w = w // 2
        cut_h = h // 2
        x1 = np.random.randint(0, w - cut_w)
        y1 = np.random.randint(0, h - cut_h)
        return {"x1": x1, "y1": y1, "cut_w": cut_w, "cut_h": cut_h}

    def apply_with_other(self, image, other_image, x1, y1, cut_w, cut_h, **params):
        img = image.copy()
        img[y1:y1+cut_h, x1:x1+cut_w] = other_image[y1:y1+cut_h, x1:x1+cut_w]
        return img

    @property
    def targets_as_params(self):
        return ["image"]

    def update_params(self, params, **kwargs):
        return params

In [None]:
def run_experiment(ratio):
    # Создадим модель сверточной нейросети
    model = BaseCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-6)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

    # Training
    train_transform = A.Compose([
        A.Resize(150, 150),
        A.HorizontalFlip(p=ratio),
        A.Rotate(limit=(-15, 15), p=ratio),
        A.RandomScale(scale_limit=(0.9, 1.1), p=ratio),
        A.ShiftScaleRotate(shift_limit=(0.1, 0.1), scale_limit=0, 
                           rotate_limit=0, border_mode=cv2.BORDER_REFLECT, p=ratio),
        A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=ratio),
        A.GaussianBlur(sigma_limit=(0.1, 0.2), p=ratio),
        A.GaussNoise(p=ratio),
        A.Erasing(p=ratio),
        CutMix(p=ratio),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_dataset = MyDataset(image_paths=train_img_path, 
                              labels=train_img_label, 
                              transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=128,
                              shuffle=True,
                              num_workers=0,
                              pin_memory=True)

    # Validation
    val_transform = A.Compose([
        A.Resize(150, 150),
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    val_dataset = MyDataset(image_paths=val_img_path, 
                            labels=val_img_label, 
                            transform=val_transform)
    val_loader = DataLoader(val_dataset,
                            batch_size=128,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
    
    # Запуск обучения
    history = train_model(model, criterion, optimizer, scheduler)

### 25% аугментированных данных

In [None]:
run_experiment(ratio=0.25)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 50% аугментированных данных

In [None]:
run_experiment(ratio=0.5)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 75% аугментированных данных

In [None]:
run_experiment(ratio=0.75)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

### 100% аугментированных данных

In [None]:
run_experiment(ratio=1.0)

In [None]:
# Графики потерь и точности
plot_training_history(history)

In [None]:
y_true, y_pred, y_proba = evaluate_model(model, val_loader) # Оценка на тестовом наборе
plot_confusion_matrix(y_true, y_pred, classes) # Матрица ошибок

In [None]:
# Отчет по классификации
print("Детальный отчет:")
print(classification_report(y_true, y_pred, target_names=classes))

# Исследовательские вопросы для анализа

## 1. Какое соотношение аугментированных данных к исходным является оптимальным?

### Ответ

## 2. Зависит ли качество предсказаний от количества различных способов аугментации?

### Ответ

## 3. На каком этапе добавления аугментаций наблюдается наибольший прирост качества?

### Ответ

## 4. Есть ли признак переобучения при использовании слишком агрессивных аугментаций?