## Курсовая работа по курсу ФКИИ

Выполнил студент группы М8О-109СВ-24, Сорокин Никита

In [None]:
import os
import time
from tempfile import TemporaryDirectory

import matplotlib.pyplot as plt
import numpy as np
import shap
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torchvision
from captum.attr import InputXGradient
from lime import lime_image
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from skimage.segmentation import mark_boundaries
from skimage.transform import resize
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
from torchvision.models.resnet import BasicBlock

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Workaround for kernel crashes (presumably caused by duplicate OpenMP runtimes)

## Скачивание небольшого датасета с картинками:

In [None]:
#!/bin/bash
# Download the dataset archive from Kaggle into cats-dogs-datset.zip
# (the extraction cell expects exactly this file name).
!curl -L -o cats-dogs-datset.zip https://www.kaggle.com/api/v1/datasets/download/samuelcortinhas/cats-and-dogs-image-classification

In [None]:
import zipfile

# Unpack the downloaded archive into the local "dataset" directory.
with zipfile.ZipFile(file="cats-dogs-datset.zip", mode='r') as archive:
    archive.extractall(path="dataset")
print("Распаковка завершена.")

### Деление датасета на train/val/test

In [None]:
import os
import random
import shutil

# Source directory that holds one sub-folder per class.
source_dir = 'dataset/dataset'
target_root = 'dataset_split'  # Destination root for the train/val/test folders

splits = ['train', 'val', 'test']
split_ratios = {'train': 0.8, 'val': 0.1, 'test': 0.1}

# A dedicated, seeded RNG plus sorted directory listings make the split
# reproducible: re-running this cell copies every image into the same split
# again instead of leaking it into a different one. A dedicated instance also
# leaves the global `random` state untouched.
# NOTE: if `dataset_split` was produced by an earlier, unseeded run, delete it
# once before re-running so stale copies do not mix with the new split.
split_rng = random.Random(42)

classes = sorted(
    d for d in os.listdir(source_dir)
    if os.path.isdir(os.path.join(source_dir, d))
)

for cls in classes:
    cls_dir = os.path.join(source_dir, cls)
    # os.listdir order is arbitrary, so sort before the seeded shuffle.
    images = sorted(os.listdir(cls_dir))
    split_rng.shuffle(images)

    n_total = len(images)
    n_train = int(n_total * split_ratios['train'])
    n_val = int(n_total * split_ratios['val'])

    split_data = {
        'train': images[:n_train],
        'val': images[n_train:n_train + n_val],
        # The test split takes the remainder, absorbing rounding leftovers.
        'test': images[n_train + n_val:],
    }

    for split in splits:
        split_dir = os.path.join(target_root, split, cls)
        os.makedirs(split_dir, exist_ok=True)
        for img_name in split_data[split]:
            src = os.path.join(cls_dir, img_name)
            dst = os.path.join(split_dir, img_name)
            shutil.copy2(src, dst)

print("Разделение завершено: train / val / test.")


In [None]:
# Enable the cuDNN benchmark flag and switch matplotlib to interactive mode.
cudnn.benchmark = True
plt.ion()

# Per-split preprocessing: random crop + flip for training, a fixed
# resize + center crop for validation and test. All splits are normalized
# with the same per-channel mean/std.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    **{
        split: transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        for split in ('val', 'test')
    },
}

data_dir = './dataset_split'

# One ImageFolder per split, reading <data_dir>/<split>/<class>/...
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), pipeline)
    for split, pipeline in data_transforms.items()
}

# Every split except 'test' is shuffled.
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=4,
        shuffle=(split != 'test'),
        num_workers=4,
    )
    for split, dataset in image_datasets.items()
}

dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


In [None]:
# Display the number of samples in each split.
dataset_sizes

In [None]:
# Display the class labels taken from the training ImageFolder.
class_names

In [None]:
# Second set of pipelines: every split is resized directly to 224x224 and
# normalized; only the training split adds a random horizontal flip.
# NOTE(review): `dataloaders` is not rebuilt in this cell, so it still wraps
# the dataset objects created earlier — confirm whether that is intended.
data_transforms = {
    split: transforms.Compose(
        [transforms.Resize((224, 224))]
        + ([transforms.RandomHorizontalFlip()] if split == 'train' else [])
        + [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ]
    )
    for split in ('train', 'val', 'test')
}

data_dir = './dataset_split'

image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), pipeline)
    for split, pipeline in data_transforms.items()
}

class_names = image_datasets['train'].classes

def imshow(inp_tensor, title=None):
    """Show a normalized (3, H, W) image tensor in a new matplotlib figure.

    The tensor is de-normalized with the per-channel mean/std used by the
    preprocessing pipelines and clipped to [0, 1] before plotting. Any
    exception is caught and reported with ``print``; the function never
    raises and always returns ``None``.

    Args:
        inp_tensor: Image tensor with the channel axis first.
        title: Optional figure title; skipped when falsy.
    """
    try:
        hwc = np.transpose(inp_tensor.detach().cpu().numpy(), (1, 2, 0))
        channel_mean = np.array([0.485, 0.456, 0.406])
        channel_std = np.array([0.229, 0.224, 0.225])
        picture = np.clip(channel_std * hwc + channel_mean, 0, 1)

        plt.figure(figsize=(6, 3))
        plt.imshow(picture)
        if title:
            plt.title(title)
        plt.axis('off')
        # Short pause so the figure gets drawn in interactive mode.
        plt.pause(0.001)
        plt.show()
    except Exception as e:
        print("Ошибка в imshow:", e)

# Preview the first two images of one training batch with their class names.
try:
    batch_images, batch_labels = next(iter(dataloaders['train']))
    inputs, class_ids = batch_images[:2], batch_labels[:2]
    out = torchvision.utils.make_grid(inputs)

    title = ' | '.join(class_names[x] for x in class_ids)
    imshow(out, title=title)

except StopIteration:
    # The loader produced no batch at all.
    print("Dataloader пуст — проверь содержимое dataset_split/train/")
except Exception as e:
    print("Ошибка при визуализации батча:", e)


# Натренируем модельку

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it with its best validation weights loaded.

    Every epoch runs a 'train' phase followed by a 'val' phase. The weights
    are checkpointed to a temporary file whenever the validation accuracy
    improves, and that checkpoint is loaded back before returning.

    Args:
        model: Network to train. Inputs are moved to ``device`` but the model
            is not, so it must already live on that device.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: LR scheduler stepped once per epoch, after the train phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object with the best-validation weights loaded.

    Note:
        Reads the module-level ``dataloaders``, ``dataset_sizes`` and
        ``device``.
    """
    since = time.time()
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        # Save the initial weights so a checkpoint exists even if validation
        # accuracy never rises above zero.
        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0
                running_corrects = 0

                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    optimizer.zero_grad()

                    # Track gradients only while training.
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # loss is a batch mean; weight it by the batch size.
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels)

                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        # ".4f" (four decimals); the previous "4f" only set a minimum width.
        print(f'Best val Acc: {best_acc:.4f}')
        model.load_state_dict(torch.load(best_model_params_path))
    return model


In [None]:
# Load ResNet-18 with the 'IMAGENET1K_V1' weights.
model_conv = torchvision.models.resnet18(weights='IMAGENET1K_V1')

# Freeze the loaded parameters; the replacement head created below is a new
# module and therefore stays trainable.
for param in model_conv.parameters():
    param.requires_grad_(False)

# Swap the final fully-connected layer for a 2-class head.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(in_features=num_ftrs, out_features=2)
model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()
# Only the new head's parameters are handed to the optimizer.
optimizer_conv = optim.SGD(params=model_conv.fc.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer_conv, step_size=7, gamma=0.1)


In [None]:
# Train the classifier head for 51 epochs; the returned model carries the best validation weights.
model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=51)

In [None]:
def imshow(inp_tensor, title=None):
    """Draw a normalized (3, H, W) image tensor on the current axes.

    The tensor is de-normalized with the per-channel mean/std used by the
    preprocessing pipelines and clipped to [0, 1]. Unlike a stand-alone
    viewer, no figure is created and ``plt.show()`` is not called, so the
    function can be used inside subplots.

    Args:
        inp_tensor: Image tensor with the channel axis first.
        title: Optional title; applied whenever it is not ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    hwc = np.transpose(inp_tensor.detach().cpu().numpy(), (1, 2, 0))
    picture = np.clip(channel_std * hwc + channel_mean, 0, 1)

    plt.imshow(picture)
    if title is not None:
        plt.title(title)
    plt.axis('off')


## Проверки:

In [None]:
def visualize_model(model, num_images=6):
    """Plot predictions for ``num_images`` samples from the validation loader.

    Images are laid out in a two-column grid, each titled with the predicted
    class name. The model is switched to eval mode for the forward passes and
    its previous training mode is restored afterwards, even if plotting
    raises.

    Args:
        model: Classifier to run; expected to live on the module-level
            ``device``.
        num_images: Number of images to show.

    Note:
        Reads the module-level ``dataloaders``, ``device`` and
        ``class_names``.
    """
    was_training = model.training
    model.eval()
    # Ceil division so an odd num_images still gets enough grid cells
    # (num_images // 2 rows would be one cell short).
    n_rows = (num_images + 1) // 2
    images_so_far = 0
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in dataloaders['val']:
                inputs = inputs.to(device)
                _, preds = torch.max(model(inputs), 1)

                images_cpu = inputs.cpu()
                for image, pred in zip(images_cpu, preds):
                    images_so_far += 1
                    ax = plt.subplot(n_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[pred]}')
                    imshow(image)

                    if images_so_far == num_images:
                        return
    finally:
        # Single restore point for every exit path, including exceptions.
        model.train(mode=was_training)


# Show predictions for six validation images (the default num_images).
visualize_model(model_conv)

# Turn interactive mode off and render the figure.
plt.ioff()
plt.show()


In [None]:
def visualize_model_predictions(model, img_path):
    """Classify one image file and draw it with its predicted class.

    The image is loaded, converted to RGB, preprocessed with the 'val'
    pipeline and passed through ``model`` in eval mode. The model's previous
    training mode is restored afterwards, even on error.

    Args:
        model: Classifier to run; expected to live on the module-level
            ``device``.
        img_path: Path of the image file to classify.

    Note:
        Reads the module-level ``data_transforms``, ``device`` and
        ``class_names``.
    """
    was_training = model.training
    model.eval()

    try:
        # Force three channels: the Normalize step uses 3-element mean/std,
        # so grayscale, palette or RGBA files would otherwise fail.
        img = Image.open(img_path).convert('RGB')
        img = data_transforms['val'](img)
        img = img.unsqueeze(0)
        img = img.to(device)

        with torch.no_grad():
            outputs = model(img)
            _, preds = torch.max(outputs, 1)

        ax = plt.subplot(2, 2, 1)
        ax.axis('off')
        ax.set_title(f'Predicted: {class_names[preds[0]]}')
        imshow(img.cpu()[0])
    finally:
        model.train(mode=was_training)

# Classify and display a single image from disk.
visualize_model_predictions(
    model_conv,
    img_path='./img/first picture.jpg'
)

# Turn interactive mode off and render the figure.
plt.ioff()
plt.show()



# LIME:

In [None]:
def transform_image_for_model(img_path):
    """Load an image file as a single-item batch on ``device``.

    The file is opened, converted to RGB and run through the 'val'
    preprocessing pipeline; a leading batch axis is then added.

    Args:
        img_path: Path of the image file.

    Returns:
        The preprocessed image tensor with a leading batch axis of size 1.
    """
    rgb_image = Image.open(img_path).convert('RGB')
    preprocessed = data_transforms['val'](rgb_image)
    batch = torch.unsqueeze(preprocessed, dim=0)
    return batch.to(device)

def denormalize(tensor):
    """Undo the per-channel mean/std normalization of a (3, H, W) tensor.

    Args:
        tensor: Normalized image tensor with the channel axis first.

    Returns:
        A CPU tensor equal to ``tensor * std + mean``; values are not clamped.
    """
    channel_mean = torch.tensor([0.485, 0.456, 0.406]).reshape(3, 1, 1)
    channel_std = torch.tensor([0.229, 0.224, 0.225]).reshape(3, 1, 1)
    scaled = torch.mul(tensor.cpu(), channel_std)
    return torch.add(scaled, channel_mean)

def batch_predict(images):
    """Return class probabilities of ``model_conv`` for a batch of images.

    Each element of ``images`` is converted to a PIL image, preprocessed with
    the 'val' pipeline and moved to ``device``; the stacked batch is then run
    through ``model_conv`` in eval mode without gradient tracking.

    Args:
        images: Iterable of image arrays accepted by ``Image.fromarray``.

    Returns:
        NumPy array of softmax probabilities, one row per input image.
    """
    model_conv.eval()
    preprocess = data_transforms['val']
    pil_images = (Image.fromarray(array) for array in images)
    tensors = [preprocess(pil_image).to(device) for pil_image in pil_images]
    batch = torch.stack(tensors, dim=0)
    with torch.no_grad():
        logits = model_conv(batch)
    probabilities = torch.softmax(logits, dim=1)
    return probabilities.cpu().numpy()

def explain_prediction_with_lime(img_path):
    """Explain the model's top prediction for one image with LIME and plot it.

    The image is loaded as RGB, resized to 256x256 and passed to
    ``LimeImageExplainer.explain_instance`` with ``batch_predict`` as the
    classifier function (1000 samples, top label only). Up to 10 features
    with positive weight for that label are outlined on the image.

    Args:
        img_path: Path of the image file to explain.
    """
    rgb_image = Image.open(img_path).convert('RGB')
    np_img = np.array(rgb_image.resize((256, 256)))

    explanation = lime_image.LimeImageExplainer().explain_instance(
        np_img,
        batch_predict,
        top_labels=1,
        hide_color=0,
        num_samples=1000,
    )
    top_label = explanation.top_labels[0]

    temp, mask = explanation.get_image_and_mask(
        label=top_label,
        positive_only=True,
        hide_rest=False,
        num_features=10,
        min_weight=0.0,
    )
    # mark_boundaries receives the returned image scaled from 0-255 to [0, 1].
    outlined = mark_boundaries(temp / 255.0, mask)

    plt.figure(figsize=(6, 6))
    plt.title(f"Predicted: {class_names[top_label]}")
    plt.imshow(outlined)
    plt.axis('off')
    plt.show()


In [None]:
# LIME explanation for the first sample picture.
explain_prediction_with_lime("./img/first picture.jpg")

In [None]:
# LIME explanation for the second sample picture.
explain_prediction_with_lime("./img/second picture.jpg")

In [None]:
# LIME explanation for the third sample picture.
explain_prediction_with_lime("./img/third picture.jpg")

# SHAP:

In [None]:
def explain_with_shap(model, img_path, device, class_names):
    """Explain the model's prediction for one image with SHAP and plot it.

    The image is classified on ``device``; SHAP values are then computed with
    ``shap.GradientExplainer`` on a CPU copy of the model, using a
    Gaussian-blurred version of the image as background. Three panels are
    drawn: the original image, a seismic-colormap overlay and the absolute
    SHAP magnitude.

    The caller's model is left on its device and is not modified apart from
    being switched to eval mode; the temporary ``BasicBlock.forward`` patch
    is undone before returning.

    Args:
        model: ResNet-style classifier built from ``BasicBlock`` modules.
        img_path: Path of the image file to explain.
        device: Device on which ``model`` lives; used for the prediction.
        class_names: Sequence mapping class index to a display name.
    """
    import copy  # local import: only needed to clone the model for SHAP

    def patched_forward(self, x):
        # Same residual block, but with an out-of-place addition
        # (`out = out + identity`).
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out = out + identity
        out = self.relu(out)
        return out

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    img = Image.open(img_path).convert('RGB')
    img_tensor = transform(img).unsqueeze(0).to(device)
    model.eval()

    with torch.no_grad():
        output = model(img_tensor)
    class_id = output.argmax(dim=1).item()
    print(f"Предсказанный класс: {class_names[class_id]}")

    # Explain a CPU *copy*: calling model.cpu() would move the caller's model
    # off `device` for good and break every later call that sends inputs to
    # `device`.
    model_cpu = copy.deepcopy(model).cpu()
    model_cpu.eval()
    for m in model_cpu.modules():
        if isinstance(m, torch.nn.ReLU):
            m.inplace = False

    img_tensor = img_tensor.cpu().detach().requires_grad_()
    background = transforms.GaussianBlur(kernel_size=15)(img_tensor)

    # Patch the residual block only while SHAP runs, then restore it so the
    # class-level change does not leak into the rest of the session.
    original_forward = BasicBlock.forward
    BasicBlock.forward = patched_forward
    try:
        explainer = shap.GradientExplainer(model_cpu, background)
        shap_vals = explainer.shap_values(img_tensor)
    finally:
        BasicBlock.forward = original_forward

    # Pick the values of the *predicted* class (index 0 was hard-coded before).
    if isinstance(shap_vals, list):
        # Assumes the older SHAP layout: one (N, C, H, W) array per output.
        shap_img = np.asarray(shap_vals[class_id])[0]
    else:
        # Assumes the newer SHAP layout: one (N, C, H, W, outputs) array.
        shap_img = np.asarray(shap_vals)[0][..., class_id]

    if shap_img.ndim == 3:
        # Collapse the channel axis into a single 2-D map.
        shap_map = np.mean(shap_img, axis=0)
    else:
        shap_map = shap_img

    def denorm(img):
        mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
        return torch.clamp(img * std + mean, 0, 1).detach().numpy().transpose(1, 2, 0)

    img_vis = denorm(img_tensor[0])
    # 98th percentile of |SHAP| as colour scale, computed once for both plots.
    vlim = np.percentile(np.abs(shap_map), 98)
    # Map [-vlim, vlim] to [0, 1] for the diverging colormap.
    norm = np.clip((shap_map / (vlim + 1e-6) + 1) / 2, 0, 1)
    heatmap = plt.cm.seismic(norm)[..., :3]
    if heatmap.shape[:2] != img_vis.shape[:2]:
        heatmap = resize(heatmap, img_vis.shape[:2], preserve_range=True)
    overlay = np.clip(0.6 * heatmap + 0.4 * img_vis, 0, 1)

    fig, ax = plt.subplots(1, 3, figsize=(18, 5))
    ax[0].imshow(img_vis)
    ax[0].set_title("Оригинал")
    ax[1].imshow(overlay)
    ax[1].set_title("SHAP Overlay")
    ax[2].imshow(np.abs(shap_map), cmap='hot', vmin=0, vmax=vlim, aspect='auto')
    ax[2].set_title("|SHAP|")
    for a in ax:
        a.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# SHAP explanation for the first sample picture.
explain_with_shap(model_conv, "./img/first picture.jpg", device, class_names)

In [None]:
# SHAP explanation for the second sample picture.
explain_with_shap(model_conv, "./img/second picture.jpg", device, class_names)

In [None]:
# SHAP explanation for the third sample picture.
explain_with_shap(model_conv, "./img/third picture.jpg", device, class_names)

# Captum:

In [None]:
def denormalize_image(tensor: torch.Tensor) -> torch.Tensor:
    """Undo the per-channel mean/std normalization and clamp to [0, 1].

    Args:
        tensor: Normalized (3, H, W) image tensor.

    Returns:
        Tensor on the same device as ``tensor`` holding
        ``tensor * std + mean`` clamped to the range [0, 1].
    """
    target_device = tensor.device
    channel_mean = torch.tensor([0.485, 0.456, 0.406], device=target_device).reshape(3, 1, 1)
    channel_std = torch.tensor([0.229, 0.224, 0.225], device=target_device).reshape(3, 1, 1)
    restored = tensor * channel_std + channel_mean
    return restored.clamp(min=0, max=1)

def create_heatmap_overlay(img: torch.Tensor, at_2d: np.ndarray, alpha: float = 0.6) -> np.ndarray:
    """Blend a 2-D attribution map over a normalized image.

    The image is de-normalized and moved to channel-last layout. The
    attribution map is scaled by the 98th percentile of its absolute values,
    clipped to [-1, 1], mapped to [0, 1] and coloured with the seismic
    colormap before blending.

    Args:
        img: Normalized (3, H, W) image tensor.
        at_2d: Attribution map; must broadcast against the image's (H, W).
        alpha: Weight of the heatmap; the image gets ``1 - alpha``.

    Returns:
        Channel-last RGB array with values in [0, 1].
    """
    background = denormalize_image(img.cpu().detach()).numpy().transpose(1, 2, 0)
    # Values are unused; the unpacking fails early unless the image is 3-D.
    _height, _width, _channels = background.shape

    scores = np.nan_to_num(at_2d.astype(np.float32))
    scale = np.percentile(np.abs(scores), 98)
    if scale == 0:
        # Avoid dividing by zero for an all-zero map.
        scale = 1e-8
    unit = (np.clip(scores / scale, -1, 1) + 1) / 2
    heatmap = plt.cm.seismic(unit)[..., :3]  # drop the alpha channel

    blended = (1 - alpha) * background + alpha * heatmap
    return np.clip(blended, 0, 1)

def explain_with_captum(model, img_path, device, class_names):
    """Explain the model's prediction with Captum's InputXGradient and plot it.

    The image is resized to 224x224, normalized and classified; attributions
    for the predicted class are averaged over the channel axis. Four panels
    are drawn: the original image, a heatmap overlay, the signed attribution
    map and its absolute value.

    Args:
        model: Classifier to explain; expected to live on ``device``.
        img_path: Path of the image file to explain.
        device: Device the input tensor is moved to.
        class_names: Sequence mapping class index to a display name.
    """
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    pil_image = Image.open(img_path).convert('RGB')
    img_tensor = preprocess(pil_image).unsqueeze(0).to(device)
    # Un-normalized copy of the picture, scaled to [0, 1], for the first panel.
    img_np = np.array(pil_image.resize((224, 224))).astype(np.float32) / 255.0

    model.eval()
    img_tensor.requires_grad_(True)

    logits = model(img_tensor)
    predicted_class = logits.argmax(dim=1).item()
    print(f"Предсказанный класс: {class_names[predicted_class]}")

    attributions = InputXGradient(model).attribute(img_tensor, target=predicted_class)
    # Average over the channel axis to get one 2-D map.
    at_gray = attributions.squeeze().detach().cpu().numpy().mean(axis=0)

    overlay = create_heatmap_overlay(img_tensor[0], at_gray, alpha=0.6)

    # Symmetric colour limits taken from the 95th percentile of |attribution|.
    vlim = np.percentile(np.abs(at_gray), 95)

    panels = [
        ('Оригинал', img_np, {}),
        ('Captum Heatmap', overlay, {}),
        ('Raw Attribution', at_gray, {'cmap': 'seismic', 'vmin': -vlim, 'vmax': vlim}),
        ('|Attribution|', np.abs(at_gray), {'cmap': 'hot'}),
    ]

    fig, axes = plt.subplots(1, 4, figsize=(20, 5))
    for axis, (panel_title, panel_data, imshow_kwargs) in zip(axes, panels):
        axis.imshow(panel_data, **imshow_kwargs)
        axis.set_title(panel_title)
        axis.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Captum explanation for the first sample picture.
explain_with_captum(model_conv, "./img/first picture.jpg", device, class_names)

In [None]:
# Captum explanation for the second sample picture.
explain_with_captum(model_conv, "./img/second picture.jpg", device, class_names)

In [None]:
# Captum explanation for the third sample picture.
explain_with_captum(model_conv, "./img/third picture.jpg", device, class_names)

# GradCam:

In [None]:

def explain_prediction_with_gradcam(img_path):
    """Show a Grad-CAM heatmap for ``model_conv``'s prediction on one image.

    The image is preprocessed with the 'val' pipeline and classified; a
    Grad-CAM map for the predicted class is computed on the last block of
    ``layer4`` and blended over the 224x224 resized picture.

    The model's training mode and every parameter's ``requires_grad`` flag
    are restored before returning, and the Grad-CAM hooks are released.

    Args:
        img_path: Path of the image file to explain.

    Note:
        Reads the module-level ``model_conv``, ``data_transforms``,
        ``device`` and ``class_names``.
    """
    img = Image.open(img_path).convert('RGB')
    input_tensor = data_transforms['val'](img).unsqueeze(0).to(device)

    img_np = np.array(img.resize((224, 224))).astype(np.float32) / 255.0
    img_np = np.clip(img_np, 0, 1)

    was_training = model_conv.training
    grad_flags = [(param, param.requires_grad) for param in model_conv.parameters()]

    # Explain in inference mode (train mode would make BatchNorm use batch
    # statistics) and enable parameter gradients only temporarily so the
    # target layer's activations take part in autograd.
    model_conv.eval()
    for param, _ in grad_flags:
        param.requires_grad = True

    try:
        with torch.no_grad():
            pred_class = model_conv(input_tensor).argmax().item()

        target_layers = [model_conv.layer4[-1]]
        # The context manager releases the hooks Grad-CAM registers, so
        # repeated calls do not pile up hooks on the layer.
        with GradCAM(model=model_conv, target_layers=target_layers) as cam:
            with torch.enable_grad():
                grayscale_cam = cam(
                    input_tensor=input_tensor,
                    targets=[ClassifierOutputTarget(pred_class)],
                )
    finally:
        for param, flag in grad_flags:
            param.requires_grad = flag
        model_conv.train(mode=was_training)

    cam_image = show_cam_on_image(img_np, grayscale_cam[0], use_rgb=True)

    plt.figure(figsize=(6, 6))
    plt.imshow(cam_image)
    plt.title(f"Grad-CAM — класс: {class_names[pred_class]}")
    plt.axis('off')
    plt.tight_layout()
    plt.show()


In [None]:
def gradcam_on_random_val_images(n=10):
    """Show Grad-CAM heatmaps for random images of the validation dataset.

    For each sampled image the predicted class is computed and a Grad-CAM
    map on the last block of ``layer4`` is blended over the de-normalized
    picture; the title shows the predicted and the true class.

    The model's training mode and every parameter's ``requires_grad`` flag
    are restored before returning, and the Grad-CAM hooks are released.

    Args:
        n: Number of images to sample; capped at the validation set size.

    Note:
        Reads the module-level ``model_conv``, ``image_datasets``,
        ``device`` and ``class_names``.
    """
    val_dataset = image_datasets['val']
    # random.sample raises ValueError when asked for more items than exist.
    indices = random.sample(range(len(val_dataset)), min(n, len(val_dataset)))

    was_training = model_conv.training
    grad_flags = [(param, param.requires_grad) for param in model_conv.parameters()]

    # Explain in inference mode (train mode would make BatchNorm use batch
    # statistics) and enable parameter gradients only temporarily so the
    # target layer's activations take part in autograd.
    model_conv.eval()
    for param, _ in grad_flags:
        param.requires_grad = True

    try:
        target_layers = [model_conv.layer4[-1]]
        # The context manager releases the hooks Grad-CAM registers.
        with GradCAM(model=model_conv, target_layers=target_layers) as cam:
            for idx in indices:
                img_tensor, label = val_dataset[idx]
                input_tensor = img_tensor.unsqueeze(0).to(device)

                img_np = denormalize(img_tensor).numpy().transpose(1, 2, 0)
                img_np = np.clip(img_np, 0, 1)

                with torch.no_grad():
                    pred_class = model_conv(input_tensor).argmax().item()

                with torch.enable_grad():
                    grayscale_cam = cam(
                        input_tensor=input_tensor,
                        targets=[ClassifierOutputTarget(pred_class)],
                    )

                cam_image = show_cam_on_image(img_np, grayscale_cam[0], use_rgb=True)

                plt.figure(figsize=(4, 4))
                plt.imshow(cam_image)
                plt.title(f"Predicted: {class_names[pred_class]}\nTrue: {class_names[label]}")
                plt.axis('off')
                plt.tight_layout()
                plt.show()
    finally:
        for param, flag in grad_flags:
            param.requires_grad = flag
        model_conv.train(mode=was_training)


In [None]:
# Grad-CAM for ten random validation images (the default n).
gradcam_on_random_val_images()


In [None]:
# Grad-CAM explanation for the first sample picture.
explain_prediction_with_gradcam("./img/first picture.jpg")

In [None]:
# Grad-CAM explanation for the second sample picture.
explain_prediction_with_gradcam("./img/second picture.jpg")

In [None]:
# Grad-CAM explanation for the third sample picture.
explain_prediction_with_gradcam("./img/third picture.jpg")