In [1]:
import torch
import pandas as pd
import numpy as np
import cv2
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from google.colab import drive
import os
from sklearn.model_selection import train_test_split
import wandb
import torch.nn.functional as F
import timm
from torch import nn
from sklearn.metrics import roc_curve
import math
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
from transformers import ViTForImageClassification

In [2]:
drive.mount('/content/drive')
image_folder = '/content/drive/MyDrive/Курсовая/images/'
masks_folder = '/content/drive/MyDrive/Курсовая/masks_full/'
metadata_path = '/content/drive/MyDrive/Курсовая/HAM10000_metadata.tab'
ISIC2017_malignant  = '/content/drive/MyDrive/Курсовая/ISIC_2017_only_malignant/'
masks_ISIC2017_malignant = '/content/drive/MyDrive/Курсовая/ISIC_2017_only_malignant_masks/'
ISIC2017_malignant_5703 = '/content/drive/MyDrive/Курсовая/ISIC_malignant_5703/'
masks_ISIC2017_malignant_5703 = '/content/drive/MyDrive/Курсовая/ISIC_malignant_5703_masksbymodel/'
seg_model_path = '/content/drive/MyDrive/Курсовая/best_model_deeplabv3_26.04.25.pth'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:

df = pd.read_csv(metadata_path, sep='\t', header=None)
df.columns = ['lesion_id', 'image_id', 'diagnosis', 'type_of_diagnostic', 'age', 'sex', 'localization', 'source']
df['image_path'] = df['image_id'].apply(lambda x: os.path.join(image_folder, x + '.jpg'))
df['mask_path'] = df['image_id'].apply(lambda x: os.path.join(masks_folder, x + '.png'))
benign_diagnoses = ['bkl', 'nv', 'df', 'vasc']  # доброкачественные будут 0, злокачественные 1
df['label'] = df['diagnosis'].apply(lambda x: 0 if x in benign_diagnoses else 1)
df.drop(columns=['diagnosis'], inplace=True)
df.head()

Unnamed: 0,lesion_id,image_id,type_of_diagnostic,age,sex,localization,source,image_path,mask_path,label
0,HAM_0000118,ISIC_0027419,histo,80.0,male,scalp,vidir_modern,/content/drive/MyDrive/Курсовая/images/ISIC_00...,/content/drive/MyDrive/Курсовая/masks_full/ISI...,0
1,HAM_0000118,ISIC_0025030,histo,80.0,male,scalp,vidir_modern,/content/drive/MyDrive/Курсовая/images/ISIC_00...,/content/drive/MyDrive/Курсовая/masks_full/ISI...,0
2,HAM_0002730,ISIC_0026769,histo,80.0,male,scalp,vidir_modern,/content/drive/MyDrive/Курсовая/images/ISIC_00...,/content/drive/MyDrive/Курсовая/masks_full/ISI...,0
3,HAM_0002730,ISIC_0025661,histo,80.0,male,scalp,vidir_modern,/content/drive/MyDrive/Курсовая/images/ISIC_00...,/content/drive/MyDrive/Курсовая/masks_full/ISI...,0
4,HAM_0001466,ISIC_0031633,histo,75.0,male,ear,vidir_modern,/content/drive/MyDrive/Курсовая/images/ISIC_00...,/content/drive/MyDrive/Курсовая/masks_full/ISI...,0


In [4]:
# добавляем данные о malignant и их масках из ISIC2017

image_ids = [os.path.splitext(f)[0] for f in os.listdir(ISIC2017_malignant) if f.lower().endswith('.jpg')]
df_new = pd.DataFrame({'image_id': image_ids,
                       'image_path' : [os.path.join(ISIC2017_malignant, img + '.jpg') for img in image_ids],
                       'mask_path' : [os.path.join(masks_ISIC2017_malignant, img + '.png') for img in image_ids],
                       'label' : 1})

for col in ['lesion_id', 'type_of_diagnostic', 'age', 'sex', 'localization', 'source']:
    df_new[col] = None
cols = df.columns.tolist()
df_new = df_new[cols]
df = pd.concat([df, df_new], ignore_index=True)
df.tail()

  df = pd.concat([df, df_new], ignore_index=True)


Unnamed: 0,lesion_id,image_id,type_of_diagnostic,age,sex,localization,source,image_path,mask_path,label
10414,,ISIC_0015190,,,,,,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,1
10415,,ISIC_0015082,,,,,,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,1
10416,,ISIC_0015200,,,,,,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,1
10417,,ISIC_0015204,,,,,,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,1
10418,,ISIC_0015110,,,,,,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,/content/drive/MyDrive/Курсовая/ISIC_2017_only...,1


In [5]:
# добавляем данные о malignant (рандомные из ISIC) и их масках (сформированные моделью сегментации)

image_ids = [os.path.splitext(f)[0] for f in os.listdir(ISIC2017_malignant_5703) if f.lower().endswith('.jpg')]
df_new = pd.DataFrame({'image_id': image_ids,
                       'image_path' : [os.path.join(ISIC2017_malignant_5703, img + '.jpg') for img in image_ids],
                       'mask_path' : [os.path.join(masks_ISIC2017_malignant_5703, img + '.png') for img in image_ids],
                       'label' : 1})

for col in ['lesion_id', 'type_of_diagnostic', 'age', 'sex', 'localization', 'source']:
    df_new[col] = None
cols = df.columns.tolist()
df_new = df_new[cols]
df = pd.concat([df, df_new], ignore_index=True)
df.tail()

  df = pd.concat([df, df_new], ignore_index=True)


Unnamed: 0,lesion_id,image_id,type_of_diagnostic,age,sex,localization,source,image_path,mask_path,label
16117,,ISIC_0057224,,,,,,/content/drive/MyDrive/Курсовая/ISIC_malignant...,/content/drive/MyDrive/Курсовая/ISIC_malignant...,1
16118,,ISIC_0055854,,,,,,/content/drive/MyDrive/Курсовая/ISIC_malignant...,/content/drive/MyDrive/Курсовая/ISIC_malignant...,1
16119,,ISIC_0056799,,,,,,/content/drive/MyDrive/Курсовая/ISIC_malignant...,/content/drive/MyDrive/Курсовая/ISIC_malignant...,1
16120,,ISIC_0054991,,,,,,/content/drive/MyDrive/Курсовая/ISIC_malignant...,/content/drive/MyDrive/Курсовая/ISIC_malignant...,1
16121,,ISIC_0021576,,,,,,/content/drive/MyDrive/Курсовая/ISIC_malignant...,/content/drive/MyDrive/Курсовая/ISIC_malignant...,1


In [6]:
train_df, tmp_df = train_test_split(df, test_size=0.3, stratify=df['label'], random_state=42)
val_df, test_df = train_test_split(tmp_df, test_size=0.5, stratify=tmp_df['label'], random_state=42)

In [7]:
class DeepLabWithClassifier(nn.Module):
    def __init__(self, seg_model_path, num_classes_clf=2):
        super().__init__()
        # Здесь используется собственная дообученная модель сегментации deeplavb3 от 26 апреля 25 года
        self.deeplab = models.segmentation.deeplabv3_resnet101(pretrained=True)
        self.deeplab.classifier[4] = nn.Conv2d(256, 1, kernel_size=1)
        self.deeplab.load_state_dict(torch.load(seg_model_path, map_location='cpu'))
        for param in self.deeplab.backbone.parameters():  # заморозка encoder и сегм головы
            param.requires_grad = False
        for param in self.deeplab.classifier.parameters():
            param.requires_grad = False

        self.classification_head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes_clf))

    def forward(self, x, seg_mask=None):
        features = self.deeplab.backbone(x)['out']  # (B, 2048, H/16, W/16)
        if seg_mask is not None:
            seg_mask = F.interpolate(seg_mask, size=features.shape[2:], mode='nearest')
            features = features * seg_mask
        clf_out = self.classification_head(features)
        return clf_out


def unfreeze_backbone(model):  # разморозка энкодера
    for param in model.deeplab.backbone.parameters():
        param.requires_grad = True

In [8]:
class SkinLesionDataset(Dataset):
    def __init__(self, dataframe, image_transform=None, mask_transform=None):
        self.dataframe = dataframe
        self.image_transform = image_transform
        self.mask_transform = mask_transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx]['image_path']
        mask_path = self.dataframe.iloc[idx]['mask_path']
        label = self.dataframe.iloc[idx]['label']

        image = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')

        if self.image_transform:
            image = self.image_transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)

        return image, mask, label


image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

mask_transform = transforms.Compose([
    transforms.Resize((224, 224), interpolation=Image.NEAREST),
    transforms.ToTensor()])

loader_kwargs = {'batch_size': 32, 'num_workers': 2, 'pin_memory': True, 'prefetch_factor': 2, 'persistent_workers': True}

train_dataset = SkinLesionDataset(train_df, image_transform=image_transform, mask_transform=mask_transform)
val_dataset = SkinLesionDataset(val_df, image_transform=image_transform, mask_transform=mask_transform)
test_dataset = SkinLesionDataset(test_df, image_transform=image_transform, mask_transform=mask_transform)

'''train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)'''

train_dataloader = DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_kwargs)
val_dataloader = DataLoader(val_dataset, shuffle=False, drop_last=False, **loader_kwargs)
test_dataloader = DataLoader(test_dataset, shuffle=False, drop_last=False, **loader_kwargs)


In [9]:
class ViTWithMask(torch.nn.Module):
    def __init__(self, vit_model):
        super(ViTWithMask, self).__init__()
        self.vit = vit_model

    def forward(self, pixel_values, seg_mask=None):
        # embeddings = self.vit.embeddings(pixel_values)
        embeddings = self.vit.vit.embeddings(pixel_values)
        if isinstance(embeddings, tuple):
            embeddings = embeddings[0]

        if seg_mask is not None:
            patch_size = self.vit.config.patch_size
            B, C, H, W = pixel_values.shape
            new_H, new_W = H // patch_size, W // patch_size

            seg_mask_resized = F.interpolate(seg_mask, size=(new_H, new_W), mode='nearest')
            seg_mask_flat = seg_mask_resized.view(B, -1)
            ones = torch.ones(B, 1, device=seg_mask_flat.device)
            seg_mask_flat = torch.cat([ones, seg_mask_flat], dim=1)
            seg_mask_flat = seg_mask_flat.float().unsqueeze(-1)
            embeddings = embeddings * seg_mask_flat

        # encoder_outputs = self.vit.encoder(embeddings)
        encoder_outputs = self.vit.vit.encoder(embeddings)
        if isinstance(encoder_outputs, tuple):
            hidden_states = encoder_outputs[0]
        else:
            hidden_states = encoder_outputs.last_hidden_state

        # cls_output = encoder_outputs[:, 0]
        cls_output = hidden_states[:, 0]
        if hasattr(self.vit, 'layernorm'):
            cls_output = self.vit.layernorm(cls_output)
        elif hasattr(self.vit, 'layer_norm'):
            cls_output = self.vit.layer_norm(cls_output)
        # else:
        #    pass
        # cls_output = self.vit.layernorm(cls_output)
        logits = self.vit.classifier(cls_output)
        return logits

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_deeplab = DeepLabWithClassifier(seg_model_path, num_classes_clf=2).to(device)
model_name = 'google/vit-base-patch16-224-in21k'
base_vit = ViTForImageClassification.from_pretrained(model_name, num_labels=2).to(device)
model_vit_wrapped = ViTWithMask(base_vit).to(device)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
model_deeplab.load_state_dict(torch.load('/content/best_recall.pth'))
model_deeplab.to(device)

DeepLabWithClassifier(
  (deeplab): DeepLabV3(
    (backbone): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [15]:
state = torch.load('/content/best_model.pth', map_location=device)
model_vit_wrapped.load_state_dict(state)

<All keys matched successfully>

In [16]:
def focal_loss_new(outputs, targets, alpha=0.25, gamma=2.0):
    ce_loss = F.cross_entropy(outputs, targets, reduction='none')
    probs = torch.softmax(outputs, dim=1)
    pt = probs[range(len(targets)), targets]
    alpha_factor = torch.where(targets==1, alpha, 1-alpha)
    focal_weight = alpha_factor * (1 - pt) ** gamma
    loss = focal_weight * ce_loss
    return loss.mean()

In [17]:
def evaluate(model, dataloader, threshold=0.5):
    model.eval()
    total_loss = 0.0
    total = tp = fn = fp = 0
    with torch.no_grad():
        for images, masks, labels in dataloader:
            images, masks, labels = images.to(device), masks.to(device), labels.to(device)
            outputs = model(images, seg_mask=masks)
            # loss = focal_loss(outputs, labels, alpha=0.25, gamma=2.0)
            loss = focal_loss_new(outputs, labels, alpha=0.25, gamma=2.0)
            total_loss += loss.item() * labels.size(0)
            probs = torch.softmax(outputs, dim=1)
            preds = (probs[:,1] > threshold).long()

            total += labels.size(0)
            # correct += (preds == labels).sum().item()

            tp += ((preds == 1) & (labels == 1)).sum().item()
            fn += ((preds == 0) & (labels == 1)).sum().item()
            fp += ((preds == 1) & (labels == 0)).sum().item()

    avg_loss = total_loss / total
    # accuracy = 100 * correct / total
    # recall = 100 * tp / (tp + fn + 1e-8)
    # return avg_loss, accuracy, recall
    recall = 100 * tp / (tp + fn + 1e-8)
    precision = 100 * tp / (tp + fp + 1e-8)
    '''beta2 = 4.0
    f2 = (1 + beta2) * precision * recall / (beta2 * precision + recall + 1e-8)'''
    f1 = 2 * precision * recall / (precision + recall + 1e-8)
    accuracy = 100 * (tp + (total - tp - fn - fp)) / total
    return avg_loss, accuracy, recall, precision, f1

In [18]:
thresholds = [0.2, 0.25, 0.3]

for name, m in [('DeepLab', model_deeplab), ('ViT', model_vit_wrapped)]:
    print(f'\nМодель: {name}')
    for thr in thresholds:
        test_loss, test_acc, test_rec, test_prec, test_f1 = evaluate(m, test_dataloader, threshold=thr)
        print(f'Threshold={thr:.2f} — Loss: {test_loss:.4f}, '
              f'Acc: {test_acc:.2f}%, Recall: {test_rec:.2f}%, '
              f'Precision: {test_prec:.2f}%, F1: {test_f1:.2f}%')


Модель: DeepLab
Threshold=0.20 — Loss: 0.0440, Acc: 85.61%, Recall: 97.44%, Precision: 78.80%, F1: 87.13%
Threshold=0.25 — Loss: 0.0440, Acc: 87.31%, Recall: 96.11%, Precision: 81.72%, F1: 88.33%
Threshold=0.30 — Loss: 0.0440, Acc: 88.71%, Recall: 95.53%, Precision: 84.06%, F1: 89.43%

Модель: ViT
Threshold=0.20 — Loss: 0.0380, Acc: 84.04%, Recall: 97.68%, Precision: 76.74%, F1: 85.95%
Threshold=0.25 — Loss: 0.0380, Acc: 86.61%, Recall: 96.53%, Precision: 80.54%, F1: 87.81%
Threshold=0.30 — Loss: 0.0380, Acc: 88.18%, Recall: 94.62%, Precision: 83.81%, F1: 88.89%
