In [None]:
!pip install torchextractor

Collecting torchextractor
  Downloading torchextractor-0.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.4.0->torchextractor)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.4.0->torchextractor)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.4.0->torchextractor)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.4.0->torchextractor)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.4.0->torchextractor)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12

In [None]:
import torch
import sys
import numpy as np
import pickle as pkl
from os.path import join as oj
from datetime import datetime
import torch.optim as optim
import os
from torch.utils.data import TensorDataset, ConcatDataset, random_split
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, f1_score
import argparse
from PIL import Image
from tqdm import tqdm
from torch import nn
from numpy.random import randint
import torchvision.models as models
import time
import copy
import gc
import json
import pandas as pd
import torch
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
from scipy.stats import ttest_1samp
import matplotlib.pyplot as plt

In [None]:
# Colab-only step: mount Google Drive at /content/drive so files stored under
# MyDrive can be read through the local filesystem.
from google.colab import drive
drive.mount("/content/drive")
# Root folder of the ISIC material on the mounted drive. The cells visible in
# this notebook spell their paths out in full instead of referencing this variable.
dir_path = "/content/drive/MyDrive/Masterthesis/Datasets/isic"

Mounted at /content/drive


In [None]:
import os
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset

class ISICDataset(Dataset):
    """Dataset over the image files of a single directory.

    Every sample shares one class label (``is_malignant``) and carries a
    per-sample group label taken from ``group_labels``.

    Args:
        path: Directory containing the image files. Required.
        is_malignant: Class label returned for every sample of this dataset.
        data_files: Optional explicit list of file names (relative to ``path``).
            When omitted, the directory listing is used, sorted by name so that
            index -> file is reproducible across runs and machines.
        labels: Stored on the instance; not used by ``__getitem__``.
        group_labels: Sequence indexed by sample position; ``group_labels[i]``
            is returned alongside sample ``i``.
        transform: Optional callable applied to the loaded PIL image.

    Raises:
        ValueError: If ``path`` is ``None``.
    """

    def __init__(self, path: str = None, is_malignant: int = None, data_files=None, labels=None, group_labels = None, transform=None):
        # os.listdir(None) would silently list the current working directory,
        # so reject a missing path up front.
        if path is None:
            raise ValueError("ISICDataset requires `path` to be an image directory")

        self.path = path
        # Honor an explicit file list; otherwise fall back to a sorted listing
        # (os.listdir returns entries in arbitrary order).
        if data_files is None:
            data_files = sorted(os.listdir(self.path))
        self.data_files = list(data_files)
        self.is_malignant = is_malignant
        self.group_labels = group_labels
        self.labels = labels
        self.transform = transform

    def __getitem__(self, i):
        """Return ``(image, is_malignant, group_label)`` for sample ``i``."""
        file_path = os.path.join(self.path, self.data_files[i])

        # Image.open is lazy and keeps the file open; convert() forces the pixel
        # data to load so the handle can be released by the context manager.
        # Converting to RGB guarantees three channels for channel-wise transforms.
        with Image.open(file_path) as opened:
            img = opened.convert("RGB")

        # Apply the provided transform
        if self.transform:
            img = self.transform(img)

        # Return the label and group label
        return (img, self.is_malignant, self.group_labels[i])

    def __len__(self):
        """Number of image files in the dataset."""
        return len(self.data_files)


In [None]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalize each of the three channels with the mean/std given below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# One image directory per (class, patch) combination.
paths = {
    "benign_with_patch": "/content/drive/MyDrive/Masterthesis/Datasets/isic/data/dfr/benign_patch_200",
    "benign_with_no_patch": "/content/drive/MyDrive/Masterthesis/Datasets/isic/data/dfr/benign_no_patch_200",
    "malignant_with_patch": "/content/drive/MyDrive/Masterthesis/Datasets/isic/data/dfr/malignant_patch_200",
    "malignant_with_no_patch": "/content/drive/MyDrive/Masterthesis/Datasets/isic/data/dfr/malignant_no_patch_200",
}

# Build one dataset per group from a (name, class label, group label) table.
# Class label: 0 = benign, 1 = malignant. Group label: 0 = patch, 1 = no patch.
# Each group's label list has one entry per file found in its directory.
datasets = {
    group_name: ISICDataset(
        path=paths[group_name],
        is_malignant=class_label,
        group_labels=[group_label] * len(os.listdir(paths[group_name])),
        transform=transform,
    )
    for group_name, class_label, group_label in (
        ("benign_with_patch", 0, 0),
        ("benign_with_no_patch", 0, 1),
        ("malignant_with_patch", 1, 0),
        ("malignant_with_no_patch", 1, 1),
    )
}

# Concatenate the four groups in the order they are declared in `paths`.
balanced_dataset = ConcatDataset([datasets[group_name] for group_name in paths])

balanced_loader = DataLoader(balanced_dataset, batch_size=32, shuffle=True, num_workers=2)

In [None]:

# Rebuild AlexNet without torchvision's pretrained weights, swap in a 2-class
# head, and restore the fine-tuned checkpoint from Drive.
# (`weights=None` replaces the deprecated `pretrained=False` argument.)
model = models.alexnet(weights=None)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.classifier[-1] = nn.Linear(4096, 2)
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state dict needs, and avoids executing arbitrary pickled code.
# NOTE(review): `device` only controls where the checkpoint tensors are loaded;
# neither the model nor the batches are moved with .to(device) in this cell, so
# the forward passes below run on the CPU.
model.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/Masterthesis/Datasets/isic/models/initial_classifier/alexnet_isic_1.pt",
        map_location=device,
        weights_only=True,
    )
)

# Keep every top-level child of the model except the last one. For AlexNet the
# last child is the whole `classifier` stack, so the extractor outputs the
# flattened convolutional features (9216 values per image in the recorded run),
# not the 4096-d input of the final Linear layer.
# NOTE(review): if penultimate-layer embeddings were intended, keep
# `model.classifier[:-1]` in the extractor instead — confirm before changing,
# since it alters every downstream result.
feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])
feature_extractor.eval()

# Extract features
all_features = []
all_labels = []
all_groups = []

with torch.no_grad():
    for images, labels, groups in balanced_loader:
        # Flatten everything after the batch dimension into one feature vector.
        features = feature_extractor(images).view(images.size(0), -1)
        all_features.append(features)
        all_labels.append(labels)
        all_groups.append(groups)

# Concatenate the per-batch tensors along the batch dimension
all_features = torch.cat(all_features)
all_labels = torch.cat(all_labels)
all_groups = torch.cat(all_groups)

print(f"Extracted Features: {all_features.shape}")


  model.load_state_dict(torch.load("/content/drive/MyDrive/Masterthesis/Datasets/isic/models/initial_classifier/alexnet_isic_1.pt", map_location=device))


Extracted Features: torch.Size([800, 9216])


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the extracted training features, then standardize them with it.
scaler = StandardScaler().fit(all_features.numpy())
scaled_features = scaler.transform(all_features.numpy())

# Logistic regression on the standardized features; class_weight="balanced"
# reweights samples inversely to class frequency.
clf = LogisticRegression(class_weight="balanced", max_iter=100)
clf.fit(scaled_features, all_labels.numpy())


In [None]:
from sklearn.metrics import accuracy_score

# Image directory of each held-out test group.
test_paths = {
    "test_malignant_no_patch_100": '/content/drive/MyDrive/Masterthesis/Datasets/isic/data/full/test/malignant_no_patch_100',
    "test_malignant_patch_100": '/content/drive/MyDrive/Masterthesis/Datasets/isic/data/full/test/malignant_patch_100',
    "test_benign_no_patch_100": '/content/drive/MyDrive/Masterthesis/Datasets/isic/data/full/test/benign_no_patch_100',
    "test_benign_patch_100": '/content/drive/MyDrive/Masterthesis/Datasets/isic/data/full/test/benign_patch_100',
}

# Class label (1 = malignant, 0 = benign) and group label (0 = patch,
# 1 = no patch) assigned to every image of the corresponding test group.
test_configs = {
    "test_malignant_no_patch_100": dict(class_label=1, group_label=1),
    "test_malignant_patch_100": dict(class_label=1, group_label=0),
    "test_benign_no_patch_100": dict(class_label=0, group_label=1),
    "test_benign_patch_100": dict(class_label=0, group_label=0),
}

# Preprocessing for the test images: resize, tensor conversion, per-channel
# normalization.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# One dataset per test group; the group-label list has one entry per file in
# the group's directory.
test_datasets = {}
for name in test_paths:
    path = test_paths[name]
    config = test_configs[name]
    n_files = len(os.listdir(path))
    test_datasets[name] = ISICDataset(
        path=path,
        is_malignant=config["class_label"],
        group_labels=[config["group_label"]] * n_files,
        transform=transform,
    )


# Per-group containers for the extracted features, class labels and group labels.
test_features, test_labels, test_groups = {}, {}, {}

with torch.no_grad():
    for name, dataset in test_datasets.items():
        loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=4)
        feature_chunks, label_chunks, group_chunks = [], [], []
        for images, label, group in loader:
            # Flatten everything after the batch dimension into one vector per image.
            flat = feature_extractor(images).view(images.size(0), -1)
            feature_chunks.append(flat)
            label_chunks.append(label)
            group_chunks.append(group)

        # Stitch the per-batch tensors together along the batch dimension.
        test_features[name] = torch.cat(feature_chunks)
        test_labels[name] = torch.cat(label_chunks)
        test_groups[name] = torch.cat(group_chunks)

        print(f"Extracted Features for {name}: {test_features[name].shape}")


Extracted Features for test_malignant_no_patch_100: torch.Size([100, 9216])
Extracted Features for test_malignant_patch_100: torch.Size([100, 9216])
Extracted Features for test_benign_no_patch_100: torch.Size([100, 9216])
Extracted Features for test_benign_patch_100: torch.Size([100, 9216])


In [None]:
# Scale the test features with the scaler that was fitted on the training features
scaled_test_features = {
    name: scaler.transform(features.numpy())
    for name, features in test_features.items()
}

# Predictions and ground truth pooled over all test groups for the overall score.
# The pooled labels get their own name: rebinding `all_labels` here would replace
# the training-label tensor that the logistic-regression cell reads, so that
# cell could not be re-run afterwards.
all_predictions = []
all_test_labels = []

# Per-group accuracy of the logistic-regression classifier
test_accuracies = {}
for name, features in scaled_test_features.items():
    group_truth = test_labels[name].numpy()
    predictions = clf.predict(features)
    accuracy = accuracy_score(group_truth, predictions)
    test_accuracies[name] = accuracy
    print(f"Accuracy for {name}: {accuracy * 100:.2f}%")

    # Collect all predictions and labels for overall accuracy
    all_predictions.extend(predictions)
    all_test_labels.extend(group_truth)

# Compute overall accuracy across every test sample
overall_accuracy = accuracy_score(all_test_labels, all_predictions)
print(f"Overall Accuracy: {overall_accuracy * 100:.2f}%")

Accuracy for test_malignant_no_patch_100: 76.00%
Accuracy for test_malignant_patch_100: 92.00%
Accuracy for test_benign_no_patch_100: 81.00%
Accuracy for test_benign_patch_100: 99.00%
Overall Accuracy: 87.00%
