In [1]:
# !pip freeze > requirements.txt
# !pip install -r requirements.txt

In [18]:
import datetime
from collections import Counter
import json
import os
import pytz
import random

import numpy as np
import torch
import torch.nn as nn
import torch.optim
from torch.utils.data import Dataset, DataLoader, random_split, Sampler
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from torchvision import models, transforms
from tqdm import tqdm
from PIL import Image
from sklearn.metrics import (confusion_matrix,
                             accuracy_score,
                             precision_score,
                             recall_score,
                             f1_score,
                             roc_auc_score)
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

# Preprocessing

In [3]:
class PathLabelProcessor:
    def __init__(self, base_path, folder_name, pet_type, lesion, devices, symptom):
        self.base_path = base_path
        self.folder_name = folder_name
        self.pet_type = pet_type
        self.lesion = lesion
        self.devices = devices
        self.symptom = symptom

        self.label_images()

    def find_folders_by_name(self):
        matching_folders = []

        for root, dirs, files in os.walk(self.base_path):
            for dir_name in dirs:
                if self.folder_name in dir_name:
                    folder_path = os.path.join(root, dir_name)
                    matching_folders.append(folder_path)

        return matching_folders

    def find_image_json_pairs(self, folder_path):
        image_paths = []
        json_paths = []

        for root, dirs, files in os.walk(folder_path):
            for image_file in filter(lambda x: x.lower().endswith(('jpg', 'png')), files):
                image_path = os.path.join(root, image_file)
                json_file = f"{os.path.splitext(image_path)[0]}.json"
                if os.path.isfile(json_file):
                    image_paths.append(image_path)
                    json_paths.append(json_file)

        return image_paths, json_paths

    def is_symptomatic(self, data):
        return data['label']['label_disease_lv_3'] in self.symptom and data['label']['label_disease_nm'] == self.lesion

    def label_images(self):
        self.labeled_image_paths = []

        for folder_path in self.find_folders_by_name():
            image_paths, json_paths = self.find_image_json_pairs(folder_path)

            for image_path, json_path in zip(image_paths, json_paths):
                with open(json_path) as f:
                    data = json.load(f)

                if data['images']['meta']['device'] not in self.devices:
                    continue

                label = 0 if self.is_symptomatic(data) and self.pet_type in os.path.dirname(image_path).lower() else 1
                self.labeled_image_paths.append((image_path, label))

        symptomatic_count = sum(1 for _, label in self.labeled_image_paths if label == 0)
        asymptomatic_count = sum(1 for _, label in self.labeled_image_paths if label == 1)
        
        weight_class_0 = 1.0 / symptomatic_count
        weight_class_1 = 1.0 / asymptomatic_count
        self.class_weights = torch.tensor([weight_class_0, weight_class_1])

        print(f'Total cases: {len(self.labeled_image_paths)}')
        print(f'Number of symptomatic cases: {symptomatic_count}, Number of asymptomatic cases: {asymptomatic_count}')

In [4]:
%%time
base_path = 'eye/Train'
folder_name = '일반'
'''
['유']
dog: 안검염, 안검종양, 안검내반증, 유루증, 색소침착성각막염, 핵경화, 결막염
cat: 안검염, 결막염, 각막부골편, 비궤양성각막염, 각막궤양
['상', '하']
dog: 궤양성각막질환, 비궤양성각막질환
['초기', '비성숙', '성숙']
dog: 백내장
'''
pet_type = '개'
lesion = '안검내반증'
devices = ['스마트폰', '일반카메라']
symptom = ['유']

processor = PathLabelProcessor(base_path=base_path,
                               folder_name=folder_name,
                               pet_type=pet_type,
                               lesion=lesion,
                               devices=devices,
                               symptom=symptom)

data = processor.labeled_image_paths
class_weights = processor.class_weights

Total cases: 98646
Number of symptomatic cases: 5715, Number of asymptomatic cases: 92931
CPU times: user 5.42 s, sys: 1.62 s, total: 7.04 s
Wall time: 7.05 s


In [25]:
class Dataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path, label = self.data[idx]
        image = Image.open(image_path)
        image = self.transform(image)

        return image, label

class ImageDataset():
    def __init__(self,
                 data,
                 transform,
                 test_size,
                 seed,
                 batch_size,
                 shuffle,
                 num_workers):
        dataset = self.DatasetMaker(data, transform, test_size, seed)
        dataloader = self.DataLoaderMaker(dataset, batch_size, shuffle, num_workers)
        
        return dataloader
        
    def DatasetMaker(self, data, transform, test_size=None, seed=42):
        if test_size:
            train_data, val_data = train_test_split(data, 
                                                    test_size=test_size,
                                                    random_state=seed)
            dataset_dict = {'train': train_data,
                            'val': val_data}
            for k, v in dataset_dict.items():
                print(f"- Class Counts for {k}:")
                class_counts = {}
                for _, label in v:
                    class_counts[label] = class_counts.get(label, 0) + 1
                for class_label, count in class_counts.items():
                    print(f"  Class {class_label}: {count} samples")

        dataset = {k: Dataset(v, transform[k])
                   for k, v in dataset_dict.items()}
        
        self.inspect_data(dataset)
        
        return dataset
        
    def DataLoaderMaker(self, dataset, batch_size, shuffle, num_workers):
        dataloader = {k: DataLoader(dataset=dataset[k],
                                    batch_size=batch_size,
                                    shuffle=shuffle,
                                    num_workers=num_workers,
                                    pin_memory=True)
                      for k in dataset.keys()}
        
        self.inspect_data(dataloader)
        
        return dataloader
    
    def inspect_data(self, data_dict):
        print("Inspecting Data...")

        for k, loader in data_dict.items():
            class_counts = Counter()
            for _, labels in loader:
                print(labels)
                class_counts.update(labels)

            print(f"- Class Counts for {k}:")
            print("Class Counts:", class_counts)

In [26]:
%%time
transform = {'train': transforms.Compose([transforms.Resize((240, 240)),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.RandomVerticalFlip(),
                                          transforms.RandomRotation(degrees=10),
                                          transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])]),
             'val': transforms.Compose([transforms.Resize((240, 240)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])}
test_size = 0.2
seed = 42
batch_size = 96
shuffle = True
num_workers = os.cpu_count()

dataloader = ImageDataset(data=data,
                          transform=transform,
                          test_size=test_size,
                          seed=seed,
                          batch_size=batch_size,
                          shuffle=shuffle,
                          num_workers=num_workers)

- Class Counts for train:
  Class 1: 74365 samples
  Class 0: 4551 samples
- Class Counts for val:
  Class 1: 18566 samples
  Class 0: 1164 samples
Inspecting Data...
1


TypeError: 'int' object is not iterable

In [31]:
%%time
transform = {'train': transforms.Compose([transforms.Resize((240, 240)),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.RandomVerticalFlip(),
                                          transforms.RandomRotation(degrees=10),
                                          transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])]),
             'val': transforms.Compose([transforms.Resize((240, 240)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])}

dataset = DatasetMaker(data=labeled_image_paths,
                       transform=transform,
                       test_size=0.2,
                       seed=42)

- Class Counts for train:
  Class 1: 74319 samples
  Class 0: 4597 samples
- Class Counts for val:
  Class 1: 18612 samples
  Class 0: 1118 samples
CPU times: user 54.2 ms, sys: 23.4 ms, total: 77.5 ms
Wall time: 74.2 ms


In [32]:
def DataLoaderMaker(dataset, batch_size, shuffle, num_workers):
    dataloader = {k: DataLoader(dataset=dataset[k],
                                batch_size=batch_size,
                                shuffle=shuffle,
                                num_workers=num_workers,
                                pin_memory=True)
                  for k in dataset.keys()}
    
    print("Inspecting Data...")

    class_counts = {}
    for k, loader in dataloader.items():
        for _, labels in loader:
            for label in labels.tolist():
                class_counts[label] = class_counts.get(label, 0) + 1

        print(f"- Class Counts for {k}:")
        for class_label, count in class_counts.items():
            print(f"  Class {class_label}: {count} samples")
    
    return dataloader

In [33]:
%%time
batch_size = 96
shuffle = True
num_workers = os.cpu_count()

dataloader = DataLoaderMaker(dataset=dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=num_workers)

Inspecting Data...


- Class Counts for train:
  Class 1: 74319 samples
  Class 0: 4597 samples
- Class Counts for val:
  Class 1: 92931 samples
  Class 0: 5715 samples
CPU times: user 17 s, sys: 15.7 s, total: 32.7 s
Wall time: 1min 20s


In [38]:
class ImageDataset(Dataset):
    def __init__(self, labeled_image_paths, transform):
        self.labeled_image_paths = labeled_image_paths
        self.transform = transform

        self.labels = [label for _, label in labeled_image_paths]

    def __len__(self):
        return len(self.labeled_image_paths)

    def __getitem__(self, idx):
        image_path, label = self.labeled_image_paths[idx]

        image = Image.open(image_path)
        image = self.transform(image)

        return image, label

In [39]:
%%time
transform = {'train': transforms.Compose([transforms.Resize((240, 240)),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.RandomVerticalFlip(),
                                          transforms.RandomRotation(degrees=10),
                                          transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])]),
             'val': transforms.Compose([transforms.Resize((240, 240)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])}

dataset = ImageDataset(labeled_image_paths=labeled_image_paths,
                       transform=transform)

CPU times: user 25.7 ms, sys: 92.4 ms, total: 118 ms
Wall time: 113 ms


In [44]:
class DataLoaderMaker:
    def __init__(self, dataset, batch_size, train_ratio=None, num_workers=None):
        self.dataset = dataset
        self.train_ratio = train_ratio
        self.batch_size = batch_size
        self.num_workers = num_workers

        if train_ratio:
            self.split_and_make_dataloader()
        else:
            self.dataloader = self.make_dataloader(self.dataset)

    def make_dataloader(self, dataset, shuffle=False):
        dataloader = DataLoader(dataset=dataset,
                                batch_size=self.batch_size,
                                shuffle=shuffle,
                                num_workers=self.num_workers,
                                pin_memory=True)
        
        self.inspect_data(dataloader)
        
        return dataloader

    def split_and_make_dataloader(self):
        train_size = int(len(self.dataset) * self.train_ratio)
        test_size = len(self.dataset) - train_size
        train_dataset, test_dataset = random_split(self.dataset, [train_size, test_size])

        self.train_loader = self.make_dataloader(train_dataset, shuffle=True)
        self.test_loader = self.make_dataloader(test_dataset, shuffle=True)

    def inspect_data(self, dataloader):
        print("Inspecting Data...")

        class_counts = {}
        for _, labels in dataloader:
            for label in labels.tolist():
                class_counts[label] = class_counts.get(label, 0) + 1

        print("- Class Counts:")
        for class_label, count in class_counts.items():
            print(f"  Class {class_label}: {count} samples")

In [45]:
%%time
batch_size = 96
num_workers = os.cpu_count()
train_ratio = 0.8

data_loader = DataLoaderMaker(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              train_ratio=train_ratio)

Inspecting Data...


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = self.dataset.__getitems__(possibly_batched_index)
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataset.py", line 364, in __getitems__
    return [self.dataset[self.indices[idx]] for idx in indices]
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataset.py", line 364, in <listcomp>
    return [self.dataset[self.indices[idx]] for idx in indices]
  File "/tmp/ipykernel_2100428/2975926141.py", line 15, in __getitem__
    image = self.transform(image)
TypeError: 'dict' object is not callable


# Modeling

In [None]:
class ModelTrainer:
    def __init__(self,
                 model,
                 device,
                 train_dataloader,
                 valid_dataloader,
                 criterion,
                 optimizer,
                 scheduler,
                 pet_type,
                 lesion):
        self.device = device
        self.model = model.to(self.device)
        self.train_dataloader = train_dataloader
        self.valid_dataloader = valid_dataloader
        self.criterion = criterion.to(device)
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.best_f1_score = 0.0
        korea = pytz.timezone('Asia/Seoul')
        now = datetime.now(korea)
        start_time = now.strftime('%Y%m%d-%H%M%S')
        self.name = f'{start_time}_{pet_type}_{lesion}.pth'
        self.writer = SummaryWriter(log_dir=f'runs/{self.name}')

    def calculate_f1_score(self, predicted, labels):
        return f1_score(labels, predicted, average='binary')

    def calculate_auc_roc(self, predicted, labels):
        return roc_auc_score(labels, predicted)

    def run_epoch(self, epoch, num_epochs, phase='train'):
        self.model.train() if phase == 'train' else self.model.eval()
        dataloader = self.train_dataloader if phase == 'train' else self.valid_dataloader

        total_loss = 0.0
        correct = 0
        total = 0
        all_predicted = []
        all_labels = []

        for inputs, labels in tqdm(dataloader, desc=f'{phase.capitalize()} Epoch {epoch + 1}/{num_epochs}', unit='batch'):
            inputs, labels = inputs.to(self.device), labels.to(self.device)

            with torch.set_grad_enabled(phase == 'train'):
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()

                total_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                all_predicted.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        avg_loss = total_loss / len(dataloader)
        accuracy = correct / total
        self.writer.add_scalar(f'Loss/{phase}', avg_loss, epoch)
        self.writer.add_scalar(f'Accuracy/{phase}', accuracy, epoch)

        if phase == 'val':
            current_f1_score = self.calculate_f1_score(np.array(all_predicted), np.array(all_labels))
            current_auc_roc = self.calculate_auc_roc(np.array(all_predicted), np.array(all_labels))

            self.writer.add_scalar('F1 Score/valid', current_f1_score, epoch)
            self.writer.add_scalar('AUC-ROC/valid', current_auc_roc, epoch)

            if current_f1_score > self.best_f1_score:
                self.best_f1_score = current_f1_score
                torch.save(self.model, self.name)

    def train(self, num_epochs):
        for epoch in range(num_epochs):
            self.run_epoch(epoch, num_epochs, 'train')
            self.run_epoch(epoch, num_epochs, 'val')

        self.writer.close()

In [9]:
class ModelTrainer:
    def __init__(self, 
                 model,
                 device,
                 train_dataloader,
                 valid_dataloader,
                 criterion,
                 optimizer,
                 scheduler,
                 pet_type,
                 lesion):
        self.device = device
        self.model = model.to(self.device)
        self.train_dataloader = train_dataloader
        self.valid_dataloader = valid_dataloader
        self.criterion = criterion.to(device)
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.best_f1_score = 0.0
        korea = pytz.timezone('Asia/Seoul')
        now = datetime.now(korea)
        start_time = now.strftime('%Y%m%d-%H%M%S')
        self.name = f'{start_time}_{pet_type}_{lesion}.pth'
        self.writer = SummaryWriter(log_dir=f'runs/{self.name}')

    def calculate_f1_score(self, predicted, labels):
        return f1_score(labels, predicted, average='binary')

    def calculate_auc_roc(self, predicted, labels):
        return roc_auc_score(labels, predicted)

    def train_one_epoch(self, epoch, num_epochs):
        self.model.train()
        total_loss = 0.0
        correct = 0
        total = 0
    
        for step, (inputs, labels) in enumerate(tqdm(self.train_dataloader, desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')):
            inputs, labels = inputs.to(self.device), labels.to(self.device)
    
            outputs = self.model(inputs)
            
            loss = self.criterion(outputs, labels)
            total_loss += loss.item()
    
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
            loss.backward()
    
            self.optimizer.step()
            self.optimizer.zero_grad()
    
        self.scheduler.step()
        self.writer.add_scalar('LearningRate',
                               self.scheduler.get_last_lr()[0],
                               epoch)
    
        avg_loss = total_loss / len(self.train_dataloader)
        accuracy = correct / total
        self.writer.add_scalar('Loss/train', avg_loss, epoch)        
        self.writer.add_scalar('Accuracy/train', accuracy, epoch)

    def eval_one_epoch(self, epoch):
        self.model.eval()
        total_loss = 0.0
        correct = 0
        total = 0
        all_predicted = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in self.valid_dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
        
                outputs = self.model(inputs)
        
                loss = self.criterion(outputs, labels)
                total_loss += loss.item()
        
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
                all_predicted.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        avg_loss = total_loss / len(self.valid_dataloader)
        accuracy = correct / total
        self.writer.add_scalar('Loss/valid', avg_loss, epoch)
        self.writer.add_scalar('Accuracy/valid', accuracy, epoch)
        
        current_f1_score = self.calculate_f1_score(np.array(all_predicted),
                                                   np.array(all_labels))
        current_auc_roc = self.calculate_auc_roc(np.array(all_predicted),
                                                 np.array(all_labels))
        
        self.writer.add_scalar('F1 Score/valid', current_f1_score, epoch)
        self.writer.add_scalar('AUC-ROC/valid', current_auc_roc, epoch)
        
        if current_f1_score > self.best_f1_score:
            self.best_f1_score = current_f1_score
            torch.save(self.model, self.name)

    def train(self, num_epochs):
        for epoch in range(num_epochs):
            self.train_one_epoch(epoch, num_epochs)
            self.eval_one_epoch(epoch)
            
        self.writer.close()

In [10]:
class FocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=None, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, inputs, targets):
        targets = targets.to(inputs.device)
        
        ce_loss = nn.functional.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt) ** self.gamma * ce_loss

        if self.alpha is not None:
            # Move alpha to the same device as inputs
            self.alpha = self.alpha.to(inputs.device)
            focal_loss = self.alpha[targets] * focal_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        elif self.reduction == 'none':
            return focal_loss
        else:
            raise ValueError("Invalid reduction option")

In [11]:
model = models.efficientnet_b1(pretrained=True)
for name, param in model.named_parameters():
    if "last_layer" not in name:
        param.requires_grad = False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_ftrs = model.classifier[1].in_features
num_classes = 2
model.classifier[1] = nn.Linear(num_ftrs, num_classes)
criterion = FocalLoss(gamma=2, alpha=class_weights, reduction='sum')
optimizer = torch.optim.AdamW(model.parameters(), weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

trainer = ModelTrainer(model=model,
                       device=device,
                       train_dataloader=data_loader.train_loader,
                       valid_dataloader=data_loader.test_loader,
                       criterion=criterion,
                       optimizer=optimizer,
                       scheduler=scheduler,
                       pet_type=pet_type,
                       lesion=lesion)

In [12]:
trainer.train(30)

Epoch 1/30: 100%|██████████| 823/823 [06:59<00:00,  1.96batch/s]
Epoch 2/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 3/30: 100%|██████████| 823/823 [06:58<00:00,  1.97batch/s]
Epoch 4/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 5/30: 100%|██████████| 823/823 [06:58<00:00,  1.97batch/s]
Epoch 6/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 7/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 8/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 9/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 10/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 11/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 12/30: 100%|██████████| 823/823 [06:58<00:00,  1.97batch/s]
Epoch 13/30: 100%|██████████| 823/823 [06:58<00:00,  1.97batch/s]
Epoch 14/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 15/30: 100%|██████████| 823/823 [06:57<00:00,  1.97batch/s]
Epoch 16/30: 100%|█

# Evaluation

In [13]:
class ModelTester:
    def __init__(self, path, device, dataloader):
        self.device = device
        self.dataloader = dataloader
        self.load_model(path)
        self.evaluate()

    def load_model(self, path):
        self.model = torch.load(path)
        self.model.to(self.device)

    def classify(self):
        self.model.eval()
        predictions = []
        labels = []
        probabilities = []

        with torch.no_grad():
            for inputs, targets in tqdm(self.dataloader):
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                outputs = self.model(inputs)
                
                _, predicted = torch.max(outputs, 1)

                predictions.extend(predicted.cpu().numpy())
                labels.extend(targets.cpu().numpy())
                probabilities.extend(torch.nn.functional.softmax(outputs, dim=1).cpu().numpy())

        return predictions, labels, probabilities

    def calculate_prob_stats(self, probabilities):
        probabilities = np.array(probabilities)
        min_probs = np.min(probabilities)
        max_probs = np.max(probabilities)
        std_probs = np.std(probabilities)
        mean_probs = np.mean(probabilities)

        return min_probs, max_probs, std_probs, mean_probs

    def calculate_percentage(self, value):
        return f'{value*100:.2f}%'

    def evaluate(self):
        predictions, labels, probabilities = self.classify()
        cm = confusion_matrix(labels, predictions)
        accuracy = accuracy_score(labels, predictions)
        f1 = f1_score(labels, predictions, average='weighted')

        min_probs, max_probs, std_probs, mean_probs = self.calculate_prob_stats(probabilities)

        print('Evaluation Results:')
        print(f'Confusion Matrix:\n{cm}')
        print(f'Accuracy: {self.calculate_percentage(accuracy)}')
        print(f'F1 Score: {self.calculate_percentage(f1)}')
        print(f'Mean Probability: {self.calculate_percentage(mean_probs)}')
        print(f'Max Probability: {self.calculate_percentage(max_probs)}')
        print(f'Min Probability: {self.calculate_percentage(min_probs)}')
        print(f'Standard Deviation of Probabilities: {std_probs:.4f}')

In [14]:
%%time
base_path = 'eye/Valid'
folder_name = '일반'
'''
['유']
dog: 안검염, 안검종양, 안검내반증, 유루증, 색소침착성각막염, 핵경화, 결막염
cat: 안검염, 결막염, 각막부골편, 비궤양성각막염, 각막궤양
['상', '하']
dog: 궤양성각막질환, 비궤양성각막질환
['초기', '비성숙', '성숙']
dog: 백내장
'''
pet_type = '개'
lesion = '안검내반증'
devices = ['스마트폰', '일반카메라']
symptom = ['유']

processor = PathLabelProcessor(base_path=base_path,
                               folder_name=folder_name,
                               pet_type=pet_type,
                               lesion=lesion,
                               devices=devices,
                               symptom=symptom)

labeled_image_paths = processor.labeled_image_paths

Total cases: 13808
Number of symptomatic cases: 1023, Number of asymptomatic cases: 12785
CPU times: user 838 ms, sys: 308 ms, total: 1.15 s
Wall time: 1.15 s


In [15]:
%%time
transform = transforms.Compose([transforms.Resize((240, 240))])

dataset = ImageDataset(labeled_image_paths=labeled_image_paths,
                       transform=transform)

batch_size = 32
num_workers = os.cpu_count()

data_loader = DataLoaderMaker(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=num_workers)

path = '20231212-215730_개_안검내반증.pth'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

ModelTester(path=path, device=device, dataloader=data_loader.dataloader)

Inspecting Data...


- Class Counts:
  Class 1: 12785 samples
  Class 0: 1023 samples


100%|██████████| 432/432 [00:20<00:00, 21.47it/s]


Evaluation Results:
Confusion Matrix:
[[    2  1021]
 [   15 12770]]
Accuracy: 92.50%
F1 Score: 89.01%
Mean Probability: 50.00%
Max Probability: 97.77%
Min Probability: 2.23%
Standard Deviation of Probabilities: 0.3940
CPU times: user 24.8 s, sys: 6.22 s, total: 31 s
Wall time: 27.8 s


<__main__.ModelTester at 0x7fa9631ba140>

In [16]:
%%time
dataset = ImageDataset(labeled_image_paths=[item for item in labeled_image_paths if item[1] == 0],
                       transform=transform)

data_loader = DataLoaderMaker(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=num_workers)

ModelTester(path=path, device=device, dataloader=data_loader.dataloader)

Inspecting Data...
- Class Counts:
  Class 0: 1023 samples


100%|██████████| 32/32 [00:02<00:00, 13.50it/s]


Evaluation Results:
Confusion Matrix:
[[   2 1021]
 [   0    0]]
Accuracy: 0.20%
F1 Score: 0.39%
Mean Probability: 50.00%
Max Probability: 92.84%
Min Probability: 7.16%
Standard Deviation of Probabilities: 0.3223
CPU times: user 1.98 s, sys: 1.44 s, total: 3.42 s
Wall time: 3.75 s


<__main__.ModelTester at 0x7fa805bd2aa0>

In [17]:
%%time
dataset = ImageDataset(labeled_image_paths=[item for item in labeled_image_paths if item[1] == 1],
                       transform=transform)

data_loader = DataLoaderMaker(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=num_workers)

ModelTester(path=path, device=device, dataloader=data_loader.dataloader)

Inspecting Data...


- Class Counts:
  Class 1: 12785 samples


100%|██████████| 400/400 [00:18<00:00, 21.29it/s]

Evaluation Results:
Confusion Matrix:
[[    0     0]
 [   15 12770]]
Accuracy: 99.88%
F1 Score: 99.94%
Mean Probability: 50.00%
Max Probability: 97.77%
Min Probability: 2.23%
Standard Deviation of Probabilities: 0.3992
CPU times: user 23.1 s, sys: 5.74 s, total: 28.8 s
Wall time: 26.2 s





<__main__.ModelTester at 0x7fa9631ba0e0>

In [None]:
class PreModelTester:
    def __init__(self, path, device, dataloader):
        self.device = device
        self.dataloader = dataloader
        self.model = models.vgg16_bn(pretrained=True)
        self.load_model(path)
        self.evaluate()

    def load_model(self, path):
        self.model = models.vgg16_bn(pretrained=True)
        nr_filters = self.model.classifier[0].in_features
        self.model.classifier = nn.Linear(nr_filters, 1)
        state_dict = torch.load(path, map_location=torch.device("cpu"))
        model_dict = self.model.state_dict()
        state_dict = {k: v for k, v in state_dict.items() if k in model_dict}
        model_dict.update(state_dict)
        self.model.load_state_dict(model_dict)
        self.model = self.model.to(self.device)

    def classify(self):
        self.model.eval()
        predictions = []
        labels = []
        probabilities = []

        with torch.no_grad():
            for inputs, targets in tqdm(self.dataloader):
                inputs = inputs.to(self.device)
                targets = targets.to(self.device)
                outputs = self.model(inputs)
                
                probs = torch.nn.functional.softmax(outputs, dim=1)
                _, predicted = torch.max(outputs, 1)

                predictions.extend(predicted.cpu().numpy())
                labels.extend(targets.cpu().numpy())
                probabilities.extend(probs.max(dim=1).values.cpu().numpy())

        return predictions, labels, probabilities

    def calculate_prob_stats(self, probabilities):
        probabilities = np.array(probabilities)
        min_probs = np.min(probabilities)
        max_probs = np.max(probabilities)
        std_probs = np.std(probabilities)
        mean_probs = np.mean(probabilities)

        return min_probs, max_probs, std_probs, mean_probs

    def evaluate(self):
        predictions, labels, probabilities = self.classify()
        cm = confusion_matrix(labels, predictions)
        accuracy = accuracy_score(labels, predictions)
        f1 = f1_score(labels, predictions, average='weighted')

        min_probs, max_probs, std_probs, mean_probs = self.calculate_prob_stats(probabilities)

        print("Evaluation Results:")
        print(f"Confusion Matrix:\n{cm}")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print(f"Mean Probability: {mean_probs:.4f}")
        print(f"Max Probability: {max_probs:.4f}")
        print(f"Min Probability: {min_probs:.4f}")        
        print(f"Standard Deviation of Probabilities: {std_probs:.4f}")

In [None]:
%%time
dataset = ImageDataset(labeled_image_paths=labeled_image_paths,
                       transform=transform)

data_loader = DataLoaderMaker(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=num_workers)

path = 'pre_eye.pt'

PreModelTester(path=path, device=device, dataloader=data_loader.dataloader)

In [None]:
%%time
dataset = ImageDataset(
    labeled_image_paths=labeled_image_paths,
    transform=transform
)

data_loader = DataLoaderMaker(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=num_workers)

PreModelTester(path=path, device=device, dataloader=data_loader.dataloader)

In [None]:
%%time
dataset = ImageDataset(
    labeled_image_paths=labeled_image_paths,
    transform=transform
)

data_loader = DataLoaderMaker(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=num_workers)

PreModelTester(path=path, device=device, dataloader=data_loader.dataloader)