In [1]:
import os
import tomllib

import matplotlib.pyplot as plt
import PIL
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

from util import *

In [2]:
# Load the run configuration from parameters.toml.
# tomllib.load() takes a *binary* file object and decodes it as UTF-8 itself
# (TOML is UTF-8 by specification). Opening in text mode and calling loads()
# would decode with the platform's locale encoding instead, which can corrupt
# non-ASCII values on systems whose default encoding is not UTF-8.
with open('parameters.toml', 'rb') as f:
    parameters = tomllib.load(f)

# Bare last expression: rich-display the parsed configuration.
parameters

{'TRAINING': {'healthy_skin_path': './dataset/healthy/',
  'diseased_skin_path': './dataset/psoriasis/',
  'training_dataset_ratio': 0.8,
  'pretrained_model_name': 'youngp5/skin-conditions',
  'target_label': 'Psoriasis pictures Lichen Planus and related diseases',
  'threshold': 0.9}}

In [3]:
# Build the training / validation / testing split over every raw image.
#
# The training share comes from parameters['TRAINING']['training_dataset_ratio'];
# whatever remains is shared equally between validation and testing.

# Fixed seed so that Restart Kernel -> Run All reproduces the same split.
SPLIT_SEED = 42

base_dataset = {'training': list(), 'validation': list(), 'testing': list()}

raw_healthy_images = read_images(parameters['TRAINING']['healthy_skin_path'])
raw_diseased_images = read_images(parameters['TRAINING']['diseased_skin_path'])

# NOTE(review): the merged collection holds the images only; unless the objects
# returned by read_images carry their own class label, the healthy/diseased
# label is not recoverable from base_dataset -- confirm this is intended.
total_raw_images = raw_healthy_images + raw_diseased_images

random.seed(SPLIT_SEED)
random.shuffle(total_raw_images)

# Loop-invariant split boundaries, computed once.
# The validation boundary sits at the exact midpoint between the training ratio
# and 1.0. It is deliberately NOT rounded: rounding to one decimal makes the
# validation and testing shares unequal for ratios such as 0.75 (0.875 -> 0.9).
training_ratio = parameters['TRAINING']['training_dataset_ratio']
training_end = len(total_raw_images) * training_ratio
validation_end = len(total_raw_images) * ((1 + training_ratio) / 2)

for i, image in enumerate(total_raw_images):
    if i < training_end:
        base_dataset['training'].append(image)
    elif i < validation_end:
        base_dataset['validation'].append(image)
    else:
        base_dataset['testing'].append(image)

print(f"Total dataset: {len(total_raw_images)} items.\n")
print(f"Training: {len(base_dataset['training'])} items.")
print(f"Validation: {len(base_dataset['validation'])} items.")
print(f"Testing: {len(base_dataset['testing'])} items.")

Total dataset: 2806 items.

Training: 2245 items.
Validation: 281 items.
Testing: 280 items.


In [4]:
# Instantiate the classifier wrapper with the pretrained model name taken from
# the TRAINING section of the configuration.
pretrained_model_name = parameters['TRAINING']['pretrained_model_name']
classifier = SkinConditionsClassifier(pretrained_model_name)



In [6]:
# Baseline evaluation of the pretrained classifier on every raw image.
#
# An image counts as correctly classified when its score for the configured
# target label is below the threshold (healthy set) or at/above it (diseased
# set). The variables and printed labels keep the notebook's "precision"
# wording; as computed here they are per-class hit rates (the share of each set
# classified correctly) and the overall share of correct decisions.

# Config values are loop-invariant: look them up once, not once per image.
target_label = parameters['TRAINING']['target_label']
threshold = parameters['TRAINING']['threshold']

# Fail fast with a clear message instead of a ZeroDivisionError raised only
# after every image has already been run through the classifier.
if len(raw_healthy_images) == 0 or len(raw_diseased_images) == 0:
    raise ValueError('Both the healthy and the diseased image sets must be non-empty.')


def target_score(img):
    """Return the classifier's score for the target label on one image.

    The image is wrapped with PIL.Image.fromarray before prediction. When the
    flattened prediction has no entry for the target label, the score is 0.
    """
    prediction = flatten_prediction(classifier.predict(PIL.Image.fromarray(img)))
    return prediction.get(target_label, 0)


# Counts of correctly classified images, kept separate from the ratios below so
# that no name changes meaning (count -> ratio) halfway through the cell.
correct_healthy = sum(1 for img in raw_healthy_images if target_score(img) < threshold)
correct_diseased = sum(1 for img in raw_diseased_images if target_score(img) >= threshold)

# Add the lengths rather than concatenating the two image lists just to count them.
total_images = len(raw_healthy_images) + len(raw_diseased_images)

total_precision = (correct_healthy + correct_diseased) / total_images
precision_healthy = correct_healthy / len(raw_healthy_images)
precision_diseased = correct_diseased / len(raw_diseased_images)

print(f'Healthy skin precision: {precision_healthy*100}%')
print(f'Diseased skin precision: {precision_diseased*100}%')
print(f'Total precision: {total_precision*100}%')

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Healthy skin precision: 100.0%
Diseased skin precision: 56.449771689497716%
Total precision: 72.80826799714897%


In [None]:
# Preprocessing applied to every sample: resize to 256x256, then convert to a tensor.
preprocessing_steps = [transforms.Resize((256, 256)), transforms.ToTensor()]
transform = transforms.Compose(preprocessing_steps)

# NOTE(review): '/dataset' is an absolute path, whereas the paths in
# parameters.toml are relative ('./dataset/...') -- confirm which is intended.
# NOTE(review): this dataset is built from a directory rather than from
# base_dataset['training'], so the split computed earlier is not used here --
# verify that validation/testing images are not part of the training data.
train_dataset = SkinDiseaseDataset('/dataset', transform=transform)

# Shuffled mini-batches of 32 samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

In [None]:
# Set up the model with its optimizer and loss, then start training.
model = BinaryCNN()

# Adam over all model parameters with a learning rate of 0.001.
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Binary cross-entropy on probabilities: nn.BCELoss expects inputs in [0, 1].
# NOTE(review): assumes BinaryCNN's final layer applies a sigmoid -- TODO confirm.
criterion = nn.BCELoss()

train_model(model, train_loader, criterion, optimizer)