In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

### Dataset Loading and Labeling

In [2]:
# Image folders for the two on-disk splits (relative paths, resolved against the
# kernel's working directory).
TRAIN_PATH, TEST_PATH = 'images/train', 'images/test'

In [3]:
# Settings shared by both preprocessing pipelines.
INPUT_SIZE = (224, 224)
# NOTE(review): these per-channel mean/std values match the commonly quoted ImageNet
# statistics -- presumably chosen for a pretrained torchvision backbone; confirm
# against the model cell.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]


def _build_pipeline(*augmentations):
    """Compose: resize -> optional augmentations -> tensor conversion -> normalization."""
    return transforms.Compose([
        transforms.Resize(INPUT_SIZE),                    # force every image to 224x224
        *augmentations,                                   # random steps (may be empty)
        transforms.ToTensor(),                            # convert to a PyTorch tensor
        transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD),  # per-channel normalization
    ])


data_transforms = {
    # Training pipeline: adds random augmentation before tensor conversion.
    'train': _build_pipeline(
        transforms.RandomHorizontalFlip(),   # randomly flip images horizontally
        transforms.RandomRotation(10),       # randomly rotate images by up to 10 degrees
    ),
    # Evaluation pipeline: no random steps.
    'test': _build_pipeline(),
}

In [4]:
# Build one ImageFolder per on-disk split, each paired with the transform pipeline
# of the same name. (The printed class_to_idx further down shows the labels that
# ImageFolder assigned to the class sub-folders.)
train_dataset, test_dataset = (
    datasets.ImageFolder(root=folder, transform=data_transforms[split])
    for split, folder in (('train', TRAIN_PATH), ('test', TEST_PATH))
)

In [5]:
# Hold out 20% of the training folder for validation.
SPLIT_SEED = 42  # fixed so the same images land in train/val on every Restart & Run All

# This cell rebinds `train_dataset` to a Subset. If the cell is re-run, unwrap it
# first so we always split the full ImageFolder rather than splitting a split
# (which would also break `train_dataset.dataset.class_to_idx` later on).
full_train_dataset = (
    train_dataset.dataset
    if isinstance(train_dataset, torch.utils.data.Subset)
    else train_dataset
)

train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

train_dataset, val_split = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED))

# Both halves returned by random_split wrap the SAME underlying dataset, so the
# held-out images would otherwise go through the random flip/rotation of the
# 'train' pipeline and validation metrics would be noisy. Re-read the same folder
# with the deterministic 'test' pipeline and reuse the held-out indices; this relies
# on ImageFolder listing an unchanged directory in the same (sorted) order each time.
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=TRAIN_PATH, transform=data_transforms['test']),
    val_split.indices)

In [6]:
# Mini-batch size shared by all three loaders.
BATCH_SIZE = 32

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [7]:
# Sanity check: label mapping plus the number of samples in each split.
# `train_dataset` is a Subset after the split, so the class mapping is read from
# the ImageFolder it wraps (`.dataset`).
split_report = (
    ("Class Indices:", train_dataset.dataset.class_to_idx),
    ("Number of Training Samples:", len(train_dataset)),
    ("Number of Validation Samples:", len(val_dataset)),
    ("Number of Test Samples:", len(test_dataset)),
)
for caption, value in split_report:
    print(caption, value)

Class Indices: {'Benign': 0, 'Malignant': 1}
Number of Training Samples: 9503
Number of Validation Samples: 2376
Number of Test Samples: 2000
