In [117]:
#IMPORTS
import os
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data import Subset
from torchvision import transforms

import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms


import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold



### AUGMENT DATASET 

In [100]:

# Root folders: the current working directory and the raw image folder inside it.
# os.path.join is used instead of a hard-coded "\dataset" suffix, which relied on
# an invalid "\d" escape sequence and only produced a valid path on Windows.
folder_path = os.getcwd()
input_path = os.path.join(folder_path, "dataset")

# All generated images are written here; create the folder up front so the first
# save() does not fail when the notebook is run on a fresh checkout.
augmented_dir = os.path.join(input_path, 'augmented')
os.makedirs(augmented_dir, exist_ok=True)

NUM_AUGMENTATIONS = 5  # number of random variants generated per source image
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Random augmentation pipeline; geometric transforms pad exposed areas with 255.
augmenter = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20, fill=255),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), fill=255),
    transforms.RandomVerticalFlip(),
    transforms.RandomPerspective(distortion_scale=0.2, fill=255),
    transforms.Grayscale(num_output_channels=1),  # Convert the image to grayscale
    transforms.ToTensor(),  # Convert the PIL image to a PyTorch tensor
])

# Deterministic pipeline used for the non-augmented copy of each source image.
grey_scale = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert the image to grayscale
    transforms.ToTensor(),  # Convert the PIL image to a PyTorch tensor
])

# Built once instead of once per saved image.
to_pil = transforms.ToPILImage()

# Iterate over all images in the folder (sorted so runs visit files in a stable order).
for filename in sorted(os.listdir(input_path)):
    if not filename.endswith(IMAGE_EXTENSIONS):
        continue

    image_path = os.path.join(input_path, filename)

    # The context manager closes the underlying file once all variants are written.
    with Image.open(image_path) as image:
        # Generate the random variants; the index is prefixed to the original file
        # name so the trailing "_<label>" part of the name is preserved.
        for i in range(NUM_AUGMENTATIONS):
            augmented_image = to_pil(augmenter(image))
            output_path = os.path.join(augmented_dir, f'{i}{filename}.png')
            augmented_image.save(output_path)

        # Also keep a grayscale copy of the untouched original under its own name.
        grey_image = to_pil(grey_scale(image))
        output_path = os.path.join(augmented_dir, filename)
        grey_image.save(output_path)

## LOAD DATA AND GENERATE AUGMENTED DATA

### TRAINING

Idea: use VGG16, a pre-trained network, since our dataset is both artificial and small

In [102]:
# Folder holding the augmented images plus the grayscale originals.
data_folder = os.path.join(input_path, 'augmented')
# Destination of the final saved weights, next to the notebook.
model_save_path = os.path.join(folder_path, "model.pth")

# Preprocessing applied to every training/validation image:
# resize to 224x224, then convert the PIL image to a tensor.
# NOTE(review): no mean/std normalisation is applied here - confirm this is
# intended for the pre-trained network used below.
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(_preprocessing_steps)

# Create a custom dataset
class MenstrualDataset(torch.utils.data.Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every file in ``folder_path`` ending in .png/.jpg/.jpeg is one sample. The
    integer label is the text between the last '_' in the file name and the first
    '.' that follows it (e.g. ``"0img_2.png.png"`` -> ``2``).

    Args:
        folder_path: Directory containing the image files.
        transform: Optional callable applied to the RGB PIL image.
    """

    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping (and any seeded split built on it) reproducible.
        self.image_list = sorted(
            f for f in os.listdir(folder_path) if f.endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files found in the folder."""
        return len(self.image_list)

    @staticmethod
    def _label_from_name(image_name):
        """Parse the integer label that follows the last '_' in ``image_name``.

        Raises:
            ValueError: If that part of the name is not an integer.
        """
        label_text = image_name.split('_')[-1].split('.')[0]
        try:
            return int(label_text)
        except ValueError as exc:
            raise ValueError(
                f"Cannot parse a label from file name {image_name!r}: "
                f"expected an integer after the last '_', got {label_text!r}"
            ) from exc

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        Raises:
            IndexError: If ``idx`` is outside the file list.
            ValueError: If the file name does not encode an integer label.
        """
        image_name = self.image_list[idx]
        image_path = os.path.join(self.folder_path, image_name)

        # convert() yields an independent RGB image, so the file can be closed
        # as soon as the conversion is done.
        with Image.open(image_path) as img:
            image = img.convert("RGB")  # Convert to RGB

        if self.transform:
            image = self.transform(image)

        return image, self._label_from_name(image_name)

# Create a custom dataset instance
full_dataset = MenstrualDataset(data_folder, transform)

print(f"The dataset has {len(full_dataset)} images")

# --- Cross-validation configuration (loop-invariant, so defined once) ---
num_folds = 5     # number of stratified folds
num_classes = 3   # Change this according to your dataset
num_epochs = 15
batch_size = 16
seed = 42

# Seed torch so weight initialisation, shuffling and dropout are repeatable.
torch.manual_seed(seed)

# Training is pinned to the CPU; switch to
# torch.device("cuda" if torch.cuda.is_available() else "cpu") to use a GPU.
device = torch.device("cpu")

criterion = nn.CrossEntropyLoss()

# Labels are parsed straight from the file names (same rule as
# MenstrualDataset.__getitem__) instead of iterating the dataset, which would
# decode and resize every image just to read its label.
all_labels = [int(name.split('_')[-1].split('.')[0]) for name in full_dataset.image_list]

# Use StratifiedKFold to ensure class distribution balance in each fold
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)

fold_accuracies = []

# Training loop with cross-validation. Only the labels drive the split, so they
# are also passed as the placeholder for X.
for fold, (train_indices, val_indices) in enumerate(skf.split(all_labels, all_labels)):
    print(f"Fold {fold + 1}/{num_folds}")

    # Create DataLoader instances for training and validation
    # (.tolist() turns the numpy index arrays into plain Python ints).
    train_dataset = Subset(full_dataset, train_indices.tolist())
    val_dataset = Subset(full_dataset, val_indices.tolist())

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    # Load a fresh pre-trained VGG-16 for every fold and replace its last
    # classifier layer with one sized for our classes.
    # NOTE(review): recent torchvision releases prefer the `weights=` argument
    # over `pretrained=True` - update once the minimum torchvision version is known.
    model = models.vgg16(pretrained=True)
    model.classifier[6] = nn.Linear(4096, num_classes)
    model.to(device)

    # The optimizer holds per-model state, so it is rebuilt for each fold.
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

    # Save the trained model for each fold
    model_k = f'model_fold_{fold + 1}.pth'
    model_k_path = os.path.join(folder_path, model_k)
    torch.save(model.state_dict(), model_k_path)

    # Evaluate the model on the validation set
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass; the predicted class is the index of the largest logit.
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Compute and print accuracy for each fold
    accuracy = accuracy_score(y_true, y_pred)
    fold_accuracies.append(accuracy)
    print(f"Validation Accuracy (Fold {fold + 1}): {accuracy}")

print(f"Mean Validation Accuracy over {len(fold_accuracies)} folds: {sum(fold_accuracies) / len(fold_accuracies)}")

# `model` is the network trained in the LAST fold; its weights are what end up
# in model_save_path (the per-fold checkpoints are saved separately above).
torch.save(model.state_dict(), model_save_path)







96
