# Initialization

In [None]:
import matplotlib.pyplot as plt
#%matplotlib inline
from PIL import Image # Install Pillow -> conda install anaconda::pillow or pip install pillow
import os
from skimage.io import  imread, imshow # Install scikit-image -> conda install scikit-image or pip install scikit-image
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics import confusion_matrix, balanced_accuracy_score,  mean_squared_error, r2_score, f1_score
from sklearn.model_selection import KFold, train_test_split
import numpy as np

#import torch
import torch.nn as nn
import torch.optim as optim
from torch.functional import F

In [None]:
# Detect Google Colab: the `google.colab` package is only importable there.
try:
    import google.colab  # noqa: F401  (imported only to probe the environment)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # On Colab the data lives on Google Drive, which has to be mounted first.
    from google.colab import drive
    drive.mount('/content/drive')

    train_dataset_path = '/content/drive/MyDrive/Project1-AML/data-students/TRAIN'
    test_dataset_path = '/content/drive/MyDrive/Project1-AML/data-students/TEST'
else:
    # Local run: paths are relative to the notebook's working directory.
    train_dataset_path = 'data-students/TRAIN'
    test_dataset_path = 'data-students/TEST'

# Fixed variables
IMG_WIDTH = 75
IMG_HEIGHT = 75
BATCH_SIZE = 32

# Testing variables
seed = 42

# Seed the NumPy and PyTorch generators so that weight initialisation and
# PyTorch-driven shuffling repeat across "Restart & Run All".
np.random.seed(seed)
torch.manual_seed(seed)

# Module-level compute device, so that code referring to a global `device`
# finds one on a fresh kernel.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
from rembg import remove
import io

def remove_background_pil(image):
    """Remove the background of a PIL image with rembg and return an RGB image.

    The image is serialised to PNG bytes, passed to ``remove``, decoded again,
    has its fully transparent pixels painted black, and is finally converted
    to RGB mode when it is not already RGB.

    Args:
        image: PIL image to process.

    Returns:
        A PIL image in 'RGB' mode.
    """
    # Serialise the input as PNG so it can be handed over as raw bytes.
    png_buffer = io.BytesIO()
    try:
        image.save(png_buffer, format='PNG')
        png_bytes = png_buffer.getvalue()
    finally:
        png_buffer.close()

    # Background removal works on the encoded bytes and returns encoded bytes.
    cutout_bytes = remove(png_bytes)
    cutout = Image.open(io.BytesIO(cutout_bytes))

    # Transparent (removed) areas become opaque black.
    cutout = fill_transparent_pixels_with_black(cutout)

    return cutout if cutout.mode == 'RGB' else cutout.convert('RGB')

def fill_transparent_pixels_with_black(image):
    """Replace every fully transparent pixel with opaque black.

    Args:
        image: PIL image. If its mode is not 'RGBA' it is converted first
            (``convert`` returns a new image); an image that is already
            'RGBA' is modified in place.

    Returns:
        The RGBA image in which each pixel with alpha == 0 has been set to
        (0, 0, 0, 255). Pixels with any other alpha value are left untouched.
    """
    rgba = image if image.mode == 'RGBA' else image.convert('RGBA')

    pixels = rgba.load()          # pixel access object, indexed as [x, y]
    n_cols, n_rows = rgba.size
    opaque_black = (0, 0, 0, 255)

    coordinates = ((col, row) for col in range(n_cols) for row in range(n_rows))
    for col, row in coordinates:
        alpha = pixels[col, row][3]
        if alpha == 0:
            pixels[col, row] = opaque_black

    return rgba

In [None]:
from torchvision.transforms import v2

# Random affine augmentation shared by the training transforms below:
# rotation sampled from [-20, 20] degrees, translate=(0.1, 0.2) and a scale
# factor sampled from [0.5, 0.9] (so the augmented image is always shrunk).
augmentation = transforms.Compose([v2.RandomAffine(degrees=(-20, 20), translate=(0.1, 0.2), scale=(0.5, 0.9))])

def removeBackground(image):
    """Remove the background of a PIL image and return it as a resized tensor.

    The result is NOT normalised: only ``Resize`` and ``ToTensor`` are applied
    after the background removal.

    Args:
        image: PIL image to process.

    Returns:
        The tensor produced by ``ToTensor`` after resizing to
        (IMG_WIDTH, IMG_HEIGHT).
    """
    to_resized_tensor = transforms.Compose([
        transforms.Resize((IMG_WIDTH, IMG_HEIGHT)),
        transforms.ToTensor(),
    ])

    # remove_background_pil(image) replaces the older remove_background(removal_model, image)
    cutout = remove_background_pil(image)
    return to_resized_tensor(cutout)

def Augment(image):
    """Apply the random affine augmentation, then resize, tensorise and normalise.

    A new random affine transform is sampled on every call, so calling this
    twice on the same image generally yields different tensors.

    Args:
        image: PIL image to augment.

    Returns:
        The normalised tensor for the augmented image.
    """
    steps = [
        augmentation,
        transforms.Resize((IMG_WIDTH, IMG_HEIGHT)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(image)

def input_transform(image):
    """Augment, resize, tensorise and normalise a PIL image.

    This definition performs the same steps as ``Augment``. A second
    ``input_transform`` without the augmentation step is defined further down
    the notebook and replaces this one once its cell has run.

    Args:
        image: PIL image to transform.

    Returns:
        The normalised tensor for the augmented image.
    """
    normalise = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    pipeline = transforms.Compose([
        augmentation,
        transforms.Resize((IMG_WIDTH, IMG_HEIGHT)),
        transforms.ToTensor(),
        normalise,
    ])
    return pipeline(image)

# Plain (non-augmenting) transform: resize and convert to a tensor.
# Normalisation is deliberately disabled here (see the commented-out line).
normal_transform = transforms.Compose([
        transforms.Resize((IMG_WIDTH, IMG_HEIGHT)),
        transforms.ToTensor(),
        #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])


In [None]:
# BACKGROUND-REMOVED TRAINING SET (one-off preprocessing)

from torchvision.transforms import v2
import shutil


background_removed_path = 'data-students/TRAIN-NOBG'

# Background removal over the whole training set is slow, so it is skipped by
# default. Set this flag to True to (re)generate `background_removed_path`
# from the images under `train_dataset_path`.
REBUILD_NOBG_TRAIN = False

if REBUILD_NOBG_TRAIN:
    # ImageFolder is used only to enumerate (path, class index) pairs and class names.
    original_dataset = datasets.ImageFolder(root=train_dataset_path)

    for image_path, target in original_dataset.imgs:
        # Mirror the class-folder layout so ImageFolder can read the output later.
        class_name = original_dataset.classes[target]
        class_path = os.path.join(background_removed_path, class_name)
        os.makedirs(class_path, exist_ok=True)

        # Remove the background and resize (returns a tensor).
        with Image.open(image_path) as image:
            transformed_image = removeBackground(image)

        # Save under the original file name inside the class folder.
        image_save_path = os.path.join(class_path, os.path.basename(image_path))
        transforms.ToPILImage()(transformed_image).save(image_save_path)


In [None]:
from torch.utils.data import ConcatDataset
# Un-augmented, background-removed training images.
# NOTE(review): these use normal_transform (no Normalize) while the augmented
# copies below go through Augment, which does normalise — confirm the mix of
# normalised and un-normalised samples is intended.
nobg_dataset = datasets.ImageFolder(root=background_removed_path, transform=normal_transform)


# Number of augmented copies of the dataset to append to the original one
enlarge_factor = 5

# Start with the un-augmented dataset
combined_datasets = [nobg_dataset]

# Append `enlarge_factor` datasets that read the same folder through Augment
for _ in range(enlarge_factor):
    transformed_dataset = datasets.ImageFolder(root=background_removed_path, transform=Augment)
    combined_datasets.append(transformed_dataset)

# Concatenate everything into a single dataset
enlarged_dataset = ConcatDataset(combined_datasets)


test_set_size = 0.4
# Hold out `test_set_size` of the samples; the remainder is what the K-fold /
# no-fold training functions split again into train and validation.
# NOTE(review): the split is taken over the enlarged dataset, so augmented
# copies of one source image can end up on both sides — confirm acceptable.
train_val_dataset, test_dataset = train_test_split(enlarged_dataset, test_size = test_set_size, random_state=seed) # random_state fixes the shuffle
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Total sample count: the original dataset plus `enlarge_factor` augmented copies.
len(enlarged_dataset)

# Auxiliary definitions


In [None]:
# Lookup tables from a predicted class index to a label (auxiliary).
# NOTE(review): presumably ordered like ImageFolder's class indices (folder
# names sorted as strings, hence "6" last) — confirm against the data folders.

# Human-readable label: "<class id> - <description>"
label_str = [
    "12 - Don't Go Left or Right",
    "13 - Don't Go Right",
    "24 - Go Right",
    "37 - Children crossing",
    "38 - Dangerous curve to the right",
    "39 - Dangerous curve to the left",
    "44 - Go left or straight",
    "50 - Fences",
    "6 - Speed limit (70km/h)"
]
# Class id only, in the same order as label_str (written to the CSV by createCSV)
label_str_id = [
    "12",
    "13",
    "24",
    "37",
    "38",
    "39",
    "44",
    "50",
    "6"
]

In [None]:
def _majority_vote(predictions_matrix):
    """Return the most voted class for each column of a (n_models, n_samples) matrix.

    Ties are resolved in favour of the lowest class index (np.argmax behaviour).
    """
    predictions_matrix = np.asarray(predictions_matrix)
    num_classes = int(predictions_matrix.max()) + 1
    n_samples = predictions_matrix.shape[1]
    sample_idx = np.arange(n_samples)

    class_votes = np.zeros((num_classes, n_samples), dtype=np.int64)  # (num_classes, num_predictions)
    for model_predictions in predictions_matrix:
        # One vote per sample from this model; (class, sample) pairs are unique within a row.
        class_votes[model_predictions, sample_idx] += 1

    return np.argmax(class_votes, axis=0)


def voting(trained_models, data_loader, type):
    """Combine the predictions of several trained models by majority vote.

    Every model in ``trained_models`` is used (the ensemble size is no longer
    fixed at 5) and the models are used as given, whatever their architecture.
    The loader is iterated once and each batch is fed to all models, so the
    predictions and labels stay aligned even for a shuffling loader.

    Side effects: each model's ``state_dict`` is saved to ``model_fold_{i}.pth``
    in the working directory, and each model is moved to the compute device and
    put into evaluation mode.

    Args:
        trained_models: iterable of trained ``nn.Module`` instances.
        data_loader: iterable of batches. With ``type == 0`` each batch is
            ``(inputs, labels)``; otherwise each batch is just ``inputs``.
        type: 0 -> loader with labels (metrics are printed);
            anything else (1 by convention) -> loader without labels.

    Returns:
        ``(y, most_voted_classes)`` when ``type == 0``, where ``y`` is the
        tensor of true labels; otherwise only ``most_voted_classes``
        (NumPy array of class indices).

    Raises:
        ValueError: if no model is given or the loader yields no batch.
    """
    trained_models = list(trained_models)
    if not trained_models:
        raise ValueError("voting() needs at least one trained model")

    vote_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Save the models, then prepare them for inference.
    for i, model in enumerate(trained_models):
        torch.save(model.state_dict(), f"model_fold_{i}.pth")
        model.to(vote_device)
        model.eval()

    has_labels = (type == 0)
    per_model_predictions = [[] for _ in trained_models]
    label_batches = []

    with torch.no_grad():
        for batch in data_loader:
            if has_labels:
                inputs, labels = batch
                label_batches.append(labels.cpu())
            else:
                inputs = batch
            inputs = inputs.to(vote_device)

            for model_idx, model in enumerate(trained_models):
                predicted = model(inputs).argmax(dim=1)
                per_model_predictions[model_idx].append(predicted.cpu())

    if not per_model_predictions[0]:
        raise ValueError("voting() received an empty data loader")

    # One row per model, one column per sample.
    predictions_matrix = np.vstack(
        [torch.cat(batches, dim=0).numpy() for batches in per_model_predictions]
    )
    most_voted_classes = _majority_vote(predictions_matrix)  # pred

    if not has_labels:
        return most_voted_classes

    y = torch.cat(label_batches, dim=0)  # labels
    y_np = y.numpy()

    # Evaluation of the ensemble prediction
    conf_mat = confusion_matrix(y_np, most_voted_classes)
    f1 = f1_score(y_np, most_voted_classes, average='weighted')
    bal_acc = balanced_accuracy_score(y_np, most_voted_classes)

    print('Confusion Matrix:\n', conf_mat)
    print('F1 Score: ', f1)
    print('B_acc: ', bal_acc)

    return y, most_voted_classes

In [None]:
 # PREDICT TEST FOLDER AND CREATE CSV FILE ----------------------------------------------------

def createCSV(model, test_dataset_loader, type, name, save_dir=None):
    """Predict the unlabeled test set and write the predictions to a CSV file.

    The CSV has the columns ``ID`` (1-based position of the sample in the
    loader) and ``Class`` (the entry of ``label_str_id`` for the predicted
    class index). Batches of any size are supported; every sample in a batch
    gets its own row.

    Args:
        model: a single trained model, or — when ``type == "voting"`` — the
            collection of models handed to ``voting``.
        test_dataset_loader: iterable of input batches (no labels).
        type: ``"voting"`` for ensemble prediction; any other value selects
            single-model prediction.
        name: file name of the CSV inside the output directory.
        save_dir: output directory. When ``None`` the original defaults are
            used (a Drive folder on Colab, otherwise a fixed local path).

    Returns:
        None. The file is written and a confirmation message is printed.
    """
    import csv

    if save_dir is None:
        # Pick the historical default location for the current environment.
        try:
            import google.colab  # noqa: F401
            in_colab = True
        except ImportError:
            in_colab = False

        if in_colab:
            save_dir = "/content/drive/MyDrive/Project1-AML/Nic/"
        else:
            # NOTE(review): machine-specific absolute path; pass save_dir to override.
            save_dir = "/home/stefanotrenti/AML/project"

    os.makedirs(save_dir, exist_ok=True)
    csv_file = os.path.join(save_dir, name)

    if type == "voting":
        predicted_classes = [int(c) for c in voting(model, test_dataset_loader, 1)]
    else:
        # Run on whatever device the model already lives on.
        first_param = next(model.parameters(), None)
        model_device = first_param.device if first_param is not None else torch.device("cpu")

        predicted_classes = []
        model.eval()
        with torch.no_grad():
            for images in test_dataset_loader:
                outputs = model(images.to(model_device))
                predicted_classes.extend(torch.argmax(outputs, dim=1).cpu().tolist())

    data = [
        {"ID": sample_no, "Class": label_str_id[predicted_class]}
        for sample_no, predicted_class in enumerate(predicted_classes, start=1)
    ]

    fields = ["ID", "Class"]
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fields)
        writer.writeheader()
        writer.writerows(data)

    print("CSV file created successfully.")
    return

#EVALUATE FUNCTION

In [None]:
# DEFINE EVALUATE FUNCTION
def evaluate_network(model, dataset_loader, to_device=True):
    """Evaluate a classifier on a labeled loader and print F1 / balanced accuracy.

    The loader is processed batch by batch, so memory use does not grow with
    the dataset size, and the device is taken from the model's own parameters
    instead of relying on a global ``device`` variable.

    Args:
        model: trained ``nn.Module``; it is switched to evaluation mode.
        dataset_loader: iterable of ``(inputs, labels)`` batches.
        to_device: when True, each input batch is moved to the model's device
            before the forward pass; when False, inputs are used as they are.

    Returns:
        ``(conf_mat, f1, bal_acc)``: confusion matrix, weighted F1 score and
        balanced accuracy over all samples of the loader.

    Raises:
        ValueError: if the loader yields no batch.
    """
    model.eval()

    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    predicted_batches = []
    label_batches = []
    with torch.no_grad():
        for inputs, labels in dataset_loader:
            if to_device:
                inputs = inputs.to(model_device)
            outputs = model(inputs)
            predicted_batches.append(outputs.argmax(dim=1).cpu())
            label_batches.append(labels.cpu())

    if not label_batches:
        raise ValueError("evaluate_network() received an empty dataset loader")

    predicted_np = torch.cat(predicted_batches, dim=0).numpy()
    test_target_np = torch.cat(label_batches, dim=0).numpy()

    # Compute confusion matrix, F1 score and balanced accuracy
    conf_mat = confusion_matrix(test_target_np, predicted_np)
    f1 = f1_score(test_target_np, predicted_np, average='weighted')
    bal_acc = balanced_accuracy_score(test_target_np, predicted_np)

    #print('Confusion Matrix:\n', conf_mat)
    print('F1 Score: ', f1)
    print('B_acc: ', bal_acc)

    return conf_mat, f1, bal_acc

#TRAIN Function

In [None]:
def trainCNN(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Per epoch the function prints the mean batch loss and the accuracy (in
    percent) on both the training and the validation loader.

    Args:
        model: network to train; it is moved to ``device``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer that updates the model's parameters.
        num_epochs: number of passes over ``train_loader``.
        device: device the model and the batches are moved to.

    Returns:
        ``(model, val_acc, val_loss)`` where the two metrics are those of the
        last epoch. The model is left in evaluation mode.
    """
    def _hits(logits, targets):
        # Number of samples whose arg-max class equals the target.
        return logits.max(1)[1].eq(targets).sum().item()

    model.to(device)

    for epoch_number in range(1, num_epochs + 1):
        # ---- training pass ----
        model.train()
        loss_sum, n_correct, n_seen = 0.0, 0, 0

        for batch_inputs, batch_labels in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_labels)
            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item()
            n_seen += batch_labels.size(0)
            n_correct += _hits(logits, batch_labels)

        train_loss = loss_sum / len(train_loader)
        train_acc = 100. * n_correct / n_seen

        # ---- validation pass ----
        model.eval()
        val_loss, n_correct, n_seen = 0.0, 0, 0

        with torch.no_grad():
            for batch_inputs, batch_labels in val_loader:
                batch_inputs = batch_inputs.to(device)
                batch_labels = batch_labels.to(device)

                logits = model(batch_inputs)
                val_loss += criterion(logits, batch_labels).item()

                n_seen += batch_labels.size(0)
                n_correct += _hits(logits, batch_labels)

        val_loss /= len(val_loader)
        val_acc = 100. * n_correct / n_seen

        print(f'Epoch [{epoch_number}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

    print('Finished Training')
    return model, val_acc, val_loss

In [None]:
def Train_K_FOLDS(k_model, epochs, lr, folds):
    """Train one fresh model per K-fold split of ``train_val_dataset``.

    For each fold a new ``k_model()`` is trained with Adam and cross-entropy
    loss, then evaluated on the fold's validation part. The model with the
    highest final validation accuracy is tracked as the best one.

    Args:
        k_model: callable (typically the model class) returning a new,
            untrained model.
        epochs: number of training epochs per fold.
        lr: learning rate for Adam.
        folds: number of K-fold splits.

    Returns:
        ``(best_model, models)``: the best model and the list of all trained
        fold models, in fold order.
    """
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    models = []
    best_model = None
    best_index = None
    # Start below any reachable accuracy so the first fold is always accepted
    # and `best_model` can never be left unset.
    best_accuracy = float("-inf")

    for fold, (train_index, val_index) in enumerate(kf.split(train_val_dataset)):

        model = k_model()

        # Both loaders read the same dataset; the samplers restrict them to the fold's indices.
        train_sampler = torch.utils.data.SubsetRandomSampler(train_index)
        val_sampler = torch.utils.data.SubsetRandomSampler(val_index)
        train_loader = DataLoader(train_val_dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
        val_loader = DataLoader(train_val_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        trained_model, val_acc, val_loss = trainCNN(model, train_loader, val_loader, criterion, optimizer, epochs, device)
        models.append(trained_model)

        # Prints F1 and balanced accuracy for this fold's validation part.
        evaluate_network(trained_model, val_loader)

        if val_acc > best_accuracy:
            best_accuracy = val_acc
            best_index = fold
            best_model = trained_model

        print("Best model has index ", best_index , "\n")

    return best_model, models

In [None]:
# TRAIN THE MODEL WITH NO FOLDS
def Train_NOFOLDS(model, epochs, lr):
    """Train a single model on one 90/10 train/validation split.

    Args:
        model: callable (typically the model class) returning a new,
            untrained model.
        epochs: number of training epochs.
        lr: learning rate for Adam.

    Returns:
        The trained model.
    """
    network = model()

    # Single hold-out split of the global train/validation data (10% validation).
    fit_split, holdout_split = train_test_split(train_val_dataset, test_size=0.1, random_state=seed)

    train_loader = DataLoader(fit_split, batch_size=BATCH_SIZE, shuffle = True)
    val_loader = DataLoader(holdout_split, batch_size=BATCH_SIZE, shuffle = True)

    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(network.parameters(), lr=lr)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    network, val_acc, val_loss = trainCNN(network, train_loader, val_loader, loss_fn, adam, epochs, device)

    # Prints F1 and balanced accuracy on the validation split.
    evaluate_network(network, val_loader)

    return network

# CNN

In [None]:
class batch_norm_CNN(nn.Module):
    """Three conv + batch-norm + ReLU stages followed by a two-layer classifier.

    Only the first stage is followed by max pooling, so a 75x75 input is
    reduced to 37x37 and stays at that size; the classifier therefore expects
    64 * 37 * 37 flattened features.
    """

    def __init__(self, input_channels=3, num_classes=9):
        super().__init__()
        dropout_rate = 0.1

        self.conv_layers = nn.Sequential(
            # Stage 1: (N, input_channels, 75, 75) -> (N, 12, 75, 75) -> pooled to (N, 12, 37, 37)
            nn.Conv2d(input_channels, 12, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(12),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # the only pooling layer: the images are small

            # Stage 2: (N, 12, 37, 37) -> (N, 32, 37, 37)
            nn.Conv2d(12, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            # Stage 3: (N, 32, 37, 37) -> (N, 64, 37, 37)
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )

        self.fc = nn.Sequential(
            nn.Linear(64 * 37 * 37, 128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        feature_maps = self.conv_layers(x)
        flat = feature_maps.view(-1, 64 * 37 * 37)
        return self.fc(flat)

    def print_architecture(self):
        """Print the module structure."""
        print(self)


In [None]:
class batch_norm_CNN_2L(nn.Module):
    """Two conv + batch-norm + ReLU + max-pool stages and a two-layer classifier.

    The second batch-norm layer is stored as ``bn2`` (it previously overwrote
    ``bn1``) and both batch-norm layers are applied in ``forward``.
    """

    def __init__(self, input_channels=3, num_classes=9):
        super(batch_norm_CNN_2L, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 20, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(20)
        self.conv2 = nn.Conv2d(20, 12, kernel_size=3, padding = 1)
        self.bn2 = nn.BatchNorm2d(12)

        # Spatial size for a 75x75 input:
        # each conv keeps the size: (size - 3 + 2*1) / 1 + 1 = size
        # conv1: 75 -> 75, max pool: 75 / 2 = 37 (rounded down)
        # conv2: 37 -> 37, max pool: 37 / 2 = 18 (rounded down)
        self.fc1 = nn.Linear(18*18*12, 120)
        self.fc2 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), 2)
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2)

        x = x.view(-1, self.fc1.in_features)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def print_architecture(self):
        """Print the module structure."""
        print(self)

In [None]:
class batch_norm_CNN_v2(nn.Module):
    """Four conv + batch-norm + ReLU stages (max pooling after stages 1 and 3).

    A 75x75 input is pooled to 37x37 and then 18x18, so the classifier expects
    18 * 18 * 8 flattened features.
    """

    def __init__(self, input_channels=3, num_classes=9):
        super().__init__()

        # Channel widths of conv1..conv4; layers are registered as convN / bnN
        # in stage order.
        widths = [input_channels, 20, 16, 12, 8]
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"conv{stage}", nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            setattr(self, f"bn{stage}", nn.BatchNorm2d(c_out))

        self.fc1 = nn.Linear(18*18*8, 120)
        self.fc2 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        pooled_stages = (1, 3)
        for stage in range(1, 5):
            conv = getattr(self, f"conv{stage}")
            bn = getattr(self, f"bn{stage}")
            x = F.relu(bn(conv(x)))
            if stage in pooled_stages:
                x = F.max_pool2d(x, 2)

        flat = x.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

    def print_architecture(self):
        """Print the module structure."""
        print(self)

In [None]:
class batch_norm_CNN_v3(nn.Module):
    """Six conv + batch-norm + ReLU stages (max pooling after stages 1, 3 and 5).

    A 75x75 input is pooled to 37x37, 18x18 and 9x9, so the classifier expects
    9 * 9 * 8 flattened features.
    """

    def __init__(self, input_channels=3, num_classes=9):
        super().__init__()

        # Channel widths of conv1..conv6; layers are registered as convN / bnN
        # in stage order.
        widths = [input_channels, 20, 18, 16, 14, 12, 8]
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"conv{stage}", nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            setattr(self, f"bn{stage}", nn.BatchNorm2d(c_out))

        self.fc1 = nn.Linear(9*9*8, 120)
        self.fc2 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        pooled_stages = (1, 3, 5)
        for stage in range(1, 7):
            conv = getattr(self, f"conv{stage}")
            bn = getattr(self, f"bn{stage}")
            x = F.relu(bn(conv(x)))
            if stage in pooled_stages:
                x = F.max_pool2d(x, 2)

        flat = x.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

    def print_architecture(self):
        """Print the module structure."""
        print(self)

In [None]:
class batch_norm_CNN_v4(nn.Module):
    """Eight conv + batch-norm + ReLU stages (max pooling after stages 3 and 6).

    A 75x75 input is pooled to 37x37 and then 18x18. The last convolution has
    6 output channels, so the classifier expects 18 * 18 * 6 flattened
    features. (It was previously sized for 8 channels, which made ``view``
    regroup samples across the batch dimension.)
    """

    def __init__(self, input_channels=3, num_classes=9):
        super(batch_norm_CNN_v4, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 20, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(20)
        self.conv2 = nn.Conv2d(20, 18, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(18)
        self.conv3 = nn.Conv2d(18, 16, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(16)
        self.conv4 = nn.Conv2d(16, 14, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(14)
        self.conv5 = nn.Conv2d(14, 12, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(12)
        self.conv6 = nn.Conv2d(12, 10, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(10)
        self.conv7 = nn.Conv2d(10, 8, kernel_size=3, padding=1)
        self.bn7 = nn.BatchNorm2d(8)
        self.conv8 = nn.Conv2d(8, 6, kernel_size=3, padding=1)
        self.bn8 = nn.BatchNorm2d(6)

        # Flattened size follows the channel count of the LAST conv layer.
        self.fc1 = nn.Linear(18*18*self.conv8.out_channels, 120)
        self.fc2 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(F.relu(self.bn3(self.conv3(x))), 2)
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.relu(self.bn5(self.conv5(x)))
        x = F.max_pool2d(F.relu(self.bn6(self.conv6(x))), 2)
        x = F.relu(self.bn7(self.conv7(x)))
        x = F.relu(self.bn8(self.conv8(x)))

        x = x.view(-1, self.fc1.in_features)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def print_architecture(self):
        """Print the module structure."""
        print(self)

#Train - Folds

In [None]:
# Hyperparameters of the k-fold runs. They are named here because the
# reporting cells print `learning_rate`, `n_splits` and `num_epochs`.
learning_rate = 0.0001
n_splits = 10
num_epochs = 100
num_epochs_v3 = 40  # the v3 architecture is trained for fewer epochs

best_model_v3, models_v3 = Train_K_FOLDS(batch_norm_CNN_v3, num_epochs_v3, learning_rate, n_splits)
best_model_v2, models_v2 = Train_K_FOLDS(batch_norm_CNN_v2, num_epochs, learning_rate, n_splits)
best_model, models = Train_K_FOLDS(batch_norm_CNN, num_epochs, learning_rate, n_splits)
best_model_v4, models_v4 = Train_K_FOLDS(batch_norm_CNN_v4, num_epochs, learning_rate, n_splits)

# The voting report and the voting CSV cell refer to the fold models of
# batch_norm_CNN as `trained_models`.
trained_models = models


In [None]:
# Report the k-fold experiment: settings, architecture, then test-set metrics
# for the best single model and for the majority vote over the fold models.
# NOTE(review): learning_rate, n_splits, num_epochs and trained_models must
# already exist in the kernel when this cell runs — confirm on a fresh kernel.
print("parameters")
print('learning rate  = ',learning_rate)
print('Test set size  = ',test_set_size)
print("enlarge_factor =", enlarge_factor)
print("number of folds =", n_splits) 
print("with epochs =", num_epochs)
best_model.print_architecture()
print(" ----------------------------- evaluation of best model")
evaluate_network(best_model, test_loader)
print("------------------------------ evaluation with voting")
# type 0: test_loader yields (inputs, labels), so voting also prints metrics
voting(trained_models,test_loader,0)

#Train - no Folds

In [None]:
# Hyperparameters of the no-fold runs. They are named here because the
# reporting cell for this section prints `learning_rate` and `num_epochs`.
learning_rate = 0.0001
num_epochs = 30

nofolds_model_v3 = Train_NOFOLDS(batch_norm_CNN_v3, num_epochs, learning_rate)
nofolds_model_v2 = Train_NOFOLDS(batch_norm_CNN_v2, num_epochs, learning_rate)
nofolds_model = Train_NOFOLDS(batch_norm_CNN, num_epochs, learning_rate)

In [None]:
# Report the no-fold experiment: settings, architecture, then test-set metrics.
# NOTE(review): learning_rate and num_epochs must already exist in the kernel
# and describe the no-fold run — confirm on a fresh kernel.
print("parameters")
print('learning rate  = ',learning_rate)
print('Test set size  = ',test_set_size)
print("enlarge_factor =", enlarge_factor)
print("number of folds = none") 
print("with epochs =", num_epochs)
nofolds_model.print_architecture() 

print(" ------------------------- evaluation of model")
evaluate_network(nofolds_model, test_loader)

## PREDICT TEST FOLDER AND SAVE **CSV**

In [None]:
import re
from torch.utils.data import Dataset

class TestDataset(Dataset):
    """Unlabeled image dataset over the files of a single folder.

    Files are ordered "naturally": digit runs in the file name are compared as
    numbers, so ``2.png`` comes before ``10.png``. Sub-directories are ignored.
    """

    def get_int(self, text):
        """Split ``text`` into alternating text and integer parts (natural sort key)."""
        # Raw string: '\d' in a normal string literal is an invalid escape sequence.
        return [int(c) if c.isdigit() else c for c in re.split(r'(\d+)', text)]

    def __init__(self, images_folder, transform=None):
        """
        Args:
            images_folder: directory containing the image files.
            transform: optional callable applied to each loaded RGB image.
        """
        self.images_folder = images_folder
        self.image_files = [f for f in os.listdir(images_folder) if os.path.isfile(os.path.join(images_folder, f))]
        self.image_files.sort(key=self.get_int)
        self.transform = transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return the (optionally transformed) RGB image at position ``idx``."""
        img_name = os.path.join(self.images_folder, self.image_files[idx])
        # The context manager closes the file handle as soon as the pixels are converted.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image
    
# Alternative (disabled): remove the background on the fly from the raw TEST folder every time the CSV is created
'''
transform_test = transforms.Compose([removeBackground,transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

inference_dataset = TestDataset(images_folder=test_dataset_path, transform=transform_test)

test_dataset_loader = DataLoader(inference_dataset, batch_size=1, shuffle=False)
'''

In [None]:
#ONLY RUN ONCE TO SAVE TEST IMAGES WITH NO BG IN FOLDER
# Define input and output folders
'''
input_folder = 'data-students/TEST'
output_folder = 'data-students/TEST-NOBG'

#Create output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

#List all image files in the input folder
image_files = [f for f in os.listdir(input_folder) if f.endswith(('png', 'jpg', 'jpeg', 'bmp'))]

#Transformation to convert PIL image to tensor
to_tensor = transforms.ToTensor()

#Process each image
for img_file in image_files:
    #Load image
    img_path = os.path.join(input_folder, img_file)
    img = Image.open(img_path).convert('RGB')  

    #Apply custom function
    processed_img = removeBackground(img)

    transformed_image_pil = transforms.ToPILImage()(processed_img)

    #Save tensor as image with the same name
    output_path = os.path.join(output_folder, img_file)
    transformed_image_pil.save(output_path)

print("All images processed and saved successfully.")
'''

In [None]:
# Load the test images from the TEST folder whose backgrounds were already removed

def input_transform(image):
    """Resize, tensorise and normalise a PIL image for inference.

    No augmentation and no background removal is applied here.

    Args:
        image: PIL image to transform.

    Returns:
        The normalised tensor for the image.
    """
    inference_pipeline = transforms.Compose([
        transforms.Resize((IMG_WIDTH, IMG_HEIGHT)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    tensor = inference_pipeline(image)
    return tensor
# Folder holding the test images whose background was already removed
test_nobg_path = 'data-students/TEST-NOBG'

# Unlabeled dataset over that folder, in natural file-name order
inference_dataset = TestDataset(images_folder=test_nobg_path, transform=input_transform)

# One image per batch, in dataset order (no shuffling), so the CSV IDs follow the file order
test_dataset_loader = DataLoader(inference_dataset, batch_size=1, shuffle=False)

In [None]:
# Any `type` other than "voting" selects single-model prediction; [] is used as that placeholder.
createCSV(nofolds_model, test_dataset_loader, [],"predictions_CNN_noFolds.csv")

In [None]:
# Single-model prediction with the best k-fold model ([] = "not voting").
createCSV(best_model, test_dataset_loader,[],"predictions_CNN_bst_model.csv")

In [None]:
# Ensemble prediction: `models` is the list of fold models returned by
# Train_K_FOLDS for batch_norm_CNN.
createCSV(models, test_dataset_loader,"voting","predictions_CNN_voting.csv")

## END

In [None]:
def denormalize(tensor, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    """Undo a per-channel ``(x - mean) / std`` normalisation.

    Args:
        tensor (Tensor): image tensor whose first dimension indexes channels.
        mean (sequence): per-channel means used for the normalisation.
        std (sequence): per-channel standard deviations used for it.

    Returns:
        Tensor: a new tensor with ``x * std + mean`` applied per channel; the
        input tensor is left unchanged. Channels beyond the shortest of
        ``tensor``, ``mean`` and ``std`` are copied as they are.
    """
    channel_means = np.array(mean)
    channel_stds = np.array(std)

    restored = tensor.clone()
    n_channels = min(len(restored), len(channel_means), len(channel_stds))
    for c in range(n_channels):
        channel = restored[c]          # view into `restored`, updated in place
        channel.mul_(channel_stds[c])
        channel.add_(channel_means[c])
    return restored

def _save_image_tensor(image_tensor, file_path):
    """Denormalise a (C, H, W) tensor, clamp it to [0, 1] and save it as an 8-bit image."""
    pixels = denormalize(image_tensor).permute(1, 2, 0).cpu().numpy()
    # Without clamping, values outside [0, 1] wrap around when cast to uint8.
    pixels = np.clip(pixels, 0.0, 1.0)
    Image.fromarray((pixels * 255).astype(np.uint8)).save(file_path)


def save_images(loader, save_path):
    """Write every image produced by ``loader`` to ``save_path`` as PNG files.

    Each batch is iterated element by element: a 4-D element is treated as a
    stack of images, a 3-D element as a single image, and a 1-D element (such
    as a label vector) is skipped. Images are passed through ``denormalize``
    and clamped to [0, 1] before being saved as ``image_<index>.png``.

    Args:
        loader: data loader with a ``batch_size`` attribute.
        save_path: output directory; created when missing.

    Raises:
        ValueError: if a batch element has an unexpected number of dimensions.
    """
    os.makedirs(save_path, exist_ok=True)

    batch_size = loader.batch_size

    for i, batch in enumerate(loader):
        for j, item in enumerate(batch):
            if isinstance(item, tuple):
                item = item[0]  # Assuming the image is the first element of the tuple

            n_dims = len(item.shape)
            if n_dims == 4:
                # Stack of images (batch dimension included): save each one
                for k, image_tensor in enumerate(item):
                    file_name = f'image_{i * batch_size + j * batch_size + k}.png'
                    _save_image_tensor(image_tensor, os.path.join(save_path, file_name))
            elif n_dims == 3:
                # Single image (no batch dimension)
                file_name = f'image_{i * batch_size + j}.png'
                _save_image_tensor(item, os.path.join(save_path, file_name))
            elif n_dims == 1:
                # Probably labels or other metadata: nothing to save
                continue
            else:
                raise ValueError(f"Unexpected number of dimensions: {len(item.shape)}")



In [None]:
# Same 90/10 split (same seed) as the one made inside Train_NOFOLDS
train_dataset, val_dataset = train_test_split(train_val_dataset, test_size=0.1, random_state=seed) # random_state fixes the shuffle

# DataLoaders over the training and validation parts
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle = True)

# Export the images of each split to its own folder for visual inspection.
# NOTE(review): both loaders shuffle, so file numbering differs between runs — confirm acceptable.
save_images(train_loader, 'train_images')

# Save images from val_loader
save_images(val_loader, 'val_images')

# Save images from test_loader
save_images(test_loader, 'test_images')