In [1]:
import os
import pandas as pd

# Define the base directory where the folders are located (this will be replaced with your actual base directory)
base_dir = '../../Data/train/images/'

# Initialize an empty list to hold the data
data = []

# List all items in the base directory
for item in os.listdir(base_dir):
    # Construct the full path of the item
    item_path = os.path.join(base_dir, item)
    # Check if the item is a directory
    if os.path.isdir(item_path):
        # Split the directory name to get the labels
        labels = item.split('_')  # Assuming the format is 'No_Gender_Race_Age'
        # List all files in the directory
        for file in os.listdir(item_path):
            # Skip hidden files
            if file.startswith('.'):
                continue
            # Determine the mask label based on the file name
            if 'incorrect_mask' in file:
                mask_label = 1
            elif any(mask in file for mask in ['mask1', 'mask2', 'mask3', 'mask4', 'mask5']):
                mask_label = 0
            elif 'normal' in file:
                mask_label = 2
            else:
                continue  # If the file name doesn't match any condition, skip it
            # Construct the total label
            gender_label = 0 if labels[1] == "male" else 1
            age_label = 0 if int(labels[3]) < 30 else (2 if int(labels[3]) > 60 else 1)
            total_label = 6 * mask_label + 3 * gender_label + age_label
            # Append the information to the data list
            data.append({
                'Image_path': os.path.join(item_path, file),
                'Mask_label': mask_label,
                'Gender_label': gender_label,
                'Age_label': age_label,
                'Total_label': total_label
            })

# Create a DataFrame
df = pd.DataFrame(data)
df.to_csv('./dataframe.csv', index=False)
df

Unnamed: 0,Image_path,Mask_label,Gender_label,Age_label,Total_label
0,../../Data/train/images/004447_male_Asian_24/m...,0,0,0,0
1,../../Data/train/images/004447_male_Asian_24/m...,0,0,0,0
2,../../Data/train/images/004447_male_Asian_24/m...,0,0,0,0
3,../../Data/train/images/004447_male_Asian_24/n...,2,0,0,12
4,../../Data/train/images/004447_male_Asian_24/i...,1,0,0,6
...,...,...,...,...,...
18895,../../Data/train/images/003642_male_Asian_59/m...,0,0,1,1
18896,../../Data/train/images/003642_male_Asian_59/n...,2,0,1,13
18897,../../Data/train/images/003642_male_Asian_59/i...,1,0,1,7
18898,../../Data/train/images/003642_male_Asian_59/m...,0,0,1,1


In [5]:
from PIL import Image
import torch
from torch.utils.data import Dataset
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

class CustomDataset(Dataset):
    
    transform = A.Compose([
        A.Resize(256, 256),
        A.RandomCrop(224, 224),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    
    def __init__(self, dataframe, transform=None):
        """
        Custom dataset that accepts a DataFrame, a transformation function, and returns images and labels.
        
        :param dataframe: pandas DataFrame containing the image paths and labels.
        :param transform: albumentations transformation pipeline.
        """
        self.dataframe = dataframe
        self.transform = transform
        

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Retrieve image path from dataframe
        img_path = self.dataframe.iloc[idx]['Image_path']
        
        # Load image using PIL
        image = Image.open(img_path).convert('RGB')  # Convert image to RGB
        
        # Apply transformations if any
        if self.transform:
            image = self.transform(image=np.array(image))['image']  # Convert to numpy array and apply transform
        
        # Get labels from the dataframe
        mask_label = torch.tensor(self.dataframe.iloc[idx]['Mask_label'], dtype=torch.long)
        gender_label = torch.tensor(self.dataframe.iloc[idx]['Gender_label'], dtype=torch.long)
        age_label = torch.tensor(self.dataframe.iloc[idx]['Age_label'], dtype=torch.long)
        total_label = torch.tensor(self.dataframe.iloc[idx]['Total_label'], dtype=torch.long)
        
        # Return the image and the corresponding labels
        return image, mask_label, gender_label, age_label, total_label


  warn(f"Failed to load image Python extension: {e}")


In [6]:
# Example of how to define transformations using albumentations

# Instantiate the dataset with the dataframe and the transform
dataset = CustomDataset(dataframe=df)