In [1]:
import os
import cv2
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import torchvision.transforms as transforms
import pandas as pd
from torch.nn.utils.rnn import pad_sequence


In [2]:
# Dataset location. The training folder holds one sub-folder of frames per
# sample plus the annotation CSVs. Set the EYE_TRAIN_ROOT environment variable
# to point at another copy of the data (an earlier run of this notebook used
# r"D:\dataset\eye\Train"); without it the path below is used unchanged.
root_path = os.environ.get("EYE_TRAIN_ROOT", r"F:\Train\Train")

# Label-encoded annotations, produced by the CSV-processing section of this
# notebook from the raw Train.csv.
csv_path = root_path + "/Transformed_Train.csv"

In [6]:
# Echo the resolved CSV path as a quick sanity check.
csv_path

'F:\\Train\\Train/Transformed_Train.csv'

In [7]:
# Load the annotation table into memory for ad-hoc inspection.
# NOTE(review): nothing in the visible cells reads `data_source`;
# CustomDataset reads the CSV again from the path it is given.
data_source = pd.read_csv(csv_path)

In [8]:
class CustomDataset(Dataset):
    """Dataset of per-folder image sequences with one label row per folder.

    Every CSV row describes one sample. The last column names a sub-folder of
    ``root_dir`` that holds the sample's ``.jpg`` frames, the second-to-last
    column (ID) is ignored, and all remaining columns are returned as labels.
    """

    # Shape of a padding frame when a folder yields no frame to copy it from.
    DEFAULT_FRAME_SHAPE = (3, 224, 224)

    def __init__(self, csv_file, root_dir, transform=None, max_images=10):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the image folders.
            transform (callable, optional): Applied to every RGB PIL image and
                expected to return a tensor. When omitted, images are only
                converted to tensors.
            max_images (int): Minimum sequence length. Samples with fewer
                frames are padded with all-zero frames up to this length;
                samples with more frames are returned in full (not truncated).
        """
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.max_images = max_images

    def __len__(self):
        """Return the number of samples (rows in the annotation CSV)."""
        return len(self.data_frame)

    @staticmethod
    def _list_image_files(folder_name):
        """Return the ``.jpg`` file names inside ``folder_name`` in frame order.

        Numerically named frames (``0.jpg``, ``1.jpg``, ...) come first in
        numeric order; any other ``.jpg`` names follow alphabetically. Entries
        that are not ``.jpg`` files (e.g. ``Thumbs.db``) are ignored.
        """
        def frame_order(file_name):
            stem = os.path.splitext(file_name)[0]
            if stem.isdecimal():
                return (0, int(stem), file_name)
            return (1, 0, file_name)

        jpg_names = [
            name
            for name in os.listdir(folder_name)
            if name.lower().endswith(".jpg")
            and os.path.isfile(os.path.join(folder_name, name))
        ]
        return sorted(jpg_names, key=frame_order)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            tuple: ``(images, labels)`` where ``images`` is a float tensor of
            shape ``(max(n_frames, max_images), C, H, W)`` and ``labels`` is a
            float32 tensor built from every column except the last two.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        folder_name = os.path.join(self.root_dir, str(self.data_frame.iloc[idx, -1]))
        image_files = self._list_image_files(folder_name)
        print("folder {}".format(folder_name))

        images = []
        for file_name in image_files:
            # convert() forces the pixel data to load, so the file handle can
            # be released right away; RGB guarantees three channels.
            with Image.open(os.path.join(folder_name, file_name)) as image_file:
                frame = image_file.convert("RGB")
            if self.transform is not None:
                frame = self.transform(frame)
            else:
                # torch.stack needs tensors, so fall back to a plain conversion.
                frame = transforms.ToTensor()(frame)
            images.append(frame)
        print("images {}".format(len(images)))

        # Zero-pad short sequences so every sample has at least max_images frames.
        missing = self.max_images - len(images)
        if missing > 0:
            if images:
                zero_tensor = torch.zeros_like(images[0])
            else:
                zero_tensor = torch.zeros(self.DEFAULT_FRAME_SHAPE)
            images.extend([zero_tensor] * missing)
        images = torch.stack(images)  # (n_frames, C, H, W)

        labels = self.data_frame.iloc[idx, :-2]  # Excluding ID and Folder columns
        labels = torch.tensor(labels.to_numpy(dtype="float32"))

        return images, labels

In [9]:

# Preprocessing applied to every frame: fixed-size resize, tensor conversion,
# then per-channel normalization with the ImageNet channel statistics that
# torchvision documents for its pretrained models.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)


In [13]:

# Build the dataset over the encoded annotations; max_images keeps its default.
dataset = CustomDataset(
    csv_file=csv_path,
    root_dir=root_path,
    transform=transform,
)

def collate_fn(batch):
    """Merge ``(images, labels)`` samples into one padded batch.

    Samples may hold different numbers of frames, so the image stacks are
    zero-padded along the first (frame) axis up to the longest stack in the
    batch. Label vectors are stacked as-is.

    Returns:
        tuple: ``(images, labels)`` with ``images`` shaped
        ``(batch, longest_stack, ...)`` and ``labels`` shaped ``(batch, ...)``.
    """
    frame_stacks = [sample[0] for sample in batch]
    label_rows = [sample[1] for sample in batch]

    padded_images = pad_sequence(frame_stacks, batch_first=True, padding_value=0)
    stacked_labels = torch.stack(label_rows)

    return padded_images, stacked_labels

# Batches of four shuffled samples; collate_fn pads the variable-length
# frame stacks so they fit in a single tensor.
dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn,
)

# Peek at the first batch only, as a shape sanity check.
for i_batch, batch in enumerate(dataloader):
    images, labels = batch
    print(f"Batch {i_batch + 1}")
    print(f"Images shape: {images.shape}, Labels shape: {labels.shape}")
    break


folder F:\Train\Train\1578_L
images 9
folder F:\Train\Train\745_L
images 23
folder F:\Train\Train\512_R
images 3
folder F:\Train\Train\187_L
images 14
Batch 1
Images shape: torch.Size([4, 23, 3, 224, 224]), Labels shape: torch.Size([4, 14])


## 处理 CSV 文件

本节对 `Train.csv` 的标签列做整数编码并生成 `Transformed_Train.csv`；上文的数据集单元读取的正是该文件，因此重启内核后需先运行本节。

In [11]:


from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Read the raw annotation CSV.
data_frame = pd.read_csv('F:/Train/Train/Train.csv')

# Encode into a copy so the raw frame stays available for inspection.
transformed_data_frame = data_frame.copy()

# Fit a separate LabelEncoder for every label column and keep each one, so the
# integer codes can later be mapped back to the original values with
# label_encoders[column].inverse_transform(...). Reusing a single encoder
# would keep only the mapping of the last column.
label_encoders = {}
for column in data_frame.columns[:-2]:  # skip the last two columns (ID and Folder)
    label_encoder = LabelEncoder()
    transformed_data_frame[column] = label_encoder.fit_transform(data_frame[column])
    label_encoders[column] = label_encoder

# Save next to the image folders: the dataset cells read
# root_path + "/Transformed_Train.csv" with root_path = F:\Train\Train.
transformed_data_frame.to_csv('F:/Train/Train/Transformed_Train.csv', index=False)
