在PyTorch中，自定义数据集类通常需要继承`torch.utils.data.Dataset`类，并实现`__init__`、`__len__`和`__getitem__`方法。以下是一个详细的示例，展示了如何创建一个自定义数据集类。

## 自定义数据集类示例

假设有一个自定义的数据集，其中包含图片和对应的标签。可以按照以下步骤构建一个自定义数据集类：

```python
import os
import pandas as pd
from PIL import Image
import torchvision.transforms as transforms
from torch.utils.data import Dataset

class CustomImageDataset(Dataset):
    """Image/label dataset driven by a CSV index file.

    Column 0 of the CSV holds each image path (joined onto ``root_dir``) and
    column 1 holds the matching label.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Load the CSV index and remember where the images live.

        Args:
            csv_file (string): Path to the CSV file listing image paths and labels.
            root_dir (string): Directory that the image paths are joined onto.
            transform (callable, optional): Applied to each image after it is opened.
        """
        self.transform = transform
        self.root_dir = root_dir
        self.labels_df = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return self.labels_df.shape[0]

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at row ``idx``."""
        relative_path = self.labels_df.iloc[idx, 0]
        image = Image.open(os.path.join(self.root_dir, relative_path))
        label = self.labels_df.iloc[idx, 1]

        # Without a transform the opened PIL image is handed back untouched.
        if not self.transform:
            return image, label
        return self.transform(image), label
```

### 使用自定义数据集类

一旦定义了数据集类，就可以使用它来创建数据加载器。以下是如何使用刚才定义的 `CustomImageDataset` 类的示例：

```python
import torch
from torch.utils.data import DataLoader

# 1. Define the image preprocessing pipeline applied to every sample.
transform = transforms.Compose([
    transforms.Resize((28, 28)),  # resize every image to 28x28
    transforms.ToTensor(),  # convert the PIL image to a tensor
    transforms.Normalize((0.5,), (0.5,))  # normalise with mean 0.5 and std 0.5: (x - 0.5) / 0.5
])

# 2. Build the dataset from the CSV index and the image directory.
data_path = 'dataset/labels.csv'  # path to the CSV file
image_folder = 'dataset/images/'  # directory holding the images
dataset = CustomImageDataset(csv_file=data_path, root_dir=image_folder, transform=transform)

# 3. Wrap the dataset in a loader: shuffled batches of 32, loaded by 2 worker processes.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)

# 4. Smoke-test the loader by pulling a single batch.
for images, labels in train_loader:
    print(images.shape)  # shape of the image batch
    print(labels)  # labels of the batch
    break  # only the first batch is shown
```

### 解释

1. **`__init__` 方法**：读取 CSV 文件中的标签，并保存图像文件的根目录和可选的转换。
2. **`__len__` 方法**：返回数据集中样本的数量。
3. **`__getitem__` 方法**：根据给定的索引加载图像和标签，在加载图像后应用转换。

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from PIL import Image
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# 自定义数据集类
class CustomImageDataset(Dataset):
    """Dataset over parallel collections of image paths and labels."""

    def __init__(self, images, labels, transform=None):
        """Store the image paths, their labels and an optional transform.

        Args:
            images: Indexable collection of image file paths.
            labels: Indexable collection of labels, indexed in step with ``images``.
            transform (callable, optional): Applied to each opened image.
        """
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Open the image at position ``idx`` and pair it with its label."""
        image = Image.open(self.images[idx])
        label = self.labels[idx]

        # Without a transform the opened PIL image is handed back untouched.
        if not self.transform:
            return image, label
        return self.transform(image), label

# Assume a data frame `df` that holds the image paths and their labels,
# e.g. df = pd.read_csv('dataset/labels.csv')

# Read the index file ('path' is a placeholder) and pull out paths and labels.
df = pd.read_csv('path')

images = df['filename'].values
labels = df['label'].values

# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Split the remaining 80% again: 25% of it becomes the validation set.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # the new training set is 60% of all data

# Define the preprocessing pipeline shared by all three splits.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Create one dataset instance per split.
train_dataset = CustomImageDataset(X_train, y_train, transform=transform)
val_dataset = CustomImageDataset(X_val, y_val, transform=transform)
test_dataset = CustomImageDataset(X_test, y_test, transform=transform)

# Create the data loaders; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

In [None]:
import os
import cv2
import numpy as np
import copy
from tqdm import tqdm
from PIL import Image
import requests

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_tensor
from torchvision import transforms as TF
import torch.nn.functional as F
from torch.optim import AdamW

from transformers import SegformerForSemanticSegmentation
from transformers import get_scheduler

from sklearn.metrics import jaccard_score

In [None]:
class BDDDataset(Dataset):
    """Paired image / binary-mask dataset read from two directories.

    Every ``*.jpg`` file in ``images_dir`` is one sample; its mask is the file
    with the same stem and a ``.png`` extension inside ``masks_dir``.
    """

    def __init__(self, images_dir, masks_dir, transform=None, mask_size=(360, 640)):
        """Index the image directory and derive the matching mask filenames.

        Args:
            images_dir: Directory containing the ``.jpg`` input images.
            masks_dir: Directory containing the ``.png`` masks.
            transform (callable, optional): Applied to the RGB image only.
            mask_size: ``(height, width)`` every mask is resized to. It should
                match the spatial size that ``transform`` gives the image.
        """
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        self.mask_size = tuple(mask_size)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping reproducible across runs and machines.
        self.images = sorted(img for img in os.listdir(images_dir) if img.endswith('.jpg'))
        # Swap only the extension: str.replace would also rewrite any '.jpg'
        # that happens to occur inside the stem of the filename.
        self.masks = [os.path.splitext(img)[0] + '.png' for img in self.images]

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, mask)`` pair at position ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given. The mask is loaded as grayscale, resized to ``mask_size``
        and returned as a ``(1, H, W)`` long tensor of 0/1 values, where every
        non-zero source pixel counts as foreground.
        """
        image_path = os.path.join(self.images_dir, self.images[idx])
        mask_path = os.path.join(self.masks_dir, self.masks[idx])
        image = Image.open(image_path).convert("RGB")
        mask = Image.open(mask_path).convert('L')  # Single-channel grayscale mask.

        if self.transform:
            image = self.transform(image)

        # Nearest-neighbour resizing only copies existing pixel values, so the
        # zero / non-zero split survives and thresholding once afterwards is
        # enough to binarise the mask.
        mask = TF.functional.resize(img=mask, size=list(self.mask_size),
                                    interpolation=TF.InterpolationMode.NEAREST)
        mask = (TF.functional.to_tensor(mask) > 0).long()

        return image, mask

def mean_iou(preds, labels, num_classes):
    """Return the mean per-class IoU (Jaccard index) of ``preds`` against ``labels``.

    Args:
        preds: Tensor of predicted class indices, of any shape.
        labels: Tensor of ground-truth class indices holding the same number
            of elements as ``preds``.
        num_classes: Number of classes; classes ``0 .. num_classes - 1`` are scored.

    Returns:
        The unweighted mean of the per-class Jaccard scores.

    Raises:
        ValueError: If ``preds`` and ``labels`` hold different numbers of elements.
    """
    # Flatten predictions and labels. reshape (unlike view) also accepts
    # non-contiguous tensors such as transposed or sliced ones.
    preds_flat = preds.reshape(-1)
    labels_flat = labels.reshape(-1)

    # Check that the flattened predictions and labels hold the same number
    # of elements.
    if preds_flat.shape[0] != labels_flat.shape[0]:
        raise ValueError(f"Predictions and labels have mismatched shapes: "
                         f"{preds_flat.shape} vs {labels_flat.shape}")

    # Calculate the Jaccard score for each class (average=None keeps them separate).
    iou = jaccard_score(labels_flat.cpu().numpy(), preds_flat.cpu().numpy(),
                        average=None, labels=range(num_classes))

    # Return the mean IoU.
    return np.mean(iou)

In [None]:
# Define the appropriate transformations.
# The 360x640 resize matches the default size BDDDataset resizes its masks to.
# NOTE(review): the mean/std values look like the standard ImageNet channel
# statistics — confirm they match the pretrained backbone being fine-tuned.
transform = TF.Compose([
    TF.Resize((360, 640)),
    TF.ToTensor(),
    TF.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create the dataset.
# The transform is applied to the images only; masks are handled inside BDDDataset.
train_dataset = BDDDataset(images_dir='./deep_drive_10K/train/images',
                           masks_dir='./deep_drive_10K/train/masks',
                           transform=transform)

valid_dataset = BDDDataset(images_dir='./deep_drive_10K/valid/images',
                           masks_dir='./deep_drive_10K/valid/masks',
                           transform=transform)

# Create the data loaders (only the training loader shuffles).
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=6)
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=False, num_workers=6)