# Environment Preparing

In [6]:

from PIL import Image
import os
import torchvision.utils as vutils
import random
import torch
from torchvision import transforms
import pandas as pd
from torchvision import datasets
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


# Data Augmentation

In [7]:
base_transforms = transforms.Compose([transforms.CenterCrop((178, 178)),
                                       transforms.Resize((128, 128)),
                                       #transforms.Grayscale(),                                       
                                       #transforms.Lambda(lambda x: x/255.),
                                       transforms.ToTensor()])

In [8]:
noop_transform = transforms.Lambda(lambda x: x)

augmentation_transforms = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.7, contrast=0.7),
    # transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value='random'),
    # transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 2.0)),  # 模糊处理
    noop_transform
]

# Data Loading

In [9]:
img_dir = '../dataset/CelebA/img_align_celeba/img_align_celeba'
attr_dir = '../dataset/CelebA/list_attr_celeba.txt'
attrs_df = pd.read_csv(attr_dir, delim_whitespace=True, skiprows=1, header=0,index_col=0,usecols=['Male'])
attrs_df.index.name = "image_id"
attrs_df.loc[attrs_df['Male']==-1,'Male']=0

  attrs_df = pd.read_csv(attr_dir, delim_whitespace=True, skiprows=1, header=0,index_col=0,usecols=['Male'])


In [10]:
attrs_df

Unnamed: 0_level_0,Male
image_id,Unnamed: 1_level_1
000001.jpg,0
000002.jpg,0
000003.jpg,1
000004.jpg,0
000005.jpg,0
...,...
202595.jpg,0
202596.jpg,1
202597.jpg,1
202598.jpg,0


In [11]:
BATCH_SIZE = 16

In [12]:
partition_df = pd.read_csv('../dataset/CelebA/list_eval_partition.txt',sep=' ',names=['image_id', 'partition'],index_col=False)
partition_df.set_index('image_id', inplace=True)

In [13]:
partition_df

Unnamed: 0_level_0,partition
image_id,Unnamed: 1_level_1
000001.jpg,0
000002.jpg,0
000003.jpg,0
000004.jpg,0
000005.jpg,0
...,...
202595.jpg,2
202596.jpg,2
202597.jpg,2
202598.jpg,2


In [14]:
df = pd.merge(attrs_df,partition_df,on='image_id')

In [15]:
df

Unnamed: 0_level_0,Male,partition
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
000001.jpg,0,0
000002.jpg,0,0
000003.jpg,1,0
000004.jpg,0,0
000005.jpg,0,0
...,...,...
202595.jpg,0,2
202596.jpg,1,2
202597.jpg,1,2
202598.jpg,0,2


In [16]:
df.to_csv('celeba-gender-partitions.csv')


In [17]:
tmp = pd.read_csv('./celeba-gender-partitions.csv', index_col=0)
tmp.head()

Unnamed: 0_level_0,Male,partition
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
000001.jpg,0,0
000002.jpg,0,0
000003.jpg,1,0
000004.jpg,0,0
000005.jpg,0,0


In [18]:
df.loc[df['partition'] == 0].to_csv('celeba-gender-train.csv')
df.loc[df['partition'] == 1].to_csv('celeba-gender-valid.csv')
df.loc[df['partition'] == 2].to_csv('celeba-gender-test.csv')


In [19]:
t1 = pd.read_csv('celeba-gender-train.csv')
t1.head()

Unnamed: 0,image_id,Male,partition
0,000001.jpg,0,0
1,000002.jpg,0,0
2,000003.jpg,1,0
3,000004.jpg,0,0
4,000005.jpg,0,0


In [20]:
class CelebADataset(Dataset):
    def __init__(self,img_dir,df,base_transforms,augmentation_transforms=None):
        self.df = df
        self.img_dir = img_dir
        self.image_names = df.index.values
        self.base_transforms = base_transforms
        self.augmentation_transforms = augmentation_transforms
        self.y = df['Male'].values
    
    def __len__(self):
        return self.y.shape[0]            
    
    def __getitem__(self, idx):
        
        label =  self.y[idx]
        img_path = os.path.join(self.img_dir, self.image_names[idx])
        image = Image.open(img_path)
        

        image = self.base_transforms(image)
        
        if self.augmentation_transforms:
            augmentation = random.choice(self.augmentation_transforms)
            image = augmentation(image)
        
        return image, label
        

In [21]:
test_df = pd.read_csv('./celeba-gender-test.csv',index_col=0)
train_df = pd.read_csv('./celeba-gender-train.csv',index_col=0)
valid_df = pd.read_csv('./celeba-gender-valid.csv',index_col=0)

In [22]:
train_dataset = CelebADataset(img_dir=img_dir,df=test_df,base_transforms=base_transforms,augmentation_transforms=augmentation_transforms)

valid_dataset = CelebADataset(img_dir=img_dir,df=train_df,base_transforms=base_transforms,augmentation_transforms=augmentation_transforms)

test_dataset = CelebADataset(img_dir=img_dir,df=valid_df,base_transforms=base_transforms,augmentation_transforms=augmentation_transforms)

In [23]:
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

In [24]:
images, labels = next(iter(test_loader))

# 打印图像和标签的形状和部分数据
print("Image batch shape:", images.shape)
print("Label batch shape:", labels.shape)
print("First batch of images:", images[:2])  # 只打印前两个图像
print("First batch of labels:", labels[:2])  # 只打印前两个标签

Image batch shape: torch.Size([16, 3, 128, 128])
Label batch shape: torch.Size([16])
First batch of images: tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.2078, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.2078, 0.0000, 0.0000],
          [0.2118, 0.2118, 0.2118,  ..., 0.2078, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.3098,  ..., 0.3255, 0.4118, 0.4549],
          [0.0000, 0.0000, 0.1686,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.1255,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.2000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.2000, 0.0000, 0.0000],
          [0.2039, 0.2039, 0.2039,  ..., 0.2000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.3020,  ..., 0.3216, 0.4078, 0.4510],
          [0.0000, 0.0000, 0.1647,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.1216,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.2431, 0.0000, 0.0000],
