In [1]:
import pandas as pd
import os
import numpy as np

#ref: https://fairyonice.github.io/Welcome-to-CelebA.html
def get_annotation(fnmtxt, verbose=True):
    if verbose:
        print("_"*70)
        print(fnmtxt)
    
    rfile = open( fnmtxt , 'r' ) 
    texts = rfile.read().split("\n") 
    rfile.close()

    columns = np.array(texts[1].split(" "))
    columns = columns[columns != ""]
    df = []
    for txt in texts[2:]:
        txt = np.array(txt.split(" "))
        txt = txt[txt!= ""]
    
        df.append(txt)
        
    df = pd.DataFrame(df)

    if df.shape[1] == len(columns) + 1:
        columns = ["filepath"]+ list(columns)
    df.columns = columns   
    df = df.dropna()
    if verbose:
        print(" Total number of annotations {}\n".format(df.shape))
        print(df.head())
    ## cast to integer
    for nm in df.columns:
        if nm != "filepath":
            df[nm] = pd.to_numeric(df[nm],downcast="integer")
    return(df)


In [2]:
df = pd.read_csv("list_eval_partition.txt", sep=" ", header=None).rename(columns={0: "filepath", 1:"type"})
df

Unnamed: 0,filepath,type
0,000001.jpg,0
1,000002.jpg,0
2,000003.jpg,0
3,000004.jpg,0
4,000005.jpg,0
...,...,...
202594,202595.jpg,2
202595,202596.jpg,2
202596,202597.jpg,2
202597,202598.jpg,2


In [3]:
train_len = len(df[df["type"] == 0])
val_len = len(df[df["type"] == 1])
test_len = len(df[df["type"] == 2])

print("train: ", train_len, "val: ", val_len, "test: ", test_len)
print("Total:", len(df))

train:  162770 val:  19867 test:  19962
Total: 202599


In [4]:
attr_df = get_annotation("Anno/list_attr_celeba.txt", verbose=False)
attr_df

Unnamed: 0,filepath,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
2,000003.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,1,-1,-1,-1,-1,-1,1
3,000004.jpg,-1,-1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,1,-1,1,1,-1,1
4,000005.jpg,-1,1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202594,202595.jpg,-1,-1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1
202595,202596.jpg,-1,-1,-1,-1,-1,1,1,-1,-1,...,-1,1,1,-1,-1,-1,-1,-1,-1,1
202596,202597.jpg,-1,-1,-1,-1,-1,-1,-1,-1,1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
202597,202598.jpg,-1,1,1,-1,-1,-1,1,-1,1,...,-1,1,-1,1,1,-1,1,-1,-1,1


In [5]:
data_df = pd.concat([df.set_index('filepath'), attr_df.set_index('filepath')], axis=1, join="inner")
data_df

Unnamed: 0_level_0,type,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
filepath,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000001.jpg,0,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,1,-1,1,-1,1,-1,-1,1
000002.jpg,0,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
000003.jpg,0,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,1,-1,-1,-1,-1,-1,1
000004.jpg,0,-1,-1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,1,-1,1,1,-1,1
000005.jpg,0,-1,1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202595.jpg,2,-1,-1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1
202596.jpg,2,-1,-1,-1,-1,-1,1,1,-1,-1,...,-1,1,1,-1,-1,-1,-1,-1,-1,1
202597.jpg,2,-1,-1,-1,-1,-1,-1,-1,-1,1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
202598.jpg,2,-1,1,1,-1,-1,-1,1,-1,1,...,-1,1,-1,1,1,-1,1,-1,-1,1


In [6]:
# data_df[data_df["5_o_Clock_Shadow"] == -1]
for c in data_df.columns:
    data_df.loc[data_df[c] == -1, c] = 0
#     data_df[data_df[c] == -1] = 0
data_df

Unnamed: 0_level_0,type,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
filepath,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000001.jpg,0,0,1,1,0,0,0,0,0,0,...,0,1,1,0,1,0,1,0,0,1
000002.jpg,0,0,0,0,1,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,1
000003.jpg,0,0,0,0,0,0,0,1,0,0,...,0,0,0,1,0,0,0,0,0,1
000004.jpg,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,1,0,1,1,0,1
000005.jpg,0,0,1,1,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202595.jpg,2,0,0,1,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,1
202596.jpg,2,0,0,0,0,0,1,1,0,0,...,0,1,1,0,0,0,0,0,0,1
202597.jpg,2,0,0,0,0,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,1
202598.jpg,2,0,1,1,0,0,0,1,0,1,...,0,1,0,1,1,0,1,0,0,1


In [8]:
train_df = data_df[data_df["type"] == 0].drop(columns=["type"])
eval_df = data_df[data_df["type"] == 1].drop(columns=["type"])
test_df = data_df[data_df["type"] == 2].drop(columns=["type"])

In [9]:
# save to file
train_df.to_csv("train_40_att_list.txt", sep=" ", header=False)
eval_df.to_csv("val_40_att_list.txt", sep=" ", header=False)
test_df.to_csv("test_40_att_list.txt", sep=" ", header=False)

In [1]:
from torchvision import get_image_backend
t = get_image_backend()
t

'PIL'

In [2]:
from PIL import Image
def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

In [4]:
img = pil_loader("./img_align_celeba/202595.jpg")

In [6]:
img.shape

AttributeError: 'Image' object has no attribute 'shape'

In [7]:
import torchvision.transforms as transforms

In [11]:
transform = transforms.ToTensor()
t = transform(img)
t.shape

torch.Size([3, 218, 178])

In [12]:
t

tensor([[[0.8706, 0.8706, 0.8745,  ..., 0.8549, 0.8510, 0.8510],
         [0.8706, 0.8706, 0.8745,  ..., 0.8549, 0.8510, 0.8510],
         [0.8706, 0.8706, 0.8745,  ..., 0.8549, 0.8510, 0.8510],
         ...,
         [0.8588, 0.8510, 0.8392,  ..., 0.8118, 0.7137, 0.7608],
         [0.8745, 0.8588, 0.8392,  ..., 0.8549, 0.6549, 0.8353],
         [0.8745, 0.8588, 0.8392,  ..., 0.8588, 0.6549, 0.8353]],

        [[0.8824, 0.8824, 0.8863,  ..., 0.8588, 0.8510, 0.8510],
         [0.8824, 0.8824, 0.8863,  ..., 0.8588, 0.8510, 0.8510],
         [0.8824, 0.8824, 0.8863,  ..., 0.8588, 0.8510, 0.8510],
         ...,
         [0.8431, 0.8471, 0.8549,  ..., 0.7765, 0.7098, 0.7569],
         [0.8588, 0.8549, 0.8549,  ..., 0.8118, 0.6471, 0.8275],
         [0.8588, 0.8549, 0.8549,  ..., 0.8118, 0.6431, 0.8235]],

        [[0.9176, 0.9176, 0.9216,  ..., 0.8784, 0.8902, 0.8902],
         [0.9176, 0.9176, 0.9216,  ..., 0.8784, 0.8902, 0.8902],
         [0.9176, 0.9176, 0.9216,  ..., 0.8784, 0.8902, 0.