In [2]:
import pandas as pd

In [4]:
# Mapping between CelebA-HQ indices and the original CelebA files.
# The first line of the file is skipped and explicit column names are applied.
CelebA_HQ_CelebA_Mapping = pd.read_csv(
    'data/CelebAMask-HQ/CelebA-HQ-to-CelebA-mapping.txt',
    skiprows=1,
    header=None,
    names=['idx', 'orig_idx', 'orig_file'],
    sep=r'\s+',  # raw string: columns are separated by runs of whitespace
)

# Pose annotations (yaw / pitch / roll per image).
# The first two lines of the file are skipped before parsing.
CelebA_HQ_Pose_df = pd.read_csv(
    'data/CelebAMask-HQ/CelebAMask-HQ-pose-anno.txt',
    skiprows=2,
    header=None,
    names=['image_id', 'Yaw', 'Pitch', 'Roll'],
    sep=r'\s+',  # raw string: columns are separated by runs of whitespace
)

# Path to the attribute file
file_path = 'data/CelebAMask-HQ/CelebAMask-HQ-attribute-anno.txt'

# Layout of the file as it is parsed here:
#   line 1 -> skipped (number of entries)
#   line 2 -> whitespace-separated attribute names
#   rest   -> one row per image: file name followed by one value per attribute
with open(file_path, 'r') as f:
    lines = f.readlines()

attribute_names = lines[1].split()

# Drop blank lines (e.g. a trailing newline at the end of the file); they
# would otherwise become empty rows that cannot be converted to integers.
data = [fields for fields in (line.split() for line in lines[2:]) if fields]

# Fail early, with a readable message, if any row does not have exactly
# one file name plus one value per attribute.
n_fields = len(attribute_names) + 1
malformed = [row for row in data if len(row) != n_fields]
if malformed:
    raise ValueError(
        f"{file_path}: {len(malformed)} row(s) do not have {n_fields} fields; "
        f"first offending row: {malformed[0]!r}"
    )

# Build the DataFrame and convert every attribute column from string to
# integer in a single pass; 'image_id' stays a string column.
CelebA_HQ_Attribute_df = pd.DataFrame(
    data, columns=['image_id'] + attribute_names
).astype({attr: int for attr in attribute_names})

In [5]:
# Display the mapping table (columns: idx, orig_idx, orig_file) as the cell output
CelebA_HQ_CelebA_Mapping

Unnamed: 0,idx,orig_idx,orig_file
0,0,119613,119614.jpg
1,1,99094,099095.jpg
2,2,200121,200122.jpg
3,3,81059,081060.jpg
4,4,202040,202041.jpg
...,...,...,...
29995,29995,52545,052546.jpg
29996,29996,86631,086632.jpg
29997,29997,170415,170416.jpg
29998,29998,74713,074714.jpg


In [6]:
# Display the pose table (columns: image_id, Yaw, Pitch, Roll) as the cell output
CelebA_HQ_Pose_df

Unnamed: 0,image_id,Yaw,Pitch,Roll
0,0.jpg,-16.761650,-3.540695,-0.468292
1,1.jpg,8.853630,-16.055931,-1.150886
2,2.jpg,35.265182,-6.890411,-1.581253
3,3.jpg,-16.793152,1.010948,0.133667
4,4.jpg,5.474228,-12.340668,-0.894409
...,...,...,...,...
29995,29995.jpg,0.108932,-0.664413,0.135849
29996,29996.jpg,4.065002,-1.689423,-1.262672
29997,29997.jpg,5.378021,-12.933311,1.843475
29998,29998.jpg,16.967903,-6.059593,-3.954010


In [7]:
# Display the attribute table (image_id plus one integer column per attribute) as the cell output
CelebA_HQ_Attribute_df

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,0.jpg,-1,1,1,1,-1,-1,1,-1,-1,...,-1,1,-1,1,-1,-1,1,-1,-1,1
1,1.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,-1,1,-1,-1,1,-1,-1,1
2,2.jpg,-1,-1,1,1,-1,-1,1,-1,-1,...,-1,1,-1,1,1,-1,1,-1,-1,1
3,3.jpg,-1,-1,1,-1,-1,-1,-1,1,1,...,-1,1,-1,1,-1,-1,1,-1,-1,1
4,4.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,1,-1,1,-1,-1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,29995.jpg,-1,-1,-1,-1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,1,-1,-1
29996,29996.jpg,1,-1,-1,1,-1,-1,-1,1,-1,...,1,1,-1,-1,-1,-1,-1,-1,-1,1
29997,29997.jpg,-1,-1,-1,-1,-1,1,-1,-1,1,...,-1,-1,1,-1,-1,-1,-1,-1,-1,1
29998,29998.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,1,-1,-1,1,-1,-1,1


In [8]:
# 1. Identity labels of the original CelebA dataset:
#    one space-separated (file name, identity) pair per line, no header row.
identity_df_og = pd.read_csv(
    'data/CelebAMask-HQ/identity_CelebA_Original_Dataset_.txt',
    header=None,
    names=['orig_file', 'identity'],
    sep=' ',
)

# Preview the first rows as the cell output
identity_df_og.head()

Unnamed: 0,orig_file,identity
0,000001.jpg,2880
1,000002.jpg,2937
2,000003.jpg,8692
3,000004.jpg,5805
4,000005.jpg,9295


In [9]:
# Attach the original-CelebA identity label to every CelebA-HQ image.
#
# The merge is keyed on 'orig_file' (inner join, as before). `validate`
# makes pandas raise a MergeError if the identity table lists the same
# 'orig_file' more than once, instead of silently duplicating rows.
#
# 'image_id' is derived from each row's own 'idx' ("<idx>.jpg") rather than
# copied positionally from CelebA_HQ_Attribute_df: the merged frame gets a
# fresh 0..n-1 index, so index-aligned assignment from another frame would
# mislabel every following row as soon as the merge drops or adds a row.
# NOTE(review): assumes 'idx' was parsed as integers and that HQ images are
# named "<idx>.jpg", as the displayed mapping and attribute tables show.
CelebA_HQ_Indentity_df = (
    CelebA_HQ_CelebA_Mapping
    .merge(identity_df_og, on='orig_file', validate='many_to_one')
    .assign(image_id=lambda merged: merged['idx'].astype(str) + '.jpg')
    .loc[:, ['image_id', 'orig_file', 'identity']]
)

In [10]:
# Display the identity table (columns: image_id, orig_file, identity) as the cell output
CelebA_HQ_Indentity_df

Unnamed: 0,image_id,orig_file,identity
0,0.jpg,119614.jpg,7423
1,1.jpg,099095.jpg,7319
2,2.jpg,200122.jpg,6632
3,3.jpg,081060.jpg,3338
4,4.jpg,202041.jpg,9178
...,...,...,...
29995,29995.jpg,052546.jpg,9591
29996,29996.jpg,086632.jpg,155
29997,29997.jpg,170416.jpg,787
29998,29998.jpg,074714.jpg,6902


In [12]:
# Persist the three tables as CSV files in the dataset directory;
# index=False keeps the DataFrame row index out of the written files.
CelebA_HQ_Indentity_df.to_csv(path_or_buf="data/CelebAMask-HQ/CelebA-HQ-Identity.csv", index=False)
CelebA_HQ_Attribute_df.to_csv(path_or_buf="data/CelebAMask-HQ/CelebA-HQ-Attribute.csv", index=False)
CelebA_HQ_Pose_df.to_csv(path_or_buf="data/CelebAMask-HQ/CelebA-HQ-Pose.csv", index=False)