In [None]:
import os
import csv
import time
import cv2

# Grayscale and colour images have their labels stored in two different CSV files.
def is_black_and_white(image_path):
    """Return True when the image at ``image_path`` has no colour content.

    The image is loaded with OpenCV, converted to grayscale and back to a
    three-channel image, then compared with the pixels that were loaded. The
    image is reported as black and white when the absolute differences,
    summed over every pixel and channel, stay below 1.0.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        bool: True if the summed absolute difference is below 1.0.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None;
        # fail here with the path instead of deep inside cvtColor.
        raise FileNotFoundError(f"could not read image: {image_path}")

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Expand the grayscale version back to three channels so it can be
    # compared element-wise with the loaded image.
    gray_as_bgr = cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR)
    diff = cv2.absdiff(image, gray_as_bgr)
    # sumElems returns one sum per channel; add them into a single score.
    diff_sum = cv2.sumElems(diff)
    return sum(diff_sum) < 1.0


# Column names of the label CSV, in output order.
headers = [
    # file identification
    "Name", "Path", "Identity",
    # demographic attributes
    "Gender_code", "Gender", "Age", "Race_code", "Race", "date of birth",
    # emotion attributes
    "Emotion_code", "Neutral", "Anger", "Scream", "Contempt", "Disgust",
    "Fear", "Happy", "Sadness", "Surprise",
    # accessories and facial hair
    "Sun glasses", "Scarf", "Eyeglasses", "Beard", "Hat",
    # pose
    "Angle",
]

# Input locations. Raw strings keep the backslashes literal; in a normal
# string literal sequences such as "\d" or "\o" are invalid escapes.
directory = r"root_dir\datasets\original\morph\img"
gray_attributes = r"root_dir\datasets\original\morph\MORPH_Album1_PGMDATA.csv"
colored_attributes = r"root_dir\datasets\original\morph\MORPH_Album2_comp.csv"

# Build one label record per image file found in `directory`.

IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

# Race value found in the colour-album CSV -> (label, numeric code).
COLORED_RACES = {
    "W": ("White", 0),
    "B": ("Black", 1),
    "I": ("Indian", 2),
    "H": ("Hispanic", 3),
    "A": ("Asian", 4),
    "O": ("Other", 5),
}

# Race value found in the grayscale-album CSV -> (label, numeric code).
GRAY_RACES = {
    "White": ("White", 0),
    "African-American.Black": ("Black", 1),
    "other": ("Other", 5),
}


def _load_rows_by_id(csv_path):
    """Read an attribute CSV once and index its rows by the ``id_num`` column.

    When several rows share an id, the last one in the file is kept.
    """
    with open(csv_path, "r", newline="") as file:
        return {row["id_num"]: row for row in csv.DictReader(file)}


def _parse_gender_and_age(img_name):
    """Return ``(gender_letter, age)`` parsed from an image file name.

    The second underscore-separated part of the name (before the first dot)
    is expected to hold a numeric prefix, a gender letter and the age, e.g.
    ``0M22`` or ``00M22``. The prefix length varies, so the gender letter is
    taken to be the first alphabetic character rather than a fixed index.
    Returns ``(None, "")`` when the name has no such part or no letter in it.
    """
    parts = img_name.split(".")[0].split("_")
    if len(parts) < 2:
        return None, ""
    tag = parts[1]
    for position, char in enumerate(tag):
        if char.isalpha():
            return char, tag[position + 1:]
    return None, ""


# Load both attribute files a single time instead of re-reading them for
# every image.
gray_rows = _load_rows_by_id(gray_attributes)
colored_rows = _load_rows_by_id(colored_attributes)

# Only image files produce a label; other directory entries are ignored.
image_names = [
    name for name in os.listdir(directory)
    if name.lower().endswith(IMAGE_EXTENSIONS)
]

labels_morph = []
for c, img_name in enumerate(image_names, start=1):
    if c == 1:
        time_start = time.time()

    img_path = os.path.join(directory, img_name)
    identity = img_name.split(".")[0].split("_")[0]

    # Start from a record with every output column present and empty.
    label = dict.fromkeys(headers, "")
    label["Name"] = img_name
    label["Path"] = img_path
    label["Identity"] = identity

    gender, age = _parse_gender_and_age(img_name)
    if gender == "M":
        label["Gender"] = "Male"
        label["Gender_code"] = 1
    elif gender == "F":
        label["Gender"] = "Female"
        label["Gender_code"] = -1
    else:
        # Leave the gender columns empty rather than guessing.
        print("error gender:", gender, "in", img_name)
    label["Age"] = age

    if not is_black_and_white(img_path):
        # The colour-album CSV is searched with up to two leading zeros
        # removed from the identity.
        if identity[:2] == "00":
            id_to_search = identity[2:]
        elif identity[:1] == "0":
            id_to_search = identity[1:]
        else:
            id_to_search = identity

        row = colored_rows.get(id_to_search)
        if row is None:
            print("error colored id not found:", id_to_search)
        else:
            race = row["race"]
            if race in COLORED_RACES:
                label["Race"], label["Race_code"] = COLORED_RACES[race]
            else:
                print("error colored race:", race)
            # NOTE(review): uses the same "-" -> "." normalisation as the
            # grayscale branch; confirm the expected date layout.
            label["date of birth"] = row["dob"].replace("-", ".")
    else:
        row = gray_rows.get(identity)
        if row is None:
            print("error gray id not found:", identity)
        else:
            race = row["race"]
            if race in GRAY_RACES:
                label["Race"], label["Race_code"] = GRAY_RACES[race]
            else:
                print("error gray race", race)
            label["date of birth"] = row["dob"].replace("-", ".")
            # Attributes are encoded as 1 (present) / -1 (absent).
            label["Eyeglasses"] = -1 if row["glasses"] == "False" else 1
            label["Beard"] = 1 if row["facial_hair"] == "True" else -1

    labels_morph.append(label)

    if c == 1:
        # Extrapolate the total run time from the first image.
        end_time = time.time()
        duration_sec = len(image_names) * (end_time - time_start)
        duration_hour = duration_sec / 3600
        print(f"estimated duration:{duration_hour} hours ~ {duration_sec} sec")


# Write the collected labels to disk, then read the file back as a spot check.
# Raw string: keeps the backslashes literal without invalid escape sequences.
labels_csv_path = r"root_dir\datasets\labels\morph_labels.csv"

with open(labels_csv_path, "w", newline="") as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerows(labels_morph)

with open(labels_csv_path, "r", newline="") as csv_file:
    csv_reader = csv.reader(csv_file)
    # Print every 500th line (the header counts as line 1).
    for c, line in enumerate(csv_reader, start=1):
        if c % 500 == 0:
            print(line)

There were some issues with the previous code regarding the age, the gender and the date of birth. It took 9 hours to run over the full dataset, so I chose to fix these issues by writing two other programs.
They are the programs below. I also regrouped all the data for each image in the same CSV file, "all_data_morph.csv".

In [4]:
# DATE OF BIRTH 


import pandas as pd
import os
import csv
import numpy as np
import time

# Rewrite the "date of birth" column of the label CSV from all_data_morph.csv.
# Raw strings keep the backslashes literal without invalid escape sequences.
csv_file_out = r"root_dir\datasets\labels\morph_labels.csv"
all_data = r"root_dir\datasets\original\morph\all_data_morph.csv"
directory = r"root_dir\datasets\original\morph\img"

img_names = os.listdir(directory)
nb_img = len(img_names)
df = pd.read_csv(csv_file_out)

# Only directory entries with position in [threshold, max_iteration) are
# processed, so the work can be done in several runs.
max_iteration = 60000
threshold = 40000

# Read the attribute file once; when an id appears on several rows the last
# row is kept.
with open(all_data, "r", newline="") as file:
    dob_by_id = {row["id_num"]: row["dob"] for row in csv.DictReader(file)}

last_image = None
for c, img_name in enumerate(img_names[threshold:max_iteration], start=threshold + 1):
    progression = np.round(100 * c / nb_img, 2)
    print(f"\n Progession: {progression}% \n")
    last_image = img_name

    if not img_name.lower().endswith((".png", ".jpg", ".jpeg")):
        continue

    # The id is the part of the file name before the first underscore, with
    # up to two leading zeros removed.
    id_to_search = img_name.split(".")[0].split("_")[0]
    if id_to_search[:2] == "00":
        id_to_search = id_to_search[2:]
    elif id_to_search[:1] == "0":
        id_to_search = id_to_search[1:]

    date_of_birth = dob_by_id.get(id_to_search)
    if date_of_birth is None:
        # Without a matching row there is nothing trustworthy to write.
        print("error id not found:", id_to_search)
        continue

    # Dates that start with a 4-digit field are reordered from
    # first-second-third to second.third.first; others are kept unchanged.
    parts = date_of_birth.split("-")
    if len(parts) == 3 and len(parts[0]) == 4:
        date_of_birth = parts[1] + "." + parts[2] + "." + parts[0]

    mask = df["Name"] == img_name  # rows describing this image
    df.loc[mask, "date of birth"] = date_of_birth

print('last_image: ', last_image)
df.to_csv(csv_file_out, index=False)


 Progession: 69.81% 


 Progession: 69.81% 


 Progession: 69.82% 


 Progession: 69.82% 


 Progession: 69.82% 


 Progession: 69.82% 


 Progession: 69.82% 


 Progession: 69.82% 


 Progession: 69.83% 


 Progession: 69.83% 


 Progession: 69.83% 


 Progession: 69.83% 


 Progession: 69.83% 


 Progession: 69.83% 


 Progession: 69.84% 


 Progession: 69.84% 


 Progession: 69.84% 


 Progession: 69.84% 


 Progession: 69.84% 


 Progession: 69.85% 


 Progession: 69.85% 


 Progession: 69.85% 


 Progession: 69.85% 


 Progession: 69.85% 


 Progession: 69.85% 


 Progession: 69.86% 


 Progession: 69.86% 


 Progession: 69.86% 


 Progession: 69.86% 


 Progession: 69.86% 


 Progession: 69.86% 


 Progession: 69.87% 


 Progession: 69.87% 


 Progession: 69.87% 


 Progession: 69.87% 


 Progession: 69.87% 


 Progession: 69.88% 


 Progession: 69.88% 


 Progession: 69.88% 


 Progession: 69.88% 


 Progession: 69.88% 


 Progession: 69.88% 


 Progession: 69.89% 


 Progessio

In [None]:
# AGE, GENDER

import pandas as pd
import os
import csv
import numpy as np
import time

# Rewrite the "Age", "Gender" and "Gender_code" columns of the label CSV from
# the image file names.
# Raw strings keep the backslashes literal without invalid escape sequences.
csv_file = r"root_dir\datasets\labels\morph_labels.csv"
df = pd.read_csv(csv_file)
directory = r"root_dir\datasets\original\morph\img"

# Gender letter in the file name -> (label, numeric code).
GENDERS = {"M": ("Male", 1), "F": ("Female", -1)}

# List the directory once; the count is only used for the progress display.
img_names = os.listdir(directory)
nb_img = len(img_names)

for c, img_name in enumerate(img_names, start=1):
    progression = np.round(100 * c / nb_img, 2)
    print(f"\n Progession: {progression}% \n")

    if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    fields = img_name.split('.')[0].split('_')
    if len(fields) < 2:
        print('Error name:', img_name)
        continue

    # The second field holds a prefix, the gender letter and the age. A
    # 4-character field has a 1-character prefix, otherwise the prefix is
    # taken to be 2 characters long.
    tag = fields[1]
    if len(tag) == 4:
        gender = tag[1]
        age = tag[2:]
    else:
        gender = tag[2:3]  # slice: empty string instead of IndexError
        age = tag[3:]

    if len(age) != 2:
        print("Error age:", age)

    # Update the values in the DataFrame for the rows describing this image.
    mask = (df['Name'] == img_name)
    df.loc[mask, 'Age'] = age

    if gender in GENDERS:
        gender_label, gender_code = GENDERS[gender]
        df.loc[mask, 'Gender'] = gender_label
        df.loc[mask, 'Gender_code'] = gender_code
    else:
        # Keep the existing gender columns rather than writing an unknown
        # letter or a code left over from a previous image.
        print('Error gender:', gender)

# Save the changes to the CSV file.
df.to_csv(csv_file, index=False)
    