Importing libraries

In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import csv

In [2]:
root = os.getcwd()
#image_folder = r"C:\Users\ASUS\datasets\image"
# Dataset root; each entry inside it is treated as one class name below.
faces_folder = r"D:\clean_dataset"
# os.listdir returns entries in arbitrary order. Sort them so that the label
# ids derived from this list are the same on every run and on every machine.
names = sorted(os.listdir(faces_folder))
print(names)

['adham', 'dennis', 'justin', 'kenneth', 'kenneth_brille', 'liza', 'liza_brille', 'miriam', 'steffen', 'syahid', 'syahid_brille', 'vincent']


Label mapping

In [3]:
# Build the label mapping {id: name}.
# Filter out '.csv' entries *before* numbering: enumerating the raw listing and
# skipping afterwards leaves a gap in the ids (and shifts every later id) as
# soon as a CSV file -- e.g. the label.csv this notebook writes into the same
# folder -- appears between two class folders.
class_names = [name for name in names if '.csv' not in name]
names_dict = dict(enumerate(class_names))

print(names_dict)

{0: 'adham', 1: 'dennis', 2: 'justin', 3: 'kenneth', 4: 'kenneth_brille', 5: 'liza', 6: 'liza_brille', 7: 'miriam', 8: 'steffen', 9: 'syahid', 10: 'syahid_brille', 11: 'vincent'}


Creating dataframe by row

In [4]:
def row_generator(folder_list,names_dict):
    """Yield one ``(file_path, name)`` row for every file in each class folder.

    Parameters
    ----------
    folder_list : iterable of str
        Root folders to scan.
    names_dict : dict
        Mapping ``id -> name``; ``name`` is looked up as a sub-folder of each
        root folder.

    Yields
    ------
    tuple of (str, str)
        ``(file_path, name)`` where ``file_path`` is ``root/name/filename``.
        Roots and names are visited in the order given; files inside one
        sub-folder are yielded in sorted order.
    """
    for folder in folder_list:
        for label_id, name in names_dict.items():
            sub_dir = os.path.join(folder, name)
            # isdir rather than exists: a regular file whose name is in the
            # mapping would pass an exists() check and then make os.listdir
            # raise NotADirectoryError.
            if not os.path.isdir(sub_dir):
                continue
            # os.listdir order is arbitrary; sort for reproducible row order.
            for filename in sorted(os.listdir(sub_dir)):
                file_path = os.path.join(sub_dir, filename)
                #yield file_path,name,label_id  # for classic network
                yield file_path,name            # for autoencoder

row_gen = row_generator([faces_folder],names_dict)

# Count the rows by draining the generator once. Iterating it directly stops
# cleanly at exhaustion, so no manual next()/StopIteration handling is needed.
image_counter = sum(1 for _ in row_gen)
print(f"End of iterator reached with {image_counter} images.")

End of iterator reached with 5699 images.


Writing the rows to a CSV file

In [5]:
# Specify the path of the CSV file

# path for autoencoder
csv_file_path = os.path.join(faces_folder, 'label.csv')

# path for classic network
#csv_file_path = os.path.join(r'C:\Users\ASUS\datasets', 'face_label_id.csv')

# Open the CSV file in write mode.
# The encoding is pinned to UTF-8: without it open() uses the platform default
# (cp1252 on Windows), while pandas.read_csv decodes as UTF-8 by default, so a
# non-ASCII file name would fail to write or be read back incorrectly.
# newline='' is what the csv module requires to avoid doubled line endings.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    # Create a CSV writer
    csv_writer = csv.writer(csv_file)

    # Write the header to the CSV file
    # classic network
    #csv_writer.writerow(['filepath', 'label', 'id'])

    # autoencoder
    csv_writer.writerow(['filepath', 'label'])

    # Iterate over the rows generated by 'row_generator'
    row_gen = row_generator([faces_folder], names_dict)
    for item in tqdm(row_gen):
        # Write each row to the CSV file
        # classic network
        #csv_writer.writerow([item[0], item[1], item[2]])

        # autoencoder
        csv_writer.writerow([item[0], item[1]])
    

5699it [00:00, 167568.69it/s]


Preprocessing for Model

In [6]:
# Load the CSV written in the previous step.
# Reusing csv_file_path instead of repeating a literal path keeps this cell in
# sync with whichever variant (autoencoder or classic network) was selected
# when the file was written.
data_df = pd.read_csv(csv_file_path)
data_df

Unnamed: 0,filepath,label,Unnamed: 3
0,D:\clean_dataset\steffen\00000.png,steffen,
1,D:\clean_dataset\steffen\00001.png,steffen,
2,D:\clean_dataset\steffen\00002.png,steffen,
3,D:\clean_dataset\steffen\00003.png,steffen,
4,D:\clean_dataset\steffen\00004.png,steffen,
...,...,...,
5694,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,vincent,11.0
5695,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,vincent,11.0
5696,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,vincent,11.0
5697,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,vincent,11.0


One-Hot Encoding

In [7]:
# Expand the "label" column into one float indicator column per class;
# every other column of data_df is carried over unchanged.
data_df_encoded = pd.get_dummies(
    data=data_df,
    dtype=float,
    columns=["label"],
)
data_df_encoded

Unnamed: 0,filepath,id,label_adham,label_dennis,label_justin,label_kenneth,label_kenneth_brille,label_liza,label_liza_brille,label_miriam,label_steffen,label_syahid,label_syahid_brille,label_vincent
0,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5694,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5695,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5696,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5697,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [8]:
# Remove the numeric "id" column if there is one. Only the classic-network CSV
# ('filepath', 'label', 'id') carries it; the autoencoder CSV has no such
# column, so errors="ignore" avoids a KeyError there and also makes this cell
# safe to re-run.
data_df_encoded = data_df_encoded.drop(columns=["id"], errors="ignore")
data_df_encoded

Unnamed: 0,filepath,label_adham,label_dennis,label_justin,label_kenneth,label_kenneth_brille,label_liza,label_liza_brille,label_miriam,label_steffen,label_syahid,label_syahid_brille,label_vincent
0,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5694,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5695,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5696,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5697,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [9]:
# get_dummies consumed the textual "label" column; add it back (index-aligned)
# next to the indicator columns.
data_df_encoded = data_df_encoded.assign(label=data_df["label"])

In [10]:
# Display the encoded frame to check that the "label" column was added.
data_df_encoded

Unnamed: 0,filepath,label_adham,label_dennis,label_justin,label_kenneth,label_kenneth_brille,label_liza,label_liza_brille,label_miriam,label_steffen,label_syahid,label_syahid_brille,label_vincent,label
0,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,adham
1,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,adham
2,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,adham
3,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,adham
4,C:\Users\ASUS\datasets\cleaned-face\adham\0000...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,adham
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5694,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,vincent
5695,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,vincent
5696,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,vincent
5697,C:\Users\ASUS\datasets\cleaned-face\vincent\00...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,vincent


In [8]:
# Persist the encoded table. The index is written too (pandas default), so it
# shows up as an unnamed first column when the file is read back.
encoded_csv_path = r"C:\Users\ASUS\datasets\face_label_encoded.csv"
data_df_encoded.to_csv(encoded_csv_path)