# Import Library

In [5]:
import os
import pandas as pd
import numpy as np
import face_alignment
from face_alignment import LandmarksType
import cv2
from tqdm import tqdm

# Function Library

# Data Loading

In [2]:
# Load the cleaned BMI metadata table.
df = pd.read_csv('../data/BMI/cleaned_data.csv')

# Derive the relative on-disk path of each image from its file name.
df['img_path'] = [f"../data/BMI/Images/{file_name}" for file_name in df['name']]

# Preview the first rows, including the new `img_path` column.
df.head()

Unnamed: 0,bmi,gender,is_training,name,img_path
0,34.207396,Male,1,img_0.bmp,../data/BMI/Images/img_0.bmp
1,26.45372,Male,1,img_1.bmp,../data/BMI/Images/img_1.bmp
2,34.967561,Female,1,img_2.bmp,../data/BMI/Images/img_2.bmp
3,22.044766,Female,1,img_3.bmp,../data/BMI/Images/img_3.bmp
4,25.845588,Female,1,img_6.bmp,../data/BMI/Images/img_6.bmp


# Initialization

In [9]:
# Build the landmark detector once; it is reused for every image below.
# On first use this fetches the model checkpoint (the recorded output shows the
# 3DFAN4 weights being downloaded into the local torch hub cache).
fa = face_alignment.FaceAlignment(
    2,  # landmarks type; presumably the value of the 2.5D member of LandmarksType -- TODO confirm for the installed version
    flip_input=False,
    device='cpu',  # run inference on the CPU
)

Downloading: "https://www.adrianbulat.com/downloads/python-fan/3DFAN4-4a694010b9.zip" to /Users/bhsst/.cache/torch/hub/checkpoints/3DFAN4-4a694010b9.zip
100%|██████████| 91.9M/91.9M [00:06<00:00, 14.3MB/s]


# Landmark Extraction

In [10]:
# Extract one flattened landmark vector per image listed in `df`.
#
# Outputs (all rebuilt from scratch each time this cell runs):
#   landmarks       -- list with exactly one entry per row of `df`, in row order;
#                      either the flattened landmarks of the first detected face
#                      or a NaN placeholder of the same length when extraction failed.
#   failed_paths    -- image paths for which a NaN placeholder was stored.
#   failure_reasons -- img_path -> short text explaining why extraction failed,
#                      so failures can be inspected instead of being silently dropped.

# Length of one feature vector: 68 landmarks x (x, y) coordinates.
N_LANDMARK_VALUES = 68 * 2

landmarks = []
failed_paths = []
failure_reasons = {}

for img_path in tqdm(df['img_path'], total=len(df)):
    features = None
    reason = None
    try:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None
            # rather than raising, so check for it explicitly.
            reason = "image could not be read"
        else:
            # OpenCV loads images as BGR; convert to RGB before detection.
            preds = fa.get_landmarks(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            if preds is None or len(preds) == 0:
                reason = "no face detected"
            else:
                # Only the first detected face is used.
                # Flatten (68, 2) -> (136,)
                features = preds[0].flatten()
    except Exception as exc:
        # Best-effort: one bad image must not abort a long-running extraction,
        # but keep the error so it can be reviewed afterwards.
        reason = f"{type(exc).__name__}: {exc}"

    if features is None:
        features = [np.nan] * N_LANDMARK_VALUES
        failed_paths.append(img_path)
        failure_reasons[img_path] = reason

    landmarks.append(features)

100%|██████████| 3962/3962 [26:20<00:00,  2.51it/s]


In [11]:
# Number of images for which a NaN placeholder was stored instead of landmarks.
len(failed_paths)

1

In [12]:
# Sanity check: the extraction loop appends one entry per row of `df`.
len(landmarks)

3962

In [13]:
# Inspect the flattened landmark vector stored for the first image.
landmarks[0]

array([ 34.,  72.,  36.,  86.,  37.,  99.,  39., 111.,  42., 123.,  51.,
       131.,  61., 138.,  72., 143.,  87., 146., 102., 144., 112., 139.,
       119., 134., 124., 126., 129., 116., 131., 104., 134.,  93., 138.,
        79.,  51.,  64.,  57.,  62.,  66.,  62.,  72.,  64.,  79.,  64.,
       104.,  64., 111.,  62., 117.,  62., 124.,  64., 131.,  67.,  91.,
        78.,  91.,  88.,  91.,  96.,  91., 103.,  81., 104.,  86., 106.,
        91., 108.,  96., 106.,  99., 106.,  59.,  74.,  66.,  74.,  71.,
        72.,  77.,  76.,  72.,  78.,  66.,  78., 104.,  76., 109.,  74.,
       116.,  74., 121.,  76., 116.,  79., 109.,  78.,  69., 118.,  77.,
       116.,  86., 114.,  89., 116.,  94., 114., 101., 116., 107., 118.,
       101., 121.,  94., 123.,  89., 123.,  84., 121.,  77., 119.,  71.,
       118.,  84., 118.,  89., 118.,  94., 118., 106., 118.,  94., 118.,
        89., 118.,  84., 118.], dtype=float32)

In [14]:
# Convert to array
landmarks_array = np.array(landmarks)

# Save as .npy
np.save('landmark_features.npy', landmarks_array)

In [15]:
# Save as .csv with BMI and filename
df_landmarks = pd.DataFrame(landmarks_array)
df_landmarks['bmi'] = df['bmi'].values
df_landmarks['name'] = df['name'].values  # Optional
df_landmarks.to_csv('landmark_features.csv', index=False)

print("Saved landmark_features.npy and landmark_features.csv")

Saved landmark_features.npy and landmark_features.csv


In [16]:
# Convert to DataFrame
landmarks_array = np.array(landmarks)
df_landmarks = pd.DataFrame(landmarks_array)

# Add BMI and name for alignment
df_landmarks['bmi'] = df['bmi'].values
df_landmarks['name'] = df['name'].values

# Drop rows with any NaNs
df_landmarks = df_landmarks.dropna()

# Save clean versions
df_landmarks.to_csv('landmark_features.csv', index=False)
np.save('landmark_features.npy', df_landmarks.drop(columns=['bmi', 'name']).values)

print("Cleaned and saved without failed rows.")


Cleaned and saved without failed rows.
