In [1]:
import pandas as pd
from ultralytics import YOLO
import os, cv2, numpy as np, pandas as pd

Here we add a new `gender` label to the data loaded from `labels.csv`; it indicates whether each pose belongs to a man or a woman.

In [3]:
# Load the label table. Each row carries an `id` plus body-measurement columns
# (height, *_circ); see the preview printed at the end of this cell.
df = pd.read_csv("data/labels.csv")

# Function to detect gender based on id
def detect_gender(id_str):
    """Map a sample id to a gender code based on its prefix.

    Args:
        id_str: Sample identifier, e.g. ``"syn_m000000-0-Pre"``.

    Returns:
        ``1`` if the id starts with ``"syn_m"`` (male), ``0`` if it starts
        with ``"syn_f"`` (female), and ``-1`` for anything else, including
        values that are not strings.
    """
    # An empty CSV cell is read by pandas as NaN (a float), which has no
    # .startswith; treat any non-string id as "unknown" instead of crashing.
    if not isinstance(id_str, str):
        return -1
    if id_str.startswith("syn_m"):
        return 1   # male
    if id_str.startswith("syn_f"):
        return 0   # female
    return -1      # unknown prefix

# Derive one gender code per row from the id prefix and attach it as a column
gender_codes = df["id"].apply(detect_gender)
df["gender"] = gender_codes

# Persist the augmented table; index=False keeps the row index out of the file
df.to_csv("labels_with_gender.csv", index=False)

# Preview the first rows to confirm the new column is present
print(df.head())


                  id  height  chest_circ  waist_circ  hip_circ  thigh_circ  \
0  syn_m000000-0-Pre  177.76       83.58       73.89     96.10       53.34   
1  syn_m000000-0-Pos  177.76       83.58       73.89     96.10       53.34   
2  syn_m000000-1-Pre  177.76       86.36       69.62     96.23       53.46   
3  syn_m000000-1-Pos  177.76       86.36       69.62     96.23       53.46   
4  syn_m000000-2-Pre  177.76       90.95       65.70     96.60       53.72   

   knee_circ  calf_circ  abd_circ  neck_circ  biceps_circ  gender  
0      36.45      32.81     74.71      35.53        23.53       1  
1      36.45      32.81     74.71      35.53        23.53       1  
2      36.34      34.71     68.73      35.63        26.03       1  
3      36.34      34.71     68.73      35.63        26.03       1  
4      36.27      36.68     62.95      36.23        28.67       1  


In [None]:
# ---- helpers ----
def find_one(dirpath, candidates=("front",), exts=(".png",".jpg",".jpeg",".bmp")):
    """Return the path of the image in ``dirpath`` whose stem matches a candidate.

    Matching is case-insensitive on both the file stem and the extension.
    ``candidates`` is treated as a priority list: the first candidate that has
    a matching file wins. If several files share that stem, the
    alphabetically first file name is used, so the result does not depend on
    the order in which the operating system lists the directory.

    Args:
        dirpath: Directory to search (not recursive).
        candidates: Accepted file stems, most preferred first. A single
            string is accepted and treated as one candidate.
        exts: Accepted file extensions, including the leading dot. A single
            string is accepted and treated as one extension.

    Returns:
        ``os.path.join(dirpath, filename)`` for the selected file, or ``None``
        if ``dirpath`` is not a directory or nothing matches.
    """
    if not os.path.isdir(dirpath):
        return None

    # A bare string would otherwise be iterated character by character.
    if isinstance(candidates, str):
        candidates = (candidates,)
    if isinstance(exts, str):
        exts = (exts,)
    wanted_exts = {e.lower() for e in exts}

    # Index regular image files by lowercased stem. os.listdir returns entries
    # in arbitrary order, so sort first to make the pick reproducible.
    by_stem = {}
    for fn in sorted(os.listdir(dirpath)):
        stem, ext = os.path.splitext(fn)
        if ext.lower() not in wanted_exts:
            continue
        if not os.path.isfile(os.path.join(dirpath, fn)):
            continue  # e.g. a sub-directory that happens to be named "front.png"
        by_stem.setdefault(stem.lower(), fn)

    # Honour the caller's preference order among the candidates.
    for name in candidates:
        fn = by_stem.get(name.lower())
        if fn is not None:
            return os.path.join(dirpath, fn)
    return None

def run_pose(model, img_path):
    """Run the pose model on one image and return the first detection's keypoints.

    Args:
        model: Callable pose model; invoked as ``model(img_path, verbose=False)``
            and expected to return a sequence whose first item exposes
            ``.keypoints.xy``.
        img_path: Path of the image to analyse.

    Returns:
        The ``xy`` keypoints of the first detection, moved to CPU and converted
        to a NumPy array. Presumably shape (17, 2): callers index rows 5..16.

    Raises:
        RuntimeError: If the result has no keypoints, no ``xy`` data, or zero
            detections.
    """
    result = model(img_path, verbose=False)[0]
    keypoints = result.keypoints

    has_detection = (
        keypoints is not None
        and keypoints.xy is not None
        and len(keypoints.xy) != 0
    )
    if not has_detection:
        raise RuntimeError(f"No keypoints in {img_path}")

    # Only the first detected instance is used; any others are ignored.
    first_instance = keypoints.xy[0]
    return first_instance.cpu().numpy()

def _w(a,b): return float(abs(a[0]-b[0]))            # horizontal width
def _h(kp):  return float(kp[:,1].max()-kp[:,1].min())  # body height (px)

def feature_vector(front_kp, side_kp=None):
    """Build pixel-space features from front (and optionally side) keypoints.

    Args:
        front_kp: Keypoint array for the front view; rows 5..16 are read as
            left/right pairs (5-6 shoulders, 7-8 elbows, 11-12 hips,
            13-14 knees, 15-16 ankles).
        side_kp: Keypoint array for the side view with the same row layout,
            or ``None`` when no side image is available.

    Returns:
        A ``(feats, px_map)`` tuple of dicts:
        ``feats`` holds the raw horizontal spans and heights for both views
        (side entries are ``0.0`` when ``side_kp`` is ``None``);
        ``px_map`` holds per-body-part pixel widths derived from the front
        spans by fixed weighted combinations.
    """
    # Raw horizontal spans between left/right joints in the front view.
    shoulder = _w(front_kp[5], front_kp[6])
    hip = _w(front_kp[11], front_kp[12])
    knee = _w(front_kp[13], front_kp[14])
    elbow = _w(front_kp[7], front_kp[8])
    ankle = _w(front_kp[15], front_kp[16])

    feats = {
        "front_shoulder_px": shoulder,
        "front_hip_px": hip,
        "front_knee_px": knee,
        "front_elbow_px": elbow,
        "front_ankle_px": ankle,
        "front_height_px": _h(front_kp),
    }

    # Heuristic per-part widths: fixed blends of the joint spans above.
    chest = shoulder * 0.90
    waist = shoulder * 0.65 + hip * 0.35
    thigh = hip * 0.55 + knee * 0.45
    calf = knee * 0.55 + ankle * 0.45
    abdomen = chest * 0.35 + waist * 0.65
    neck = shoulder * 0.19
    biceps = elbow * 0.60

    # Keys stay in Portuguese: they become column names in the output CSV.
    px_map = {
        "pescoço_px": neck,
        "peito_px": chest,
        "cintura_px": waist,
        "quadril_px": hip,
        "coxa_px": thigh,
        "joelho_px": knee,
        "panturrilha_px": calf,
        "abdomen_px": abdomen,
        "biceps_px": biceps,
    }

    # Side-view spans use the same joint pairs; ankles are not measured here.
    side_pairs = (
        ("side_shoulder_span_px", 5, 6),
        ("side_hip_span_px", 11, 12),
        ("side_knee_span_px", 13, 14),
        ("side_elbow_span_px", 7, 8),
    )
    if side_kp is None:
        # No side image: keep the columns present but zeroed.
        for key, _, _ in side_pairs:
            feats[key] = 0.0
        feats["side_height_px"] = 0.0
    else:
        for key, i, j in side_pairs:
            feats[key] = _w(side_kp[i], side_kp[j])
        feats["side_height_px"] = _h(side_kp)

    return feats, px_map

def infer_gender_from_id(id_str):
    """Infer a gender code from a sample id's prefix.

    Args:
        id_str: Sample identifier, e.g. ``"syn_f000012-0-Pre"``.

    Returns:
        ``1`` for ids starting with ``"syn_m"`` (male), ``0`` for ids starting
        with ``"syn_f"`` (female), ``-1`` otherwise, including non-string
        values such as the NaN that pandas produces for an empty cell.
    """
    # Non-string ids (NaN, None, numbers) have no .startswith; report unknown.
    if not isinstance(id_str, str):
        return -1
    if id_str.startswith("syn_m"):
        return 1   # male
    if id_str.startswith("syn_f"):
        return 0   # female
    return -1

def extract_features(csv, root_map,
                     out_feats="features/yolo_features.csv",
                     out_px="features/pixel_widths_front.csv",
                     weights="yolov8m-pose.pt"):
    """Run pose estimation for every labelled sample and write two feature CSVs.

    For each row of ``csv`` the sample folder ``<root>/<id>`` is located, its
    front image (required) and side image (optional) are passed through the
    pose model, and the resulting features are collected. Samples that cannot
    be processed are skipped and listed at the end instead of aborting the run.

    Args:
        csv: Path to the labels CSV; must contain an ``id`` column. A
            ``gender`` column (1=male, 0=female, -1=unknown) is used if
            present, otherwise it is inferred from the id prefix.
        root_map: Mapping from gender code to the root folder holding that
            gender's sample folders. Entries whose value is ``None`` are
            ignored.
        out_feats: Output CSV for the features divided by the front-view
            height in pixels.
        out_px: Output CSV for the unnormalised per-body-part pixel widths.
        weights: Weights file handed to ``YOLO``.

    Returns:
        None. Results are written to ``out_feats`` and ``out_px`` and a
        summary is printed.
    """
    # Make sure the folder of *each* output exists. A bare file name has an
    # empty dirname, and os.makedirs("") raises, so skip that case.
    for out_path in (out_feats, out_px):
        parent = os.path.dirname(out_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    model = YOLO(weights)
    labels = pd.read_csv(csv)

    # If the CSV lacks a gender column, infer it; otherwise keep what is there.
    if "gender" not in labels.columns:
        labels["gender"] = labels["id"].apply(infer_gender_from_id)

    # Usable roots only: a None entry cannot be joined into a path.
    all_roots = [root for root in root_map.values() if root is not None]

    rows, px_rows, missing = [], [], []

    # Iterate the two columns directly. iterrows() would coerce each row to a
    # single dtype, which can turn non-string ids into floats.
    for _id, gender in zip(labels["id"], labels["gender"]):
        # 1=male, 0=female, -1=unknown; a blank gender cell counts as unknown.
        g = -1 if pd.isna(gender) else int(gender)

        # Choose the root by gender; fall back to trying every root when the
        # gender is unknown or has no root configured.
        preferred = root_map.get(g) if g in (0, 1) else None
        roots_to_try = [preferred] if preferred is not None else all_roots

        id_dir = None
        for root in roots_to_try:
            p = os.path.join(root, str(_id))
            if os.path.isdir(p):
                id_dir = p
                break
        if id_dir is None:
            missing.append((_id, "folder not found in any root"))
            continue

        front = find_one(id_dir, ("front",))
        side = find_one(id_dir, ("left", "side", "lateral", "lado"))
        if not front:
            missing.append((_id, "front image not found"))
            continue

        # Best-effort by design: one unreadable image or failed detection
        # should not abort the whole batch, so record it and move on.
        try:
            fkp = run_pose(model, front)
            skp = run_pose(model, side) if side else None
        except Exception as e:
            missing.append((_id, f"pose error: {e}"))
            continue

        feats, px_map = feature_vector(fkp, skp)

        # Normalise by the front-view height, clamped to >= 1 px to avoid
        # dividing by zero on degenerate detections.
        # NOTE(review): side-view features are also divided by the *front*
        # height, not the side height. Confirm this is intended.
        H = max(feats["front_height_px"], 1.0)
        feats_norm = {k: (v / H) if k.endswith("_px") else v for k, v in feats.items()}

        row = {"id": _id, "gender": g}
        row.update(feats_norm)
        rows.append(row)

        pxrow = {"id": _id, "gender": g}
        pxrow.update(px_map)
        px_rows.append(pxrow)

    pd.DataFrame(rows).to_csv(out_feats, index=False)
    pd.DataFrame(px_rows).to_csv(out_px, index=False)
    print(f"Saved features → {out_feats}  ({len(rows)} rows)")
    print(f"Saved pixel widths → {out_px}  ({len(px_rows)} rows)")
    if missing:
        print("Missing/failed:")
        for m in missing:
            print("  ", m)

# ---- run it ----
# Labels table, and the image root folder for each gender code (0=female, 1=male)
CSV = "data/labels.csv"
ROOT_MAP = {0: "data/mulheres_15k", 1: "data/homens_15k"}

extract_features(CSV, ROOT_MAP)

# Reload the normalised features that were just written and sanity-check them
df = pd.read_csv("features/yolo_features.csv")
print(df.head())
print(f"Total rows: {len(df)}")
