# In [None]:
import numpy as np
from pathlib import Path
from joblib import dump
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from PIL import Image
import face_recognition
from config import DATA_DIR, MODELS_DIR, FACE_ID_MODEL, LABEL_ENCODER

def load_images_and_labels(
    data_dir: Path,
    extensions: "tuple[str, ...]" = (".jpg", ".jpeg", ".png"),
) -> "tuple[np.ndarray, np.ndarray]":
    """Collect one face encoding per image from a folder-per-person dataset.

    Every immediate sub-directory of ``data_dir`` is treated as one person;
    the directory name becomes the label of every image inside it.

    Args:
        data_dir: Root directory containing one sub-directory per person.
        extensions: File suffixes (including the leading dot) accepted as
            images. Matching is case-insensitive, so ``photo.JPG`` is
            picked up as well as ``photo.jpg``.

    Returns:
        ``(X, y)`` where ``X`` holds one encoding per usable image and ``y``
        the corresponding directory-name labels. Both arrays are empty when
        no usable image is found.

    Images that cannot be read, contain no detectable face, or yield no
    encoding are skipped with a ``[WARN]`` message instead of aborting the
    whole run.
    """
    allowed = {ext.lower() for ext in extensions}
    X, y = [], []

    # Sorted traversal keeps the sample order independent of the order in
    # which the filesystem happens to list entries.
    for person_dir in sorted(data_dir.iterdir()):
        if not person_dir.is_dir():
            continue
        label = person_dir.name
        images = sorted(
            p for p in person_dir.iterdir()
            if p.is_file() and p.suffix.lower() in allowed
        )
        for img_path in images:
            # Deliberately broad: a single corrupt or unsupported image must
            # not stop the dataset from being built.
            try:
                # The context manager releases the file handle as soon as
                # the pixel data has been copied into the array.
                with Image.open(img_path) as handle:
                    img = np.array(handle.convert("RGB"), dtype=np.uint8)

                boxes = face_recognition.face_locations(img, model="hog")
                if not boxes:
                    print(f"[WARN] No face found in {img_path}")
                    continue

                # Only the first detected face is used for the encoding.
                enc = face_recognition.face_encodings(img, known_face_locations=[boxes[0]])
                if not enc:
                    print(f"[WARN] No encoding produced for {img_path}")
                    continue

                X.append(enc[0])
                y.append(label)
            except Exception as e:
                print(f"[WARN] Skipping {img_path}: {e}")
    return np.array(X), np.array(y)

def main() -> None:
    """Train the face-identification classifier and persist it to disk.

    Loads face encodings from ``DATA_DIR``, encodes the person labels as
    integers, fits a standardise-then-RBF-SVC pipeline on them, and writes
    the fitted pipeline to ``FACE_ID_MODEL`` and the label encoder to
    ``LABEL_ENCODER``.

    Raises:
        RuntimeError: If fewer than two usable faces were found, or if all
            usable faces belong to a single person (a classifier needs at
            least two classes to separate).
    """
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    print(f"[INFO] Loading images from {DATA_DIR.resolve()}")
    X, y = load_images_and_labels(DATA_DIR)

    if len(X) < 2:
        raise RuntimeError("Need at least 2 valid faces to train.")

    # An SVC cannot be fitted on a single class; fail early with a clear
    # message instead of an error raised deep inside scikit-learn.
    people = np.unique(y)
    if len(people) < 2:
        found = ", ".join(str(name) for name in people)
        raise RuntimeError(
            f"Need faces from at least 2 different people to train; found only: {found}"
        )

    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    # probability=True lets the saved model report per-class probabilities;
    # class_weight="balanced" compensates for unequal numbers of images
    # per person.
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", probability=True, class_weight="balanced"))
    clf.fit(X, y_enc)

    dump(clf, FACE_ID_MODEL)
    dump(le, LABEL_ENCODER)
    print(f"[OK] Model saved to {FACE_ID_MODEL}")
    print(f"[OK] Label encoder saved to {LABEL_ENCODER}")

# Run the training only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
