In [None]:
# Imports
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.preprocessing import image
from PIL import Image
from scipy.spatial.distance import euclidean
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, f1_score, log_loss
from sklearn.ensemble import RandomForestClassifier
import joblib
import traceback
import pickle
import os

# Trained artifacts are kept in a 'models' folder located in the parent of the
# current working directory; the folder is created on first use.
_cwd_parent = os.path.dirname(os.getcwd())
MODELS_DIR = os.path.abspath(os.path.join(_cwd_parent, 'models'))
os.makedirs(MODELS_DIR, exist_ok=True)

SyntaxError: invalid syntax (1611736120.py, line 17)

In [None]:
# STEP 1-3: Load data, preprocess, train and evaluate Logistic Regression (leakage-free)
csv_path = 'image_features_clean.csv'
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"CSV not found: {csv_path} - run the extraction cell first")

# Load the per-image feature table
df = pd.read_csv(csv_path)
print('CSV shape:', df.shape)
display(df.head())

# Determine label column: prefer an explicit 'label', otherwise take the part of
# the filename before the first underscore.
if 'label' in df.columns:
    y_raw = df['label'].astype(str)
elif 'filename' in df.columns:
    y_raw = df['filename'].astype(str).apply(lambda s: s.split('_')[0])
else:
    raise ValueError("CSV must contain a 'label' or 'filename' column to derive identity labels")

# Build feature matrix X: drop filename/label if present
X_df = df.drop(columns=[c for c in ['filename', 'label'] if c in df.columns])
print('Feature matrix shape (before conversion):', X_df.shape)

# Ensure numeric: fail early with the offending column names instead of a cast error
non_numeric = [c for c in X_df.columns if not pd.api.types.is_numeric_dtype(X_df[c])]
if non_numeric:
    raise ValueError(f"Non-numeric feature columns found: {non_numeric}")

X = X_df.astype(np.float32).values

# Encode labels as integers; `le` is saved below so predictions can be decoded later
le = LabelEncoder()
y = le.fit_transform(y_raw)
print('Classes:', list(le.classes_))

# Train/test split (stratify) BEFORE scaling to avoid leakage
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print('Train shape:', X_train.shape, 'Test shape:', X_test.shape)

# Scale features: fit scaler on train only, then reuse the fitted statistics on test
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Logistic Regression.
# `multi_class` is intentionally not passed: it is deprecated in recent scikit-learn
# releases, and with solver='lbfgs' a multiclass target is already fitted with the
# multinomial loss by default.
model = LogisticRegression(max_iter=2000, solver='lbfgs')
# Any failure in fit() propagates as-is; the notebook already renders the traceback.
model.fit(X_train_scaled, y_train)
print('\nModel training completed')

# Evaluate on the held-out split
y_pred = model.predict(X_test_scaled)
probs = model.predict_proba(X_test_scaled)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
loss = log_loss(y_test, probs, labels=model.classes_)
print(f"\nTest accuracy: {acc:.4f}  weighted F1: {f1:.4f}  log loss: {loss:.4f}")
print('\nClassification report:\n')
# Passing `labels` pins the report rows to the encoder's classes, so `target_names`
# always lines up even if some class is missing from the test split.
print(classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(le.classes_)),
    target_names=le.classes_,
))

# Save model + preprocessing (joblib) to models directory
joblib.dump(model, os.path.join(MODELS_DIR, 'face_auth_logreg.joblib'))
joblib.dump(scaler, os.path.join(MODELS_DIR, 'face_auth_scaler.joblib'))
joblib.dump(le, os.path.join(MODELS_DIR, 'face_auth_label_encoder.joblib'))
print(f'Saved models to {MODELS_DIR}: face_auth_logreg.joblib, face_auth_scaler.joblib, face_auth_label_encoder.joblib')


CSV shape: (48, 1282)


Unnamed: 0,filename,label,0,1,2,3,4,5,6,7,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,Aubert_neutral.jpeg,Aubert,0.592436,0.445374,0.138666,0.199406,0.234222,1.502202,0.550103,0.029583,...,0.860995,2.957347,2.697805,0.051768,0.00227,0.845628,0.095015,0.069172,2.189625,0.300332
1,Aubert_smiling.jpeg,Aubert,0.333226,0.010147,0.001928,0.778736,0.052578,1.230657,0.227494,0.366348,...,0.628935,1.751879,2.30138,0.268388,0.0,0.576595,0.187293,0.178212,1.706536,0.716214
2,Aubert_surprised.jpeg,Aubert,0.247052,0.007888,0.388474,0.917524,0.08237,0.932131,0.371109,0.423742,...,0.407668,1.974943,2.590067,0.006534,0.0,0.847615,0.058786,0.075061,1.09072,0.925528
3,Jade_neutral.jpeg,Jade,0.0,0.130793,0.0,0.497212,0.0,1.079102,0.007094,0.659984,...,0.88214,0.652241,1.449127,0.0,0.124035,0.267299,0.54098,0.071052,0.891298,0.545639
4,Jade_smiling.jpeg,Jade,0.0,1.095859,0.020419,0.404002,0.227005,0.957205,0.154348,1.643815,...,1.284185,0.727041,2.619657,0.0,0.081618,0.630383,0.13969,0.729029,0.044618,0.548145


Feature matrix shape (before conversion): (48, 1280)
Classes: ['Aubert', 'Jade', 'Liliane', 'Pauline']
Train shape: (38, 1280) Test shape: (10, 1280)

Model training completed

Test accuracy: 1.0000  weighted F1: 1.0000  log loss: 0.1102

Classification report:

              precision    recall  f1-score   support

      Aubert       1.00      1.00      1.00         3
        Jade       1.00      1.00      1.00         2
     Liliane       1.00      1.00      1.00         3
     Pauline       1.00      1.00      1.00         2

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10

Saved: face_auth_logreg.joblib, face_auth_scaler.joblib, face_auth_label_encoder.joblib




In [None]:
# Model comparison: Random Forest and XGBoost (if available)
results = []

# Append logistic regression metrics (from previous cell). If that cell has not
# been run, say so explicitly instead of silently omitting the row.
try:
    results.append(('LogisticRegression', acc, f1, loss))
except NameError:
    print('LogisticRegression metrics not found; run the training cell to include them')

# Train Random Forest on the same scaled split used for logistic regression
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)
probs_rf = rf.predict_proba(X_test_scaled)
acc_rf = accuracy_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')
loss_rf = log_loss(y_test, probs_rf, labels=rf.classes_)
results.append(('RandomForest', acc_rf, f1_rf, loss_rf))
joblib.dump(rf, os.path.join(MODELS_DIR, 'face_auth_rf.joblib'))

# XGBoost is an optional dependency: a missing package is expected and only noted,
# whereas a failure while training/saving is reported separately with its traceback
# so a real error is not mistaken for "not installed".
try:
    import xgboost as xgb
except ImportError as e:
    xgb = None
    print('XGBoost not available:', e)

if xgb is not None:
    try:
        xgb_clf = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
        xgb_clf.fit(X_train_scaled, y_train)
        y_pred_xgb = xgb_clf.predict(X_test_scaled)
        probs_xgb = xgb_clf.predict_proba(X_test_scaled)
        acc_xgb = accuracy_score(y_test, y_pred_xgb)
        f1_xgb = f1_score(y_test, y_pred_xgb, average='weighted')
        loss_xgb = log_loss(y_test, probs_xgb, labels=xgb_clf.classes_)
        results.append(('XGBoost', acc_xgb, f1_xgb, loss_xgb))
        joblib.dump(xgb_clf, os.path.join(MODELS_DIR, 'face_auth_xgboost.joblib'))
    except Exception as e:
        # Best-effort: the comparison table is still produced without the XGBoost row
        print('XGBoost failed to train or save:', e)
        traceback.print_exc()

# Show results summary
df_results = pd.DataFrame(results, columns=['model', 'accuracy', 'f1_weighted', 'log_loss'])
print(df_results)


XGBoost not available or failed to train: No module named 'xgboost'
                model  accuracy  f1_weighted  log_loss
0  LogisticRegression       1.0     1.000000  0.110218
1        RandomForest       0.8     0.783333  0.755667


In [None]:
# Store the classifier under a second filename, together with the unscaled
# training features and their encoded labels, inside the models directory.
alt_model_file = os.path.join(MODELS_DIR, "face_recognition_model.joblib")
known_features_file = os.path.join(MODELS_DIR, "known_features.npz")

joblib.dump(model, alt_model_file)
np.savez_compressed(known_features_file, X_train=X_train, y_train=y_train)
print(f'Saved to {MODELS_DIR}: face_recognition_model.joblib, known_features.npz')


Saved: face_recognition_model.joblib, known_features.npz


In [None]:
# MobileNetV2 with ImageNet weights, no classification head and global average
# pooling, so predict() yields one pooled feature vector per 224x224 RGB image.
mobilenet_model = MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    pooling='avg',
    weights='imagenet',
)

def load_and_preprocess_image(img_path):
    """Read an image file and turn it into a single-image batch for MobileNetV2.

    The image is converted to RGB, resized to 224x224, passed through
    `preprocess_input`, and given a leading batch axis.

    Args:
        img_path: Path of the image file to open.

    Returns:
        The preprocessed array with a leading batch axis of size 1, or None if
        anything goes wrong while opening or converting the image (the error is
        printed rather than raised).
    """
    try:
        # The context manager releases the file handle as soon as the pixel data
        # has been copied into the converted/resized image.
        with Image.open(img_path) as img:
            rgb_resized = img.convert('RGB').resize((224, 224))
        img_array = np.asarray(rgb_resized, dtype=np.float32)
        img_array = preprocess_input(img_array)
        return np.expand_dims(img_array, axis=0)
    except Exception as e:
        # Best-effort loader: callers check for None instead of handling exceptions
        print(" Error loading image:", e)
        return None


In [None]:
def _decode_label(label, encoder):
    """Translate an integer-encoded class into its name; fall back to `label` itself."""
    if encoder is None or not isinstance(label, (int, np.integer)):
        return label
    try:
        return encoder.inverse_transform([label])[0]
    except Exception:
        # Best-effort decoding: an encoder that rejects this label must not abort the run
        return label


def test_on_unseen_face(img_path, distance_threshold=0.6):
    """Classify one face image and print an access decision.

    Steps: preprocess the image, extract a MobileNetV2 feature vector, scale it
    with the saved StandardScaler, run the saved classifier, then compare the raw
    feature vector against the stored training features by Euclidean distance.

    Args:
        img_path: Path of the image to test.
        distance_threshold: Access is granted only when the smallest Euclidean
            distance to a stored training feature vector is below this value.
            NOTE(review): the distance is computed on unscaled features; the
            recorded runs show 0.0 for a training image and about 20 for another
            image, so 0.6 looks very strict - confirm the intended scale.

    Returns:
        None. All results are printed. Returns early (after the loader printed the
        error) when the image cannot be loaded.

    Raises:
        FileNotFoundError: If none of the candidate classifier files can be loaded.
    """
    print(f"\n Testing on image: {os.path.basename(img_path)}")

    img_tensor = load_and_preprocess_image(img_path)
    if img_tensor is None:
        return

    # Load classifier (try alternate filenames) from models directory.
    # Missing files are skipped quietly; files that exist but fail to load are reported.
    model = None
    for candidate in ['face_recognition_model.joblib', 'face_auth_logreg.joblib', 'face_auth_rf.joblib']:
        model_path = os.path.join(MODELS_DIR, candidate)
        if not os.path.exists(model_path):
            continue
        try:
            model = joblib.load(model_path)
        except Exception as e:
            print(f"Could not load {model_path}: {e}")
            continue
        print(f"Loaded model: {model_path}")
        break
    if model is None:
        raise FileNotFoundError("No trained model found. Run the training cell first.")

    # Load the scaler saved by the training cell. The classifiers were fitted on
    # scaled features, so the same transform has to be applied before predicting.
    scaler_local = None
    scaler_path = os.path.join(MODELS_DIR, 'face_auth_scaler.joblib')
    if os.path.exists(scaler_path):
        try:
            scaler_local = joblib.load(scaler_path)
        except Exception as e:
            print(f"Could not load scaler {scaler_path}: {e}")
    if scaler_local is None:
        print('No scaler found; classifier will receive unscaled features')

    # Load known features (npz) if available from models directory
    known_features = None
    known_labels = None
    known_features_path = os.path.join(MODELS_DIR, 'known_features.npz')
    if os.path.exists(known_features_path):
        # Read both arrays while the archive is open, then let it close
        with np.load(known_features_path) as data:
            known_features = data['X_train']
            known_labels = data['y_train']
    else:
        print('No known features file found; distance check will be skipped')

    # Load label encoder if present from models directory
    le_local = None
    le_path = os.path.join(MODELS_DIR, 'face_auth_label_encoder.joblib')
    if os.path.exists(le_path):
        try:
            le_local = joblib.load(le_path)
        except Exception:
            # Without the encoder, classes are printed in their encoded form
            le_local = None

    # Step 1: Extract features
    feature_vector = mobilenet_model.predict(img_tensor)[0]

    # Step 2: Predict class using classifier (on scaled features when a scaler exists)
    classifier_input = np.expand_dims(feature_vector, axis=0)
    if scaler_local is not None:
        classifier_input = scaler_local.transform(classifier_input)
    probs = model.predict_proba(classifier_input)[0]
    predicted_index = int(np.argmax(probs))
    predicted_class = model.classes_[predicted_index]
    confidence = float(probs[predicted_index])

    print("\n Prediction Probabilities:")
    for cls, prob in zip(model.classes_, probs):
        print(f"{_decode_label(cls, le_local)}: {prob:.2f}")

    decoded_pred = _decode_label(predicted_class, le_local)

    print(f"\n Predicted: {decoded_pred}")
    print(f" Confidence: {confidence:.2f}")

    # Step 3: Distance check to known features (optional).
    # The stored features are unscaled, so the raw feature vector is compared here.
    if known_features is not None and len(known_features) > 0:
        diffs = np.asarray(known_features, dtype=np.float64) - np.asarray(feature_vector, dtype=np.float64)
        distances = np.linalg.norm(diffs, axis=1)
        closest_idx = int(np.argmin(distances))
        min_distance = float(distances[closest_idx])
        closest_name = _decode_label(known_labels[closest_idx], le_local)
        print(f" Min Distance to known face: {min_distance:.4f}")
        print(f" Closest to: {closest_name}")
        if min_distance < distance_threshold:
            print(f" Access Granted to: {decoded_pred}")
        else:
            print(" Access Denied: Unknown user")
    else:
        print('Distance check skipped (no known features)')



In [None]:
# Smoke test: evaluate member2.jpeg when the file is present in the working directory
test_img = 'member2.jpeg'
if not os.path.exists(test_img):
    print("No test image 'member2.jpeg' found. Update the path or add your test image.")
else:
    print(f'Found test image: {test_img}, running test_on_unseen_face')
    try:
        test_on_unseen_face(test_img)
    except Exception as exc:
        # Report the failure without stopping the notebook
        print('Error during test_on_unseen_face:', exc)

Found test image: member2.jpeg, running test_on_unseen_face

 Testing on image: member2.jpeg
Loaded model: face_recognition_model.joblib
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step

 Prediction Probabilities:
Aubert: 0.26
Jade: 0.14
Liliane: 0.39
Pauline: 0.21

 Predicted: Liliane
 Confidence: 0.39
 Min Distance to known face: 20.1762
 Closest to: Aubert
 Access Denied: Unknown user
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step

 Prediction Probabilities:
Aubert: 0.26
Jade: 0.14
Liliane: 0.39
Pauline: 0.21

 Predicted: Liliane
 Confidence: 0.39
 Min Distance to known face: 20.1762
 Closest to: Aubert
 Access Denied: Unknown user


In [None]:
# Quick automatic test: run on Jade_neutral.jpeg if present.
# This filename also appears in the feature CSV, so this run is a sanity check on
# an already-known image rather than a test on an unseen face.
test_img = 'Jade_neutral.jpeg'
if os.path.exists(test_img):
    print(f'Found test image: {test_img}, running test_on_unseen_face')
    try:
        test_on_unseen_face(test_img)
    except Exception as e:
        # Report the failure without stopping the notebook
        print('Error during test_on_unseen_face:', e)
else:
    # Name the file that was actually looked for
    print(f"No test image '{test_img}' found. Update the path or add your test image.")

Found test image: Jade_neutral.jpeg, running test_on_unseen_face

 Testing on image: Jade_neutral.jpeg
Loaded model: face_recognition_model.joblib
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step

 Prediction Probabilities:
Aubert: 0.05
Jade: 0.82
Liliane: 0.08
Pauline: 0.04

 Predicted: Jade
 Confidence: 0.82
 Min Distance to known face: 0.0000
 Closest to: Jade
 Access Granted to: Jade

 Prediction Probabilities:
Aubert: 0.05
Jade: 0.82
Liliane: 0.08
Pauline: 0.04

 Predicted: Jade
 Confidence: 0.82
 Min Distance to known face: 0.0000
 Closest to: Jade
 Access Granted to: Jade
