In [6]:
import os
import cv2
import numpy as np
import dlib
from skimage.feature import local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, roc_auc_score
from sklearn.preprocessing import StandardScaler
import joblib  # Import joblib to save the model

# Load the Dlib facial landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# Paths to datasets
down_syndrome_path = 'downSyndrome'
healthy_path = 'healty'  # Make sure this path is correct

# Constants for LBP
RADIUS = 2
POINTS = 8 * RADIUS
METHOD = 'uniform'

# Size of patches around landmarks
PATCH_SIZE = 16

# List of specific landmarks to extract patches from
landmark_indices = [
    36, 39, 42, 45, 27, 30, 33, 31, 35, 51, 48, 54, 57, 68
]

# Define landmark pairs for geometric features
pairs = [
    (36, 39), (39, 42), (42, 45), (27, 30), (30, 33),
    (33, 31), (33, 35), (30, 31), (30, 35), (33, 51),
    (51, 48), (51, 54), (51, 57), (48, 57), (54, 57),
    (39, 68), (42, 68)
]

# Check if the face is frontal by verifying symmetry
def is_frontal_face(landmarks):
    left_eye = np.mean(np.array(landmarks[36:42]), axis=0)
    right_eye = np.mean(np.array(landmarks[42:48]), axis=0)
    nose_tip = np.array(landmarks[30])
    eye_distance = np.linalg.norm(left_eye - right_eye)
    nose_to_left_eye = np.linalg.norm(nose_tip - left_eye)
    nose_to_right_eye = np.linalg.norm(nose_tip - right_eye)
    symmetry_threshold = 0.15 * eye_distance
    return abs(nose_to_left_eye - nose_to_right_eye) < symmetry_threshold

# Modified function to get landmarks and add the 69th point
def get_landmarks(image_input):
    if isinstance(image_input, str):
        img = cv2.imread(image_input)
    else:
        img = image_input
    
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)
    for face in faces:
        landmarks = predictor(gray, face)
        points = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
        midpoint_x = (points[21][0] + points[22][0]) // 2
        midpoint_y = (points[21][1] + points[22][1]) // 2
        points.append((midpoint_x, midpoint_y))
        if is_frontal_face(points):
            return points
    return None

# Data augmentation function
def augment_image(image):
    augmented_images = []
    flipped = cv2.flip(image, 1)
    augmented_images.append(flipped)
    rows, cols = image.shape[:2]
    for angle in [-10, 10]:
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
        rotated = cv2.warpAffine(image, M, (cols, rows))
        augmented_images.append(rotated)
    return augmented_images

# Function to extract patches around specific landmarks
def extract_patches(image, landmarks, indices, patch_size=PATCH_SIZE):
    patches = []
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    for idx in indices:
        (x, y) = landmarks[idx]
        x_start = max(x - patch_size // 2, 0)
        y_start = max(y - patch_size // 2, 0)
        x_end = min(x + patch_size // 2, gray.shape[1])
        y_end = min(y + patch_size // 2, gray.shape[0])
        patch = gray[y_start:y_end, x_start:x_end]
        if patch.size > 0:
            patches.append(patch)
    return patches

# Function to extract LBP features from patches
def extract_lbp_from_patches(patches, radius=RADIUS, points=POINTS, method=METHOD):
    lbp_features = []
    for patch in patches:
        lbp = local_binary_pattern(patch, points, radius, method)
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, points + 3), range=(0, points + 2))
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-6)
        lbp_features.extend(hist)
    return lbp_features

# Function to extract geometric features from specific landmark pairs
def extract_geometric_features(landmarks, pairs):
    geom_features = []
    for i, j in pairs:
        p1 = landmarks[i]
        p2 = landmarks[j]
        distance = np.linalg.norm(np.array(p1) - np.array(p2))
        geom_features.append(distance)
    return geom_features

# Function to extract combined features (LBP + Geometric)
def get_combined_features(image, pairs):
    landmarks = get_landmarks(image)
    if landmarks:
        patches = extract_patches(image, landmarks, landmark_indices)
        lbp_features = extract_lbp_from_patches(patches)
        geom_features = extract_geometric_features(landmarks, pairs)
        combined_features = lbp_features + geom_features
        return combined_features
    return None

# Collect data and labels
X = []
y = []

for path, label in [(down_syndrome_path, 1), (healthy_path, 0)]:
    for img_file in os.listdir(path):
        img_path = os.path.join(path, img_file)
        img = cv2.imread(img_path)
        if img is not None:
            combined_features = get_combined_features(img, pairs)
            if combined_features:
                X.append(combined_features)
                y.append(label)
                augmented_images = augment_image(img)
                for aug_img in augmented_images:
                    aug_features = get_combined_features(aug_img, pairs)
                    if aug_features:
                        X.append(aug_features)
                        y.append(label)

X = np.array(X)
y = np.array(y)

# Handle NaNs and scale features
X = np.nan_to_num(X)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the SVM classifier with class weights
model = SVC(C=1, kernel='rbf', gamma='scale', probability=True, class_weight='balanced')
model.fit(X_train, y_train)

# Save the trained model and scaler
joblib.dump({'model': model, 'scaler': scaler}, 'trained_model.pkl')
print("Model trained and saved as 'trained_model.pkl'")

# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print evaluation results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Model trained and saved as 'trained_model.pkl'
Accuracy: 0.9040639573617588
Precision: 0.8938547486033519
Recall: 0.903954802259887
AUC: 0.9040581073090104

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.90      0.91       793
           1       0.89      0.90      0.90       708

    accuracy                           0.90      1501
   macro avg       0.90      0.90      0.90      1501
weighted avg       0.90      0.90      0.90      1501

