In [None]:
import os
import cv2
import numpy as np
import dlib
from skimage.feature import local_binary_pattern
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import time

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

down_syndrome_path = 'downSyndrome'
healthy_path = 'healty'

RADIUS = 1
POINTS = 8 * RADIUS
METHOD = 'uniform'
PATCH_SIZE = 32

landmark_indices = [36, 39, 42, 45, 27, 30, 33, 31, 35, 51, 48, 54, 57, 68]

pairs = [
    (36, 39), (39, 42), (42, 45), (27, 30), (30, 33), (33, 31),
    (33, 35), (30, 31), (30, 35), (33, 51), (51, 48), (51, 54),
    (51, 57), (48, 57), (54, 57), (39, 68), (42, 68)
]

In [None]:
def is_frontal_face(landmarks):
    left_eye = np.mean(np.array(landmarks[36:42]), axis=0)
    right_eye = np.mean(np.array(landmarks[42:48]), axis=0)
    nose_tip = np.array(landmarks[30])
    eye_distance = np.linalg.norm(left_eye - right_eye)
    nose_to_left_eye = np.linalg.norm(nose_tip - left_eye)
    nose_to_right_eye = np.linalg.norm(nose_tip - right_eye)
    symmetry_threshold = 0.3 * eye_distance
    return abs(nose_to_left_eye - nose_to_right_eye) < symmetry_threshold

def get_landmarks(image_input):
    gray = cv2.cvtColor(image_input, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)
    for face in faces:
        landmarks = predictor(gray, face)
        points = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
        midpoint_x = (points[21][0] + points[22][0]) // 2
        midpoint_y = (points[21][1] + points[22][1]) // 2
        points.append((midpoint_x, midpoint_y))
        if is_frontal_face(points):
            return points
    return None

def augment_image(image):
    augmented_images = []
    augmented_images.append(cv2.flip(image, 1))  # Flip horizontally
    rows, cols = image.shape[:2]
    for angle in [-15, -10, 0, 10, 15]:  # Rotate
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
        augmented_images.append(cv2.warpAffine(image, M, (cols, rows)))
    noise = np.random.normal(0, 10, image.shape).astype(np.uint8)  # Add Gaussian noise
    augmented_images.append(cv2.add(image, noise))
    return augmented_images

def extract_patches(image, landmarks, indices, patch_size=PATCH_SIZE):
    patches = []
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    for idx in indices:
        (x, y) = landmarks[idx]
        x_start = max(x - patch_size // 2, 0)
        y_start = max(y - patch_size // 2, 0)
        x_end = min(x + patch_size // 2, gray.shape[1])
        y_end = min(y + patch_size // 2, gray.shape[0])
        patch = gray[y_start:y_end, x_start:x_end]
        if patch.size > 0:
            patches.append(patch)
    return patches

def extract_lbp_from_patches(patches, radius=RADIUS, points=POINTS, method=METHOD):
    lbp_features = []
    for patch in patches:
        lbp = local_binary_pattern(patch, points, radius, method)
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, points + 3), range=(0, points + 2))
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-6)
        lbp_features.extend(hist)
    return lbp_features

def extract_geometric_features(landmarks, pairs):
    geom_features = []
    for i, j in pairs:
        p1 = landmarks[i]
        p2 = landmarks[j]
        distance = np.linalg.norm(np.array(p1) - np.array(p2))
        geom_features.append(distance)
    return geom_features

def align_face(image, landmarks):
    if landmarks is None or len(landmarks) < 68:
        raise ValueError("Landmarks are invalid or incomplete.")
    
    # Convert landmarks to numpy array
    landmarks = np.array(landmarks, dtype=np.float32)

    # Calculate the center of the left and right eyes
    left_eye_center = np.mean(landmarks[36:42], axis=0)
    right_eye_center = np.mean(landmarks[42:48], axis=0)

    # Calculate the angle between the eye centers
    dy = right_eye_center[1] - left_eye_center[1]
    dx = right_eye_center[0] - left_eye_center[0]
    angle = np.degrees(np.arctan2(dy, dx))

    # Calculate the center point between the two eyes
    eye_center = tuple(np.mean([left_eye_center, right_eye_center], axis=0))

    # Get the rotation matrix for the alignment
    rotation_matrix = cv2.getRotationMatrix2D(eye_center, angle, scale=1.0)

    # Perform the alignment
    aligned_image = cv2.warpAffine(
        image, 
        rotation_matrix, 
        (image.shape[1], image.shape[0]),
        flags=cv2.INTER_LINEAR
    )

    # Transform the landmarks to match the aligned image
    ones = np.ones((landmarks.shape[0], 1), dtype=np.float32)  # Homogeneous coordinates
    landmarks_hom = np.hstack([landmarks, ones])
    transformed_landmarks = rotation_matrix @ landmarks_hom.T
    transformed_landmarks = transformed_landmarks[:2].T  # Convert back to (x, y) format

    return aligned_image, transformed_landmarks

def crop_face(image, landmarks, padding_percentage=0.1):
    min_x = min(landmarks, key=lambda x: x[0])[0]
    max_x = max(landmarks, key=lambda x: x[0])[0]
    min_y = min(landmarks, key=lambda x: x[1])[1]
    max_y = max(landmarks, key=lambda x: x[1])[1]

    width = max_x - min_x
    height = max_y - min_y
    max_dimension = max(width, height)

    padding = int(max_dimension * padding_percentage)
    center_x = (min_x + max_x) // 2
    center_y = (min_y + max_y) // 2

    available_left = min_x
    available_right = image.shape[1] - max_x
    available_top = min_y
    available_bottom = image.shape[0] - max_y

    if available_left >= padding and available_right >= padding and available_top >= padding and available_bottom >= padding:
        actual_padding = padding
    else:
        actual_padding = min(available_left, available_right, available_top, available_bottom)

    size = max_dimension + actual_padding * 2

    new_min_x = max(int(center_x - size // 2), 0)
    new_max_x = min(int(center_x + size // 2), image.shape[1])
    new_min_y = max(int(center_y - size // 2), 0)
    new_max_y = min(int(center_y + size // 2), image.shape[0])

    cropped_image = image[new_min_y:new_max_y, new_min_x:new_max_x]
    cropped_image_resized = cv2.resize(cropped_image, (300, 300))

    return cropped_image_resized

def get_combined_features(image, pairs):
    landmarks = get_landmarks(image)
    if landmarks is None or len(landmarks) < 68:
        return None

    try:
        aligned_image, aligned_landmarks = align_face(image, landmarks)
    except ValueError as e:
        print(f"Error in alignment: {e}")
        return None

    cropped_image = crop_face(aligned_image, aligned_landmarks)

    cropped_landmarks = get_landmarks(cropped_image)
    if cropped_landmarks is None or len(cropped_landmarks) < 68:
        return None

    patches = extract_patches(cropped_image, cropped_landmarks, landmark_indices)
    lbp_features = extract_lbp_from_patches(patches)
    geom_features = extract_geometric_features(cropped_landmarks, pairs)

    combined_features = lbp_features + geom_features
    return combined_features


In [None]:
X = []
y = []

for path, label in [(down_syndrome_path, 1), (healthy_path, 0)]:
    for img_file in os.listdir(path):
        img_path = os.path.join(path, img_file)
        img = cv2.imread(img_path)
        if img is not None:
            combined_features = get_combined_features(img, pairs)
            if combined_features:
                X.append(combined_features)
                y.append(label)

                # Apply data augmentation
                augmented_images = augment_image(img)
                for aug_img in augmented_images:
                    aug_features = get_combined_features(aug_img, pairs)
                    if aug_features:
                        X.append(aug_features)
                        y.append(label)

Skipped image: downSyndrome/down_10.jpg (No features detected)
Skipped image: downSyndrome/down_1004.jpg (No features detected)
Skipped image: downSyndrome/down_1005.jpg (No features detected)
Skipped image: downSyndrome/down_101.jpg (No features detected)
Skipped image: downSyndrome/down_1010.jpg (No features detected)
Skipped image: downSyndrome/down_1013.jpg (No features detected)
Skipped image: downSyndrome/down_1014.jpg (No features detected)
Skipped image: downSyndrome/down_1019.jpg (No features detected)
Skipped image: downSyndrome/down_1020.jpg (No features detected)
Skipped image: downSyndrome/down_1030.jpg (No features detected)
Skipped image: downSyndrome/down_1031.jpg (No features detected)
Skipped image: downSyndrome/down_1032.jpg (No features detected)
Skipped image: downSyndrome/down_1035.jpg (No features detected)
Skipped image: downSyndrome/down_1039.jpg (No features detected)
Skipped image: downSyndrome/down_1040.jpg (No features detected)
Skipped image: downSyndrome/

In [33]:
# Convert to arrays
X = np.array(X)
y = np.array(y)

# Handle NaNs and scale features
X = np.nan_to_num(X)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# # Dimensionality reduction
# pca = PCA(n_components=50)
# X = pca.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [34]:
# Train the model
model = SVC(C=0.8, kernel='rbf', gamma='scale', probability=True, class_weight='balanced')
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

training_time = end_time - start_time
print(f"Training Time: {training_time:.2f} seconds")

Training Time: 25.97 seconds


In [35]:
cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
print(f"5-Fold Cross-Validation Accuracy: {np.mean(cv_scores):.2f}")

# Evaluate on the test set
y_pred_test = model.predict(X_test)
accuracy_test = accuracy_score(y_test, y_pred_test)
precision_test = precision_score(y_test, y_pred_test)
recall_test = recall_score(y_test, y_pred_test)
auc_test = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])

print(f"Test Accuracy: {accuracy_test:.2f}")
print(f"Test Precision: {precision_test:.2f}")
print(f"Test Recall: {recall_test:.2f}")
print(f"Test AUC: {auc_test:.2f}")
print("Test Classification Report:")
print(classification_report(y_test, y_pred_test))

# Evaluate on the training set
y_pred_train = model.predict(X_train)
accuracy_train = accuracy_score(y_train, y_pred_train)
precision_train = precision_score(y_train, y_pred_train)
recall_train = recall_score(y_train, y_pred_train)
auc_train = roc_auc_score(y_train, model.predict_proba(X_train)[:, 1])

print(f"Training Accuracy: {accuracy_train:.2f}")
print(f"Training Precision: {precision_train:.2f}")
print(f"Training Recall: {recall_train:.2f}")
print(f"Training AUC: {auc_train:.2f}")
print("Training Classification Report:")
print(classification_report(y_train, y_pred_train))

5-Fold Cross-Validation Accuracy: 0.93
Test Accuracy: 0.94
Test Precision: 0.93
Test Recall: 0.94
Test AUC: 0.98
Test Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94      1746
           1       0.93      0.94      0.94      1634

    accuracy                           0.94      3380
   macro avg       0.94      0.94      0.94      3380
weighted avg       0.94      0.94      0.94      3380

Training Accuracy: 0.96
Training Precision: 0.95
Training Recall: 0.97
Training AUC: 0.99
Training Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.96      0.96      4149
           1       0.95      0.97      0.96      3736

    accuracy                           0.96      7885
   macro avg       0.96      0.96      0.96      7885
weighted avg       0.96      0.96      0.96      7885



In [None]:
import pickle


model_pipeline = {
    'scaler': scaler,
    'model': model,
    # 'pca' : pca
}

with open('trained_model_v7.pkl', 'wb') as file:
    pickle.dump(model_pipeline, file)

print("Model saved successfully to trained_model.pkl")



Model saved successfully to trained_model.pkl
