In [148]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

df_distances = pd.read_csv(r'.\\FULLDATASET.csv')

X = df_distances.loc[:, 'Distance (1 to 2)':'Distance (67 to 68)']
y = df_distances['Result']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=2024)

"""
# Standardize features before PCA
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply PCA
pca = PCA()
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.fit_transform(X_test_scaled)
"""


'\n# Standardize features before PCA\nscaler = StandardScaler()\nX_train_scaled = scaler.fit_transform(X_train)\nX_test_scaled = scaler.transform(X_test)\n\n# Apply PCA\npca = PCA()\nX_train_pca = pca.fit_transform(X_train_scaled)\nX_test_pca = pca.fit_transform(X_test_scaled)\n'

In [149]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

"""
# Train a model with one principal component
X_train_pca_single = X_train_pca[:, :1]  # Only the first component
X_test_pca_single = X_test_pca[:, :1]

# Logistic Regression
log_reg = LogisticRegression(random_state=50, penalty='l2', C=1.)
log_reg.fit(X_train_pca_single, y_train)
log_reg_scores = cross_val_score(log_reg, X_train, y_train, cv=3)
print("Logistic Regression Cross-Validation Accuracy:", log_reg_scores.mean())
print("Logistic Regression Train Accuracy:", log_reg.score(X_train_pca_single, y_train))
print("Logistic Regression Test Accuracy:", log_reg.score(X_test_pca_single, y_test))
"""

# Decision Tree
decision_tree = DecisionTreeClassifier(random_state=50, max_depth=10, min_samples_split=5, min_samples_leaf=2)
decision_tree.fit(X_train, y_train)
decision_tree_scores = cross_val_score(decision_tree, X_train, y_train, cv=3)
print("Decision Tree Cross-Validation Accuracy:", decision_tree_scores.mean())
print("Decision Tree Train Accuracy:", decision_tree.score(X_train, y_train))
print("Decision Tree Test Accuracy:", decision_tree.score(X_test, y_test))

# Random forest
from sklearn.ensemble import RandomForestClassifier
# set up model: use all default values for tuning parameters
rand_for = RandomForestClassifier(random_state=2024, n_estimators=100, max_depth=10, min_samples_split=5, min_samples_leaf=2)
# fit the model
rand_for.fit(X_train, y_train)
rand_for_scores = cross_val_score(rand_for, X_train, y_train, cv=3)
print("Random Forest Cross-Validation Accuracy:", rand_for_scores.mean())
print("Random Forest Train Accuracy:", rand_for.score(X_train, y_train))
print("Random Forest Test Accuracy:", rand_for.score(X_test, y_test))

# Kernel polynomial
from sklearn import svm
kernel = "poly"
degree = 4
gamma = 2
# Train the SVC
poly_ker = svm.SVC(kernel=kernel, degree=degree, gamma=gamma, C=0.1, probability=True).fit(X_train, y_train)
poly_ker_scores = cross_val_score(poly_ker, X_train, y_train, cv=3)
print("Polynomial Kernel Cross-Validation Accuracy:", poly_ker_scores.mean())
print("Polynomial Kernel Train Accuracy:", poly_ker.score(X_train, y_train))
print("Polynomial Kernel Test Accuracy:", poly_ker.score(X_test, y_test))


# Radial basis function kernel
kernel = "rbf"
# Train the SVC
gamma = 2
rbf_ker = svm.SVC(kernel=kernel, gamma=gamma, C=0.1, probability=True).fit(X_train, y_train)
rbf_ker_scores = cross_val_score(rbf_ker, X_train, y_train, cv=3)
print("Radial Basis Function Kernel Cross-Validation Accuracy:", rbf_ker_scores.mean())
print("Radial Basis Function Kernel Train Accuracy:", rbf_ker.score(X_train, y_train))
print("Radial Basis Function Kernel Test Accuracy:", rbf_ker.score(X_test, y_test))


# neural network classification
mlp_net = MLPClassifier(random_state=2024, max_iter=100000, alpha=0.0001, early_stopping=True).fit(X_train, y_train)
mlp_net_scores = cross_val_score(mlp_net, X_train, y_train, cv=3)
print("Radial Basis Function Kernel Cross-Validation Accuracy:", mlp_net_scores.mean())
print("Neural network Train Accuracy:", mlp_net.score(X_train, y_train))
print("Neural network Test Accuracy:", mlp_net.score(X_test, y_test))



Decision Tree Cross-Validation Accuracy: 0.8296703296703297
Decision Tree Train Accuracy: 0.975609756097561
Decision Tree Test Accuracy: 1.0
Random Forest Cross-Validation Accuracy: 0.9029304029304029
Random Forest Train Accuracy: 0.975609756097561
Random Forest Test Accuracy: 1.0
Polynomial Kernel Cross-Validation Accuracy: 0.6849816849816851
Polynomial Kernel Train Accuracy: 1.0
Polynomial Kernel Test Accuracy: 1.0
Radial Basis Function Kernel Cross-Validation Accuracy: 0.7326007326007327
Radial Basis Function Kernel Train Accuracy: 0.7317073170731707
Radial Basis Function Kernel Test Accuracy: 1.0
Radial Basis Function Kernel Cross-Validation Accuracy: 0.7326007326007327
Neural network Train Accuracy: 0.7317073170731707
Neural network Test Accuracy: 1.0


In [None]:
import dlib
import cv2
import numpy as np
from itertools import combinations
import os

def calculate_euclidean_distances(image_path):
    """
    Calculate Euclidean distances between all pairs of facial landmarks from a given image.
    
    Parameters:
        image_path (str): Path to the image file.

    Returns:
        np.ndarray: A 2D array with shape (1, 2278) containing Euclidean distances.
    """
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Image not found or unable to load. Check the image path.")

    # Initialize dlib's face detector and shape predictor
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")  # Make sure to download this file

    # Detect faces in the image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)
    if len(faces) == 0:
        raise ValueError("No face detected in the image.")

    # Assume only one face and extract the first detected face
    face = faces[0]
    landmarks = predictor(gray, face)

    # Extract (x, y) coordinates of the 68 landmarks
    points = np.array([[landmarks.part(i).x, landmarks.part(i).y] for i in range(68)])

    # Plot the landmarks on the image
    for (x, y) in points:
        cv2.circle(image, (x, y), radius=2, color=(0, 255, 0), thickness=-1)  # Green dots

    # Display the image with landmarks
    cv2.imshow("Image with Landmarks", image)
    cv2.waitKey(0)  # Wait for a key press to close the window
    cv2.destroyAllWindows()

    # Calculate Euclidean distances between all pairs of landmarks
    distances = []
    for (i, j) in combinations(range(68), 2):
        dist = np.linalg.norm(points[i] - points[j])
        distances.append(dist)

    # Convert to 2D array with shape (1, 2278)
    distances_array = np.array(distances).reshape(1, -1)

    print("Prediction and probability for decision tree is: ", decision_tree.predict(distances_array), 
          decision_tree.predict_proba(distances_array))
    print("Prediction and probability for random forest is: ", rand_for.predict(distances_array), 
          rand_for.predict_proba(distances_array))
    print("Prediction and probability for polynomial kernel is: ", poly_ker.predict(distances_array), 
          poly_ker.predict_proba(distances_array))
    print("Prediction and probability for rbf kernel is: ", rbf_ker.predict(distances_array), 
          rbf_ker.predict_proba(distances_array))
    print("Prediction and probability for mlp neural network is: ", mlp_net.predict(distances_array), 
          mlp_net.predict_proba(distances_array))




#image_path = os.path.join("images_trump", "face_width_1.png")
image_path = os.path.join("images_class", "TanBui.jpg")
#image_path = os.path.join("images", "1_3.jpg")
print("This person's outcome in the US Elections is: ")
calculate_euclidean_distances(image_path = image_path)


This person's outcome in the US Elections is: 
[[ 23.62059139]
 [-20.8666603 ]
 [ 93.21178254]
 [ 28.83917346]
 [ 22.26325631]
 [-29.50523866]
 [ 12.14166718]
 [-38.06667963]
 [-37.07500035]
 [ 24.03901743]
 [-38.62876812]
 [ 44.74325575]
 [ -5.13655445]
 [-20.86289174]
 [ 13.95689082]
 [-27.0128381 ]
 [-34.62875911]
 [-23.61794698]
 [-40.43928108]
 [-26.40049245]
 [ 19.01853414]
 [-33.10170949]
 [ 47.4692337 ]
 [ 24.50059916]
 [-27.44211811]
 [-40.05478767]
 [ 75.4469536 ]
 [  6.46925576]
 [-34.1476053 ]
 [-27.11731776]
 [-37.85611722]
 [-37.60025914]
 [-34.48196589]
 [ 16.32983437]
 [195.05295472]
 [-27.34393691]
 [-36.54438342]
 [ 42.82017571]
 [ 32.50041014]
 [-39.70042771]
 [ -4.79184661]]
Prediction and probability for logistic regression is:  ['L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'L' 'L'] [[0.77783077 0.22216923]
 [0.70193992 0.29806008]
 [0.86684415 0.1331558

