In [1]:
import os
import cv2
from sklearn.preprocessing import LabelEncoder
from skimage.feature import hog
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
# Metrics used to evaluate the performance of the model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [2]:
# Root folder that holds the "train" and "test" splits. Set the FACE_DATA_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original local path is used unchanged.
data_root = os.environ.get(
    "FACE_DATA_DIR",
    "C:/Users/Dell/University/Machine learning/Face_splittedData",
).rstrip("/\\")

# Set up the paths to the input directories (one sub-folder per class in each split)
train_dir = f"{data_root}/train"
test_dir = f"{data_root}/test"

# Set up the image dimensions (in pixels) that every image is resized to
img_height = 224
img_width = 224


# Define a function to preprocess the images
def preprocess_images(data_dir, scaler=None, label_encoder=None, fit=True, oversample=True):
    """Load a class-per-folder image set and turn it into model-ready arrays.

    Every sub-directory of ``data_dir`` is treated as one class. Each ``.jpg``
    file inside it is read with OpenCV, resized to ``img_width`` x ``img_height``
    and labelled with the folder name. The pixel values are then standardized,
    optionally balanced with SMOTE, and the labels are integer-encoded.

    Called with only ``data_dir`` the function behaves as it always did: a new
    scaler and label encoder are fitted on this directory and SMOTE is applied.
    For a held-out split, pass the ``scaler`` and ``label_encoder`` objects that
    were fitted on the training split together with ``fit=False`` and
    ``oversample=False``; otherwise the split is standardized with its own
    statistics and padded with synthetic samples, which inflates the metrics.

    Parameters
    ----------
    data_dir : str
        Directory containing one sub-directory per class.
    scaler : StandardScaler, optional
        Scaler to use. A new one is created when omitted.
    label_encoder : LabelEncoder, optional
        Encoder to use. A new one is created when omitted.
    fit : bool, default True
        If True, fit ``scaler`` and ``label_encoder`` on this data before
        transforming; if False, only transform with the (already fitted) objects.
    oversample : bool, default True
        If True, balance the classes with ``SMOTE(random_state=42)``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        Standardized images reshaped to ``(n_samples, img_height, img_width, 3)``
        and the integer-encoded labels.

    Raises
    ------
    ValueError
        If ``fit=False`` is requested without both a scaler and a label encoder,
        or if no readable ``.jpg`` image is found under ``data_dir``.
    """
    import warnings

    if not fit and (scaler is None or label_encoder is None):
        raise ValueError("fit=False requires an already fitted scaler and label_encoder")

    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the synthetic SMOTE samples) reproducible.
    for class_name in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        for file_name in sorted(os.listdir(class_dir)):
            # Case-insensitive so that files such as "IMG_01.JPG" are not silently dropped
            if not file_name.lower().endswith('.jpg'):
                continue
            img_path = os.path.join(class_dir, file_name)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports an unreadable or corrupt file by returning None;
                # passing that on to cv2.resize would abort the whole run.
                warnings.warn(f"Skipping unreadable image: {img_path}")
                continue
            # cv2.resize takes the target size as (width, height)
            images.append(cv2.resize(img, (img_width, img_height)))
            labels.append(class_name)

    if not images:
        raise ValueError(f"No readable .jpg images found under {data_dir!r}")

    # One row of raw pixel values per image
    X_flat = np.array(images).reshape(len(images), -1)
    y = np.array(labels)

    # Normalize with mean and standard deviation. This happens before the
    # oversampling so that the statistics come from real images only.
    if scaler is None:
        scaler = StandardScaler()
    X_flat = scaler.fit_transform(X_flat) if fit else scaler.transform(X_flat)

    # Apply SMOTE to balance the classes (only meaningful for training data)
    if oversample:
        X_flat, y = SMOTE(random_state=42).fit_resample(X_flat, y)

    # Perform label encoding on the target variable (labels)
    if label_encoder is None:
        label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y) if fit else label_encoder.transform(y)

    return X_flat.reshape(-1, img_height, img_width, 3), y_encoded
    




In [3]:
# Run the same preprocessing routine over the training split first, then the testing split.
# NOTE(review): called like this, preprocess_images fits its own StandardScaler, SMOTE and
# LabelEncoder on whichever directory it receives, so the test split is standardized with
# its own statistics and contains synthetic samples. Metrics computed on it are therefore
# optimistic; the test split should reuse the transforms fitted on the training split and
# should not be oversampled.
(train_X, train_y), (test_X, test_y) = (
    preprocess_images(split_dir) for split_dir in (train_dir, test_dir)
)

In [4]:
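# Disabled scratch checks, kept for reference:
#   print(label_encoder.classes_)  -> in the code shown, label_encoder only exists inside
#                                     preprocess_images, so this would raise NameError here
#   train_X.reshape(1346, -1)      -> hard-coded sample counts; the flattening itself is
#   test_X.reshape(506, -1)           done in the next cell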

In [5]:
# Flatten each image into a 1-dimensional array of features:
# (n_samples, height, width, channels) -> (n_samples, height * width * channels).
# A single reshape yields the same values as flattening image by image, without the
# Python-level loop and the extra full-size copies of the data.
train_X_flat = train_X.reshape(len(train_X), -1)
test_X_flat = test_X.reshape(len(test_X), -1)




In [6]:
# Display the shape of the flattened test matrix: (number of test samples, features per image)
test_X_flat.shape

(506, 150528)

In [10]:
# SVM with a non-linear kernel (disabled alternative):
#svm = SVC(C=1, kernel='poly', degree=3, gamma='scale', coef0=0, shrinking=True, probability=True, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)
# SVM with a linear kernel (the configuration that is actually trained)
svm = SVC(kernel='linear', C=1, random_state=42)

# Train SVM on the preprocessed training data
svm.fit(train_X_flat, train_y)

# Predict labels for the preprocessed testing data
pred_y = svm.predict(test_X_flat)

# precision/recall/F1 default to average='binary', which raises as soon as more than
# two classes are present. Keep the binary definition for a two-class problem (results
# unchanged) and fall back to macro averaging otherwise.
average = 'binary' if np.union1d(test_y, pred_y).size <= 2 else 'macro'

# Evaluate the performance of the classifier.
# NOTE(review): these scores are only a fair estimate if test_X_flat/test_y were prepared
# with training-set statistics and without synthetic samples; verify how they were built.
acc = accuracy_score(test_y, pred_y)
rec = recall_score(test_y, pred_y, average=average)
prec = precision_score(test_y, pred_y, average=average)
f1 = f1_score(test_y, pred_y, average=average)

print(f"Accuracy: {acc:.3f}")
print(f"Recall: {rec:.3f}")
print(f"Precision: {prec:.3f}")
print(f"F1_score: {f1:.3f}")

Accuracy: 1.000
Recall: 1.000
Precision: 1.000
F1_score: 1.000


In [None]:
# Sanity check: print the number of ground-truth labels, then the number of predictions
for label_array in (test_y, pred_y):
    print(len(label_array))