In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the feature table; later cells expect it to contain a 'binary_label' column.
df = pd.read_csv(filepath_or_buffer="data/feature_extracted_dataset.csv")

In [3]:
# Target vector: integer-encode the 'binary_label' column (YES -> 1, NO -> 0).
y = LabelEncoder().fit_transform(df['binary_label'])
# Feature matrix: every column except the label.
# NOTE(review): assumes no other column is derived from the label — confirm to rule out leakage.
X = df.drop('binary_label', axis=1)

In [4]:
# Candidate classifiers, keyed by the display name used in the results table.
# Seeds are pinned wherever the estimator accepts one so runs are repeatable.
# NOTE(review): LinearSVC and KNN are fitted on the features as-is (no scaling step here) —
# confirm the CSV features are already on comparable scales.
models = dict([
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('Linear SVM', LinearSVC(max_iter=10000, random_state=42)),
    ('KNN', KNeighborsClassifier()),
    ('Random Forest', RandomForestClassifier(random_state=42)),
])

In [5]:
# Number of cross-validation folds; passed as n_splits to the StratifiedKFold splitter.
NUM_FOLDS = 3

In [6]:
# Stratified K-fold splitter shared by every model evaluation.
# Shuffling uses a fixed seed so the fold assignment is reproducible.
cv = StratifiedKFold(
    n_splits=NUM_FOLDS,
    random_state=42,
    shuffle=True,
)

In [7]:
# Evaluate models
def evaluate_model(name, model):
    """Cross-validate one estimator and return its mean scores.

    The model is evaluated on the module-level ``X`` / ``y`` using the
    module-level ``cv`` splitter. All four metrics are computed in a single
    ``cross_validate`` pass, so the estimator is fitted once per fold rather
    than once per fold *per metric*.

    Parameters
    ----------
    name : str
        Display name stored under the ``'Model'`` key of the result.
    model : estimator
        Unfitted scikit-learn classifier to evaluate.

    Returns
    -------
    dict
        ``{'Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score'}`` where each
        metric is the mean of the per-fold test scores.
    """
    # Multi-metric scoring: results come back under 'test_<scorer name>'.
    scores = cross_validate(
        model,
        X,
        y,
        cv=cv,
        scoring=('accuracy', 'precision', 'recall', 'f1'),
    )
    return {
        'Model': name,
        'Accuracy': scores['test_accuracy'].mean(),
        'Precision': scores['test_precision'].mean(),
        'Recall': scores['test_recall'].mean(),
        'F1-Score': scores['test_f1'].mean(),
    }

In [8]:
# Run evaluation: score every candidate model and collect one result row per model.
results = []

for name, model in models.items():
    print(f"\n Evaluating model: {name}...")

    # Delegate to evaluate_model so the metric logic lives in one place
    # instead of being duplicated inline in this loop.
    results.append(evaluate_model(name, model))
    print(f"✔️ Metrics done for {name}")

# Convert to DataFrame (one row per model, one column per metric)
results_df = pd.DataFrame(results)
print("\nAll models evaluated.")
print(results_df)



 Evaluating model: Decision Tree...
✔️ Accuracy done for Decision Tree
✔️ Precision done for Decision Tree
✔️ Recall done for Decision Tree
✔️ F1-score done for Decision Tree

 Evaluating model: Linear SVM...
✔️ Accuracy done for Linear SVM
✔️ Precision done for Linear SVM
✔️ Recall done for Linear SVM
✔️ F1-score done for Linear SVM

 Evaluating model: KNN...
✔️ Accuracy done for KNN
✔️ Precision done for KNN
✔️ Recall done for KNN
✔️ F1-score done for KNN

 Evaluating model: Random Forest...
✔️ Accuracy done for Random Forest
✔️ Precision done for Random Forest
✔️ Recall done for Random Forest
✔️ F1-score done for Random Forest

All models evaluated.
           Model  Accuracy  Precision    Recall  F1-Score
0  Decision Tree  0.763731   0.849136  0.831970  0.840452
1     Linear SVM  0.826728   0.867100  0.907442  0.886806
2            KNN  0.783989   0.836829  0.883551  0.859526
3  Random Forest  0.801166   0.845676  0.898115  0.871086


In [9]:
# Show results: re-print the per-model metric table (results_df) as plain text.
print(results_df)

           Model  Accuracy  Precision    Recall  F1-Score
0  Decision Tree  0.763731   0.849136  0.831970  0.840452
1     Linear SVM  0.826728   0.867100  0.907442  0.886806
2            KNN  0.783989   0.836829  0.883551  0.859526
3  Random Forest  0.801166   0.845676  0.898115  0.871086
