In [2]:
import numpy as np
from sklearn.model_selection import KFold, cross_validate
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [15]:
def train_and_evaluate(dataframe, k=5, max_iter=10000):
    # --- Data Preparation ---
    df = dataframe.iloc[1:].copy()  # Skip the first row
    X = df.iloc[:, 2:]
    y = df.iloc[:, 1]

    # --- Pipeline: StandardScaler + SVC ---
    # SVM is sensitive to scale, so standardizing often helps performance and convergence
    svm_pipeline = make_pipeline(
        StandardScaler(),
        SVC(kernel='rbf', max_iter=max_iter, random_state=42)
    )

    # --- Cross Validation Setup ---
    cv = KFold(n_splits=k, shuffle=True, random_state=42)
    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    
    # --- Perform Cross Validation ---
    cv_results = cross_validate(svm_pipeline, X, y, cv=cv, scoring=scoring)

    # --- Calculate Average Metrics ---
    avg_accuracy  = np.mean(cv_results['test_accuracy'])
    avg_precision = np.mean(cv_results['test_precision_macro'])
    avg_recall    = np.mean(cv_results['test_recall_macro'])
    avg_f1        = np.mean(cv_results['test_f1_macro'])

    # --- Print or Return Results ---
    metrics = {
        'accuracy':  avg_accuracy,
        'precision': avg_precision,
        'recall':    avg_recall,
        'f1_score':  avg_f1
    }

    print(f"SVM CV Metrics (averaged over {k} folds):")
    for metric_name, value in metrics.items():
        print(f"{metric_name.capitalize()}: {value*100:.2f}")

    return metrics

In [16]:
# Combined dataset
df = pd.read_csv('Datasets/combined_dataset.csv')
train_and_evaluate(df)

SVM CV Metrics (averaged over 5 folds):
Accuracy: 93.03
Precision: 90.86
Recall: 83.09
F1_score: 86.14


{'accuracy': np.float64(0.9302905282435987),
 'precision': np.float64(0.9085915798625376),
 'recall': np.float64(0.8309122093659251),
 'f1_score': np.float64(0.8614017103453468)}

In [17]:
# Motor only dataset
df = pd.read_csv('Datasets/motor_only.csv')
train_and_evaluate(df)

SVM CV Metrics (averaged over 5 folds):
Accuracy: 92.94
Precision: 90.37
Recall: 85.27
F1_score: 87.46


{'accuracy': np.float64(0.9293923250987655),
 'precision': np.float64(0.9036750538738378),
 'recall': np.float64(0.8527257191663727),
 'f1_score': np.float64(0.8745503363468794)}

In [18]:
# Non-motor only dataset
df = pd.read_csv('Datasets/non_motor_only.csv')
train_and_evaluate(df)

SVM CV Metrics (averaged over 5 folds):
Accuracy: 83.22
Precision: 80.81
Recall: 65.05
F1_score: 68.73


{'accuracy': np.float64(0.832219004161091),
 'precision': np.float64(0.80812093192804),
 'recall': np.float64(0.6504552696369107),
 'f1_score': np.float64(0.6873406321442996)}

In [19]:
# Objective only dataset
df = pd.read_csv('Datasets/objective_only.csv')
train_and_evaluate(df)

SVM CV Metrics (averaged over 5 folds):
Accuracy: 91.73
Precision: 89.82
Recall: 82.72
F1_score: 85.60


{'accuracy': np.float64(0.9173074055250791),
 'precision': np.float64(0.8982259314000232),
 'recall': np.float64(0.8271982266881339),
 'f1_score': np.float64(0.8559629283060692)}

In [20]:
# Self Report only dataset
df = pd.read_csv('Datasets/self_report_only.csv')
train_and_evaluate(df)

SVM CV Metrics (averaged over 5 folds):
Accuracy: 87.68
Precision: 83.99
Recall: 70.91
F1_score: 74.64


{'accuracy': np.float64(0.8767594861454073),
 'precision': np.float64(0.8399422103122957),
 'recall': np.float64(0.709055808338783),
 'f1_score': np.float64(0.7464348151164265)}