In [4]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(3112)

from scipy.stats import mode

def perform_decision_fusion(classifiers):
    # Read the TSV file
    data = pd.read_csv("C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_EMDAT30.tsv", delimiter='\t')

    # Separate features (X) and target variable (y)
    X = data.drop(["answer", "Sc_id", "Part_id"], axis=1)
    y = data["answer"]

    # Replace infinite values with np.nan
    X = X.replace([np.inf, -np.inf], np.nan)

    # Handle missing values
    imputer = SimpleImputer(strategy='mean')
    X = imputer.fit_transform(X)

    # Scale the data
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Initialize the GroupKFold cross-validator
    group_kfold = GroupKFold(n_splits=5)

    # Perform decision fusion
    metrics = []
    accuracies = []
    for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Create and train the individual classifiers
        trained_classifiers = []
        for classifier in classifiers:
            clf = classifier()
            clf.fit(X_train, y_train)
            trained_classifiers.append(clf)

        # Make predictions using each classifier
        predictions = []
        for clf in trained_classifiers:
            y_pred = clf.predict(X_test)
            predictions.append(y_pred)

        # Perform majority voting for decision fusion
        y_fused = mode(predictions, axis=0)[0][0]

        # Calculate classification report
        report = classification_report(y_test, y_fused, output_dict=True, zero_division=1)
        accuracy = accuracy_score(y_test, y_fused)
        metrics.append(report)
        accuracies.append(accuracy)

    # Calculate average metrics across all folds
    average_metrics = {}
    for metric in metrics[0]['weighted avg'].keys():
        if metric == 'support':
            continue
        average_metrics[metric] = np.mean([m['weighted avg'][metric] for m in metrics])

    average_metrics['accuracy'] = np.mean(accuracies)

    # Return classification report and accuracy
    return average_metrics

# Define the classifiers to be used for decision fusion
classifiers = [
    RandomForestClassifier,
    SVC,
    GradientBoostingClassifier,
    KNeighborsClassifier,
    AdaBoostClassifier,
    LogisticRegression,
    MLPClassifier,
    GaussianNB,
    DecisionTreeClassifier,
    LinearDiscriminantAnalysis,
    BaggingClassifier,
    ExtraTreesClassifier,
    GaussianProcessClassifier,
    RidgeClassifier,
    LinearSVC,
    CalibratedClassifierCV,
    HistGradientBoostingClassifier,
]

# Perform decision fusion
average_metrics = perform_decision_fusion(classifiers)

# Print the average metrics
print(average_metrics)




{'precision': 0.6495446113898197, 'recall': 0.6085235920852359, 'f1-score': 0.5657875243761509, 'accuracy': 0.6085235920852359}
