In [1]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import random

np.random.seed(3112)


# Read the TSV file
data = pd.read_csv(f"C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_openface.tsv", delimiter='\t')

# Separate features (X) and target variable (y)
X = data.drop(["answer", "Sc_id"], axis=1)
y = data["answer"]

# Replace infinite values with np.nan
X = X.replace([np.inf, -np.inf], np.nan)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the GroupKFold cross-validator
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
metrics = []
accuracies = []
for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the classifier
    classifier = RandomForestClassifier()
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Calculate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_pred)
    metrics.append(report)
    accuracies.append(accuracy)

# Calculate average metrics across all folds
average_metrics = {}
for metric in metrics[0]['macro avg'].keys():
    if metric == 'support':
        continue
    average_metrics[metric] = np.mean([m['macro avg'][metric] for m in metrics])

average_metrics['accuracy'] = np.mean(accuracies)

# Print average metrics
print("Average Metrics:")
for metric, value in average_metrics.items():
    print(metric, ":", value)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Average Metrics:
precision : 0.5066502744768082
recall : 0.5065667665667666
f1-score : 0.45094331023612666
accuracy : 0.5947108066971081


In [2]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import random

np.random.seed(3112)

# Read the TSV file
data = pd.read_csv(f"C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_openface.tsv", delimiter='\t')

# Separate features (X) and target variable (y)
X = data.drop(["answer", "Sc_id"], axis=1)
y = data["answer"]

# Replace infinite values with np.nan
X = X.replace([np.inf, -np.inf], np.nan)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the GroupKFold cross-validator
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
metrics = []
accuracies = []
for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the classifier
    classifier = SVC()
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Calculate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_pred)
    metrics.append(report)
    accuracies.append(accuracy)

# Calculate average metrics across all folds
average_metrics = {}
for metric in metrics[0]['macro avg'].keys():
    if metric == 'support':
        continue
    average_metrics[metric] = np.mean([m['macro avg'][metric] for m in metrics])

average_metrics['accuracy'] = np.mean(accuracies)

# Print average metrics
print("Average Metrics:")
for metric, value in average_metrics.items():
    print(metric, ":", value)


Average Metrics:
precision : 0.5723489278752437
recall : 0.49804741010623366
f1-score : 0.4522196835306714
accuracy : 0.6165525114155251


In [3]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np

import random

np.random.seed(3112)

# Read the TSV file
data = pd.read_csv(f"C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_openface.tsv", delimiter='\t')

# Separate features (X) and target variable (y)
X = data.drop(["answer", "Sc_id"], axis=1)
y = data["answer"]

# Replace infinite values with np.nan
X = X.replace([np.inf, -np.inf], np.nan)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the GroupKFold cross-validator
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
metrics = []
accuracies = []
for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the classifier
    classifier = GradientBoostingClassifier()
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Calculate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_pred)
    metrics.append(report)
    accuracies.append(accuracy)

# Calculate average metrics across all folds
average_metrics = {}
for metric in metrics[0]['macro avg'].keys():
    if metric == 'support':
        continue
    average_metrics[metric] = np.mean([m['macro avg'][metric] for m in metrics])

average_metrics['accuracy'] = np.mean(accuracies)

# Print average metrics
print("Average Metrics:")
for metric, value in average_metrics.items():
    print(metric, ":", value)


Average Metrics:
precision : 0.5121065832302776
recall : 0.5124126690303161
f1-score : 0.4770654374880431
accuracy : 0.5753424657534246


In [4]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import random

np.random.seed(3112)

# Read the TSV file
data = pd.read_csv(f"C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_openface.tsv", delimiter='\t')

# Separate features (X) and target variable (y)
X = data.drop(["answer", "Sc_id"], axis=1)
y = data["answer"]

# Replace infinite values with np.nan
X = X.replace([np.inf, -np.inf], np.nan)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the GroupKFold cross-validator
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
metrics = []
accuracies = []
for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the classifier
    classifier = KNeighborsClassifier()
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Calculate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_pred)
    metrics.append(report)
    accuracies.append(accuracy)

# Calculate average metrics across all folds
average_metrics = {}
for metric in metrics[0]['macro avg'].keys():
    if metric == 'support':
        continue
    average_metrics[metric] = np.mean([m['macro avg'][metric] for m in metrics])

average_metrics['accuracy'] = np.mean(accuracies)

# Print average metrics
print("Average Metrics:")
for metric, value in average_metrics.items():
    print(metric, ":", value)


Average Metrics:
precision : 0.46823770261022524
recall : 0.47093809784986257
f1-score : 0.45198999369642945
accuracy : 0.5204337899543379


In [5]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import random

np.random.seed(3112)

# Read the TSV file
data = pd.read_csv(f"C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_openface.tsv", delimiter='\t')

# Separate features (X) and target variable (y)
X = data.drop(["answer", "Sc_id"], axis=1)
y = data["answer"]

# Replace infinite values with np.nan
X = X.replace([np.inf, -np.inf], np.nan)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the GroupKFold cross-validator
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
metrics = []
accuracies = []
for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the classifier
    classifier = AdaBoostClassifier()
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Calculate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_pred)
    metrics.append(report)
    accuracies.append(accuracy)

# Calculate average metrics across all folds
average_metrics = {}
for metric in metrics[0]['macro avg'].keys():
    if metric == 'support':
        continue
    average_metrics[metric] = np.mean([m['macro avg'][metric] for m in metrics])

average_metrics['accuracy'] = np.mean(accuracies)

# Print average metrics
print("Average Metrics:")
for metric, value in average_metrics.items():
    print(metric, ":", value)


Average Metrics:
precision : 0.5173562221217209
recall : 0.5154384341149048
f1-score : 0.4945169101168759
accuracy : 0.5590563165905632


In [6]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import random

np.random.seed(3112)

# Read the TSV file
data = pd.read_csv(f"C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_openface.tsv", delimiter='\t')

# Separate features (X) and target variable (y)
X = data.drop(["answer", "Sc_id"], axis=1)
y = data["answer"]

# Replace infinite values with np.nan
X = X.replace([np.inf, -np.inf], np.nan)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the GroupKFold cross-validator
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
metrics = []
accuracies = []
for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the classifier
    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Calculate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_pred)
    metrics.append(report)
    accuracies.append(accuracy)

# Calculate average metrics across all folds
average_metrics = {}
for metric in metrics[0]['macro avg'].keys():
    if metric == 'support':
        continue
    average_metrics[metric] = np.mean([m['macro avg'][metric] for m in metrics])

average_metrics['accuracy'] = np.mean(accuracies)

# Print average metrics
print("Average Metrics:")
for metric, value in average_metrics.items():
    print(metric, ":", value)


Average Metrics:
precision : 0.5343729899986915
recall : 0.524218265394736
f1-score : 0.5083623987804247
accuracy : 0.6002663622526636


In [7]:
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import random

np.random.seed(3112)

# Read the TSV file
data = pd.read_csv(f"C:/Users/Sina/Desktop/Stage V2/Sripts/emdat and openface clean up/modified_openface.tsv", delimiter='\t')

# Separate features (X) and target variable (y)
X = data.drop(["answer", "Sc_id"], axis=1)
y = data["answer"]

# Replace infinite values with np.nan
X = X.replace([np.inf, -np.inf], np.nan)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the GroupKFold cross-validator
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
metrics = []
accuracies = []
for train_index, test_index in group_kfold.split(X, y, groups=data["Part_id"]):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the classifier
    classifier = MLPClassifier(hidden_layer_sizes=(100, 50), activation='relu', solver='adam')
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Calculate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_pred)
    metrics.append(report)
    accuracies.append(accuracy)

# Calculate average metrics across all folds
average_metrics = {}
for metric in metrics[0]['macro avg'].keys():
    if metric == 'support':
        continue
    average_metrics[metric] = np.mean([m['macro avg'][metric] for m in metrics])

average_metrics['accuracy'] = np.mean(accuracies)

# Print average metrics
print("Average Metrics:")
for metric, value in average_metrics.items():
    print(metric, ":", value)




Average Metrics:
precision : 0.5619666251372689
recall : 0.5547458913635384
f1-score : 0.539354825098421
accuracy : 0.6278158295281584


