In [2]:
import pandas as pd
import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [4]:
# Display names of the evaluated classifiers; the same strings are
# interpolated into the model/prediction pickle file names further down.
classifiers = [
    "Random Forest",
    "Logistic Regression",
    "K-Nearest Neighbors",
    "Decision Tree",
    "Naive Bayes",
]

# All data

In [10]:
# Test-set labels for the "all data" experiment, read into a DataFrame.
y_test = pd.read_csv(
    '../Machine_Learning/data/y_test_shuffled_all.csv',
)

In [11]:
# Score the stored test-set predictions of every classifier for the
# "all data" experiment and persist the scores as one pickle.
#
# Only the pickled predictions are read: the fitted models used to be
# unpickled here as well, but nothing in this cell used them, so that load
# was dropped (it cost I/O and ran pickle code for no benefit).
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that were produced by this project.
# NOTE(review): roc_auc_score receives the same `y_pred` as the label-based
# metrics. If these are hard class labels rather than scores/probabilities,
# the AUC reflects a single operating point -- confirm this is intended.
metric_functions = {
    'accuracy': accuracy_score,
    'precision': precision_score,
    'recall': recall_score,
    'f1': f1_score,
    'roc_auc': roc_auc_score,
}

test_results = {}

for name in classifiers:
    # Predictions stored for this classifier
    with open(f'../Machine_Learning/Predictions/{name}_predictions_all.pkl', 'rb') as f:
        y_pred = pickle.load(f)

    # One value per metric, all computed against the same ground truth
    test_scores = {
        metric: score(y_test, y_pred)
        for metric, score in metric_functions.items()
    }

    # Scores stay nested under "Test Scores", one entry per classifier
    test_results[name] = {
        "Test Scores": test_scores,
    }

with open('../Machine_Learning/Results/test_results_all.pkl', 'wb') as f:
    pickle.dump(test_results, f)


# With unique symptoms in train and test

# 5% True labels

In [5]:
# Test-set labels for the unique-symptom split with 5% true labels
# (per the file name and section heading), read into a DataFrame.
y_test5 = pd.read_csv(
    '../Machine_Learning/data/y_test_unique_5_shuffled.csv',
)

In [6]:
# Score the stored test-set predictions of every classifier for the
# 5% experiment and persist the scores as one pickle.
#
# Only the pickled predictions are read: the fitted models used to be
# unpickled here as well, but nothing in this cell used them, so that load
# was dropped (it cost I/O and ran pickle code for no benefit).
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that were produced by this project.
# NOTE(review): roc_auc_score receives the same `y_pred5` as the label-based
# metrics. If these are hard class labels rather than scores/probabilities,
# the AUC reflects a single operating point -- confirm this is intended.
metric_functions = {
    'accuracy': accuracy_score,
    'precision': precision_score,
    'recall': recall_score,
    'f1': f1_score,
    'roc_auc': roc_auc_score,
}

test_results = {}

for name in classifiers:
    # Predictions stored for this classifier
    with open(f'../Machine_Learning/Predictions/{name}_predictions_5.pkl', 'rb') as f:
        y_pred5 = pickle.load(f)

    # One value per metric, all computed against the same ground truth
    test_scores = {
        metric: score(y_test5, y_pred5)
        for metric, score in metric_functions.items()
    }

    # Scores stay nested under "Test Scores", one entry per classifier
    test_results[name] = {
        "Test Scores": test_scores,
    }

with open('../Machine_Learning/Results/test_results_5.pkl', 'wb') as f:
    pickle.dump(test_results, f)


# 10% True labels

In [10]:
# Test-set labels for the unique-symptom split with 10% true labels
# (per the file name and section heading), read into a DataFrame.
y_test10 = pd.read_csv(
    '../Machine_Learning/data/y_test_unique_10_shuffled.csv',
)

In [13]:
# Score the stored test-set predictions of every classifier for the
# 10% experiment and persist the scores as one pickle.
#
# Only the pickled predictions are read: the fitted models used to be
# unpickled here as well, but nothing in this cell used them, so that load
# was dropped (it cost I/O and ran pickle code for no benefit).
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that were produced by this project.
# NOTE(review): roc_auc_score receives the same `y_pred10` as the label-based
# metrics. If these are hard class labels rather than scores/probabilities,
# the AUC reflects a single operating point -- confirm this is intended.
metric_functions = {
    'accuracy': accuracy_score,
    'precision': precision_score,
    'recall': recall_score,
    'f1': f1_score,
    'roc_auc': roc_auc_score,
}

test_results = {}

for name in classifiers:
    # Predictions stored for this classifier
    with open(f'../Machine_Learning/Predictions/{name}_predictions_10.pkl', 'rb') as f:
        y_pred10 = pickle.load(f)

    # One value per metric, all computed against the same ground truth
    test_scores = {
        metric: score(y_test10, y_pred10)
        for metric, score in metric_functions.items()
    }

    # Scores stay nested under "Test Scores", one entry per classifier
    test_results[name] = {
        "Test Scores": test_scores,
    }

with open('../Machine_Learning/Results/test_results_10.pkl', 'wb') as f:
    pickle.dump(test_results, f)
