In [None]:
import json
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import pickle

from sklearn.preprocessing import OneHotEncoder, MaxAbsScaler, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import recall_score, accuracy_score, f1_score, auc, roc_auc_score, precision_score, balanced_accuracy_score, fbeta_score, make_scorer
from sklearn.inspection import permutation_importance

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC



In [None]:
def scoring(clf, X, y):
    """Evaluate a fitted classifier on ``X`` against the true labels ``y``.

    Parameters
    ----------
    clf : object
        Any estimator exposing a ``predict(X)`` method.
    X : array-like
        Feature matrix handed to ``clf.predict``.
    y : array-like
        Ground-truth labels compared against the predictions.

    Returns
    -------
    dict
        Maps the metric names ``'recall'``, ``'accuracy'``, ``'f1'``,
        ``'precision'`` and ``'balanced_accuracy'`` to the value returned by
        the corresponding scikit-learn metric, each called with its default
        arguments as ``metric(y, predictions)``.
    """
    predictions = clf.predict(X)

    # Insertion order matters: it fixes the key order of the returned dict.
    metric_functions = {
        'recall': recall_score,
        'accuracy': accuracy_score,
        'f1': f1_score,
        'precision': precision_score,
        'balanced_accuracy': balanced_accuracy_score,
    }

    return {
        metric_name: metric(y, predictions)
        for metric_name, metric in metric_functions.items()
    }

In [None]:
# Sub-directories of the working directory whose name contains 'no_'.
sub_folders = [f for f in os.listdir('.') if os.path.isdir(f) and 'no_' in f]


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # pickle.load needs a binary file object, not a path string; the context
    # manager also guarantees the handle is closed again.
    # NOTE(security): unpickling can execute arbitrary code -- only load model
    # files that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# model_scores[dataset][model_name] -> dict of metrics produced by scoring().
model_scores = {}
for dataset in ['CATH', 'CENSUS', 'COMA', 'ENSEMBL', 'SCOP', 'TCPH', 'TESMA']:
    dataset_scores = {}

    data_file = f'data_{dataset}.json'  # not used in this cell; kept for later cells
    model_folder = f'no_{dataset}'

    # Load the four pickled models stored in this dataset's 'no_<dataset>' folder.
    forest_model = _load_pickle(f'./{model_folder}/forest_model.pkl')
    logreg_model = _load_pickle(f'./{model_folder}/logreg_model.pkl')
    svc_model = _load_pickle(f'./{model_folder}/svc_model.pkl')
    tree_model = _load_pickle(f'./{model_folder}/tree_model.pkl')

    features = pd.read_csv(f'./features_{dataset}.csv', index_col=0)
    # Flatten the label frame into a 1-D array for the metric functions.
    labels = pd.read_csv(f'./labels_{dataset}.csv', index_col=0).to_numpy().ravel()

    # Score each model under its own name (previously every entry was
    # computed with tree_model, so all four results were identical).
    dataset_scores['DecisionTree'] = scoring(tree_model, features, labels)
    dataset_scores['RandomForest'] = scoring(forest_model, features, labels)
    dataset_scores['SVC'] = scoring(svc_model, features, labels)
    dataset_scores['LogisticRegression'] = scoring(logreg_model, features, labels)

    model_scores[dataset] = dataset_scores
    