In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from joblib import dump, load
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve

def which_taxo(file):
    """Load the test taxonomy table that matches a saved model file.

    The dataset is selected by substring match on the model file name.
    The markers ``"DM2_a."``, ``"DM2_b."``, ``"PD_a."`` and ``"PD_b."`` are
    tried in that order and the first one found wins.

    Parameters
    ----------
    file : str
        Model file name, e.g. ``"DM2_a.joblib"``.

    Returns
    -------
    taxo : pandas.DataFrame
        Contents of the matching CSV under ``repro/test/``.
    nom : str
        Short display name of the dataset (``"DM2 S"``, ``"DM2 S1"``,
        ``"PD S"`` or ``"PD S1"``).

    Raises
    ------
    ValueError
        If ``file`` contains none of the known markers. (Previously this
        surfaced as an UnboundLocalError on the ``return`` statement.)
    """
    # (marker in file name, CSV with the test features, display name)
    datasets = (
        ("DM2_a.", "repro/test/taxoS_test_DM2.csv", "DM2 S"),
        ("DM2_b.", "repro/test/taxoS1_test_DM2.csv", "DM2 S1"),
        ("PD_a.", "repro/test/taxoS_test_PD.csv", "PD S"),
        ("PD_b.", "repro/test/taxoS1_test_PD.csv", "PD S1"),
    )
    for marker, csv_path, nom in datasets:
        if marker in file:
            return pd.read_csv(csv_path), nom

    known = ", ".join(repr(marker) for marker, _, _ in datasets)
    raise ValueError(
        f"Cannot infer the test dataset from {file!r}; "
        f"expected the name to contain one of: {known}"
    )

def filt_y_rows(taxo, meta):
    """Return the condition label of every sample in ``taxo``, in row order.

    For each value of ``taxo["sample"]`` the first row of ``meta`` whose
    ``"sample"`` equals it is located and its ``"condition"`` is taken.

    Parameters
    ----------
    taxo : pandas.DataFrame
        Table with a ``"sample"`` column.
    meta : pandas.DataFrame
        Metadata table with ``"sample"`` and ``"condition"`` columns.

    Returns
    -------
    list
        One condition per row of ``taxo``, in the same order.

    Raises
    ------
    IndexError
        If a sample of ``taxo`` has no matching row in ``meta``.
    """
    condition = []
    # Iterate over the values themselves (positional) rather than
    # taxo["sample"][i]: label-based lookup breaks, or silently reorders rows,
    # when taxo does not carry a default 0..n-1 index.
    for sample in taxo["sample"]:
        matches = meta.loc[meta["sample"] == sample, "condition"]
        if matches.empty:
            raise IndexError(f"sample {sample!r} not found in metadata")
        # On duplicated sample ids the first metadata row wins.
        condition.append(matches.iloc[0])
    return condition

def drop_nas(taxo, meta):
    """Attach the labels to the feature table and drop every incomplete row.

    ``meta`` is added as a ``"condition"`` column, rows containing any missing
    value (in a feature or in the label) are removed, and the table is split
    back into features and labels.

    Parameters
    ----------
    taxo : pandas.DataFrame
        Feature table. Its first column is discarded from the returned
        features (presumably the sample identifier; confirm against the
        CSV layout).
    meta : list-like
        One label per row of ``taxo``.

    Returns
    -------
    taxo : pandas.DataFrame
        Complete rows only, without the first column and without the
        ``"condition"`` column.
    meta : pandas.Series
        The ``"condition"`` values of the rows that were kept.
    """
    # assign() works on a copy, so the caller's DataFrame no longer gains a
    # "condition" column as a side effect.
    labelled = taxo.assign(condition=meta).dropna()
    # Columns 1..-2: everything between the first column and "condition".
    return labelled.iloc[:, 1:-1], labelled["condition"]

def get_roc_curves(y_test, y_prob, pos_label, title_suffix=None, out_prefix=None):
    """Plot the ROC curve and save it as a PNG under ``repro/metrics/``.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_prob : array-like
        Scores for the positive class.
    pos_label : str or int
        Label treated as positive by ``roc_curve``.
    title_suffix : str, optional
        Text appended to the plot title. Defaults to the notebook-level
        global ``nomi`` (the original behaviour).
    out_prefix : str, optional
        Prefix of the output file name; the figure is written to
        ``repro/metrics/<out_prefix>_roc_curve.png``. Defaults to the
        notebook-level global ``file`` (the original behaviour).

    Notes
    -----
    The output directory is not created here; the caller must ensure it exists.
    """
    # Backward compatibility: the original implementation read these two
    # values from globals set by the evaluation loop. Passing them explicitly
    # makes the function usable outside that loop.
    if title_suffix is None:
        title_suffix = nomi
    if out_prefix is None:
        out_prefix = file

    fpr, tpr, _ = roc_curve(y_test, y_prob, pos_label=pos_label)

    fig, ax = plt.subplots()
    ax.set_title("Corba ROC" + " " + title_suffix)
    ax.set_xlabel("Falsos positius")
    ax.set_ylabel("Sensibilitat")
    ax.plot(fpr, tpr)
    # Diagonal from (0, 0) to (1, 1) as a chance-level reference.
    ax.plot([0, 1], [0, 1], '-')
    fig.savefig("repro/metrics/" + out_prefix + '_roc_curve.png')
    # Close this specific figure so figures do not pile up across models.
    plt.close(fig)

def get_test_metrics(model, x_test, y_test, labels):
    """Evaluate a fitted classifier on the test set and save its ROC curve.

    Parameters
    ----------
    model : estimator
        Fitted classifier exposing ``predict`` and ``predict_proba``.
    x_test : pandas.DataFrame
        Test features.
    y_test : array-like
        True labels, expressed with the names in ``labels``.
    labels : sequence of length 2
        ``[negative_label, positive_label]``.

    Returns
    -------
    acc : float
        Accuracy.
    sens : float
        Sensitivity, ``tp / (tp + fn)``.
    espe : float
        Specificity, ``tn / (tn + fp)``.
    AUC : float
        Area under the ROC curve for the positive label.
    """
    negative, positive = labels[0], labels[1]

    # Predictions equal to 0/1 are mapped to the label names; predictions
    # that are already names pass through unchanged.
    y_pred = pd.Series(model.predict(x_test)).replace({0: negative, 1: positive})

    # Pick the predict_proba column of the positive label explicitly instead
    # of assuming it is column 1. Fall back to column 1 (the original
    # behaviour) when the model's classes are not the label names.
    class_names = [str(c) for c in getattr(model, "classes_", ())]
    pos_col = class_names.index(str(positive)) if str(positive) in class_names else 1
    y_prob = model.predict_proba(x_test)[:, pos_col]

    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=labels).ravel()
    acc = accuracy_score(y_test, y_pred)
    sens = tp / (tp + fn)
    espe = tn / (tn + fp)

    # Binarise against the positive label so the AUC does not depend on the
    # alphabetical order of the label names (roc_curve below already gets
    # pos_label explicitly).
    AUC = roc_auc_score(np.asarray(y_test) == positive, y_prob)

    get_roc_curves(y_test, y_prob, positive)
    return acc, sens, espe, AUC



In [2]:
# Evaluate every saved model on its test set and collect the metrics in one table.

# Make sure the output folder exists (no-op when it is already there).
os.makedirs("repro/metrics", exist_ok=True)

MODELS_DIR = "repro/models/mipredictor/"

nom = []
nom_model = []
N = []
acc = []
sens = []
espe = []
AUC = []

# sorted(): os.listdir returns entries in arbitrary order, which made the row
# order of the results table nondeterministic.
# NOTE: the loop variables `file` and `nomi` must keep these names because
# get_roc_curves reads them as globals.
for file in sorted(os.listdir(MODELS_DIR)):
    # Skip sub-directories (e.g. .ipynb_checkpoints); only files can be models.
    if not os.path.isfile(os.path.join(MODELS_DIR, file)):
        continue
    print(file)

    x_test, nomi = which_taxo(file)
    if "DM2" in file:
        y = pd.read_csv("repro/metadades_DM2.csv")
        labels = ["Control", "DM2"]
    elif "PD" in file:
        y = pd.read_csv("repro/metadades_PD.csv")
        labels = ["Control", "PD"]

    y_test = filt_y_rows(x_test, y)
    x_test, y_test = drop_nas(x_test, y_test)

    # SECURITY: joblib.load unpickles arbitrary Python objects; only load
    # model files from a trusted source.
    model = load(os.path.join(MODELS_DIR, file))

    N.append(len(x_test))
    nom.append(nomi)
    nom_model.append(str(model))

    acci, sensi, espei, AUCi = get_test_metrics(model, x_test, y_test, labels)
    acc.append(acci)
    sens.append(sensi)
    espe.append(espei)
    AUC.append(AUCi)

# Build the table in one step; one row per evaluated model.
results = pd.DataFrame({
    "Nom": nom,
    "Model": nom_model,
    "n (test)": N,
    "Exactitud": acc,
    "Sensibilitat": sens,
    "Especificitat": espe,
    "AUC": AUC,
})

results.to_csv("repro/metrics/metriques_mipredictor.csv", index=False)

print("metrics done")

DM2_a.joblib


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


DM2_b.joblib


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


PD_a.joblib


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


PD_b.joblib
metrics done


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [3]:
# Display the per-model metrics table built in the evaluation cell.
results

Unnamed: 0,Nom,Model,n (test),Exactitud,Sensibilitat,Especificitat,AUC
0,DM2 S,RandomForestClassifier(n_estimators=10),30,0.7,0.666667,0.733333,0.764444
1,DM2 S1,RandomForestClassifier(n_estimators=10),30,0.766667,0.8,0.733333,0.846667
2,PD S,RandomForestClassifier(n_estimators=10),21,0.619048,0.272727,1.0,0.818182
3,PD S1,DecisionTreeClassifier(),21,0.619048,0.454545,0.8,0.627273
