In [24]:
from functools import lru_cache

import numpy as np
import pandas as pd
import yaml
from tabulate import tabulate

In [17]:
# Load the per-run metrics from both result files. The CSVs are read with
# header=None, so the column names are supplied explicitly below.
# `ignore_index=True` gives the combined frame unique row labels instead of
# repeating each file's own 0..n-1 index.
results = pd.concat(
    [
        pd.read_csv(csv_path, header=None)
        for csv_path in ("../results/mcmc.csv", "../results/dev.csv")
    ],
    ignore_index=True,
)
results.columns = ["timestamp", "config", "precision", "recall", "f1_score"]

# Remove dev results: drop every run whose config path contains "dev".
# A vectorised literal substring test replaces the per-row Python lambda, and
# `.copy()` detaches the filtered frame so that adding columns to it later
# does not raise pandas' SettingWithCopyWarning.
is_dev_run = results.config.str.contains("dev", regex=False)
results = results[~is_dev_run].copy()
results.head()

Unnamed: 0,timestamp,config,precision,recall,f1_score
0,2023-11-03 13:41:08.457736,config/mcmc/balance_catboost.yml,0.87379,0.888,0.87941
1,2023-11-03 13:41:13.643964,config/mcmc/balance_catboost.yml,0.85089,0.872,0.85908
2,2023-11-03 13:41:18.785991,config/mcmc/balance_catboost.yml,0.85711,0.872,0.8623
3,2023-11-03 13:41:23.932791,config/mcmc/balance_catboost.yml,0.84851,0.864,0.85401
4,2023-11-03 13:41:29.130929,config/mcmc/balance_catboost.yml,0.85347,0.872,0.85894


In [19]:
# results[results.config.apply(lambda x: True if "dev" in x else False)]

In [20]:
@lru_cache(maxsize=None)
def _load_mcmc_section(config):
    """Read the YAML file at ``../<config>`` and return its ``"mcmc"`` entry.

    The result is cached per ``config`` value, so each file is opened and
    parsed once no matter how many result rows refer to it. This assumes the
    config files do not change while the kernel is running.
    """
    with open(f"../{config}", "r") as f:
        return yaml.safe_load(f)["mcmc"]


def parse_config_clf(config):
    """Return the ``mcmc -> classifier`` value of a run's YAML config.

    Args:
        config: Config path as stored in the results table; it is opened as
            ``../<config>``, i.e. relative to the parent of the current
            working directory.

    Returns:
        Whatever the config stores under ``mcmc.classifier``.

    Raises:
        OSError: If the config file cannot be opened.
        KeyError: If the ``mcmc`` or ``classifier`` key is missing.
    """
    return _load_mcmc_section(config)["classifier"]


def parse_config_dataset(config):
    """Return the ``mcmc -> dataset`` value of a run's YAML config.

    Args:
        config: Config path as stored in the results table; it is opened as
            ``../<config>``, i.e. relative to the parent of the current
            working directory.

    Returns:
        Whatever the config stores under ``mcmc.dataset``.

    Raises:
        OSError: If the config file cannot be opened.
        KeyError: If the ``mcmc`` or ``dataset`` key is missing.
    """
    return _load_mcmc_section(config)["dataset"]

# Label every run with the dataset and classifier named in its YAML config.
# `assign` returns a new frame rather than writing columns into `results` in
# place; `results` was produced by boolean filtering, and in-place column
# assignment on such a frame can raise pandas' SettingWithCopyWarning.
results = results.assign(
    dataset=results.config.apply(parse_config_dataset),
    classifier=results.config.apply(parse_config_clf),
)

In [26]:
# F1 score summarised as "mean ± std" (np.std, i.e. ddof=0) over all runs:
# one row per dataset, one column per classifier.
rs = results.pivot_table(
    values="f1_score",
    index="dataset",
    columns="classifier",
    aggfunc=lambda scores: f"{np.mean(scores):.5f} ± {np.std(scores):.5f}",
)

# Print the summary as a GitHub-flavoured Markdown table.
print(
    tabulate(
        rs,
        headers=rs.columns,
        tablefmt="github",
    )
)

|               | catboost          | decision_tree     | gbc               | knn               | lr                | mlp               | svm               |
|---------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|
| balance       | 0.85534 ± 0.01010 | 0.77878 ± 0.01344 | 0.85522 ± 0.00869 | 0.76407 ± 0.02277 | 0.81874 ± 0.02713 | 0.86270 ± 0.02613 | 0.82459 ± 0.02124 |
| breast-cancer | 0.95912 ± 0.00776 | 0.92483 ± 0.01633 | 0.95609 ± 0.00711 | 0.96472 ± 0.00000 | 0.97981 ± 0.00405 | 0.97278 ± 0.00472 | 0.96897 ± 0.00435 |
| breast-tissue | 0.79040 ± 0.04859 | 0.68911 ± 0.07027 | 0.70142 ± 0.05085 | 0.66342 ± 0.02148 | 0.71234 ± 0.06512 | 0.59860 ± 0.06217 | 0.63469 ± 0.05318 |
| connectionist | 0.78764 ± 0.01644 | 0.72634 ± 0.05323 | 0.79243 ± 0.01853 | 0.81272 ± 0.00909 | 0.66846 ± 0.00843 | 0.83991 ± 0.02029 | 0.78333 ± 0.01880 |
| frogs         | 0.99064 ± 0.00091 | 0.96854 ± 0.00

In [27]:
# Precision summarised as "mean ± std" (np.std, i.e. ddof=0) over all runs:
# one row per dataset, one column per classifier.
rs = results.pivot_table(
    values="precision",
    index="dataset",
    columns="classifier",
    aggfunc=lambda scores: f"{np.mean(scores):.5f} ± {np.std(scores):.5f}",
)

# Print the summary as a GitHub-flavoured Markdown table.
print(
    tabulate(
        rs,
        headers=rs.columns,
        tablefmt="github",
    )
)

|               | catboost          | decision_tree     | gbc               | knn               | lr                | mlp               | svm               |
|---------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|
| balance       | 0.85048 ± 0.01227 | 0.80887 ± 0.01252 | 0.83717 ± 0.01189 | 0.84035 ± 0.01774 | 0.89089 ± 0.03389 | 0.90199 ± 0.01847 | 0.86760 ± 0.01755 |
| breast-cancer | 0.95966 ± 0.00758 | 0.92566 ± 0.01621 | 0.95713 ± 0.00674 | 0.96518 ± 0.00000 | 0.97983 ± 0.00401 | 0.97285 ± 0.00473 | 0.96902 ± 0.00439 |
| breast-tissue | 0.81126 ± 0.05526 | 0.71117 ± 0.08801 | 0.72665 ± 0.06484 | 0.69018 ± 0.01253 | 0.74354 ± 0.08645 | 0.60262 ± 0.07566 | 0.66122 ± 0.08099 |
| connectionist | 0.78875 ± 0.01734 | 0.74320 ± 0.05750 | 0.79385 ± 0.01846 | 0.81521 ± 0.00827 | 0.67868 ± 0.00700 | 0.84281 ± 0.02219 | 0.78576 ± 0.01775 |
| frogs         | 0.99065 ± 0.00091 | 0.96859 ± 0.00

In [28]:
# Recall summarised as "mean ± std" (np.std, i.e. ddof=0) over all runs:
# one row per dataset, one column per classifier.
rs = results.pivot_table(
    values="recall",
    index="dataset",
    columns="classifier",
    aggfunc=lambda scores: f"{np.mean(scores):.5f} ± {np.std(scores):.5f}",
)

# Print the summary as a GitHub-flavoured Markdown table.
print(
    tabulate(
        rs,
        headers=rs.columns,
        tablefmt="github",
    )
)

|               | catboost          | decision_tree     | gbc               | knn               | lr                | mlp               | svm               |
|---------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|
| balance       | 0.86427 ± 0.01213 | 0.75520 ± 0.01660 | 0.87787 ± 0.01011 | 0.71040 ± 0.03378 | 0.78213 ± 0.03354 | 0.84000 ± 0.03718 | 0.79493 ± 0.03243 |
| breast-cancer | 0.95936 ± 0.00767 | 0.92515 ± 0.01629 | 0.95643 ± 0.00697 | 0.96491 ± 0.00000 | 0.97983 ± 0.00402 | 0.97280 ± 0.00472 | 0.96900 ± 0.00438 |
| breast-tissue | 0.79546 ± 0.04507 | 0.70151 ± 0.06394 | 0.71515 ± 0.04970 | 0.66364 ± 0.02227 | 0.71667 ± 0.06175 | 0.63333 ± 0.04830 | 0.65455 ± 0.04636 |
| connectionist | 0.78809 ± 0.01667 | 0.72857 ± 0.05309 | 0.79285 ± 0.01860 | 0.81349 ± 0.00887 | 0.66984 ± 0.00809 | 0.84047 ± 0.02053 | 0.78333 ± 0.01880 |
| frogs         | 0.99064 ± 0.00091 | 0.96859 ± 0.00