In [1]:
import pandas as pd
import json
import os
from pathlib import Path
from typing import Generator
from copy import deepcopy

In [2]:
# Directory holding the result files, relative to the current working directory.
RESULTS_DIR = Path('..') / 'results'

# Keys looked up in each result's 'metrics' mapping; every entry becomes one
# column of the aggregated table.
# NOTE: 'bcubed_precission' is spelled this way on purpose here -- it is the
# literal key used for the lookup, so it must match what the result files contain.
METRICS = [
    'accuracy', 'f1_score', 'recall', 'precision',
    'silhouette', 'davies_bouldin', 'calinski_harabasz',
    'bcubed_precission', 'bcubed_recall', 'bcubed_f1',
]

In [3]:
# Column-oriented template for the aggregated table: the identifying columns
# come first, followed by one (initially empty) column per tracked metric.
DATA_DICT = {
    column: []
    for column in ('dataset', 'datacleaner', 'vectorizer', 'params_name')
}
for metric in METRICS:
    DATA_DICT[metric] = []

In [4]:
def results_iter(root_dir: str | Path) -> Generator[dict[str, dict | str | float], None, None]:
    for root, dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith('.json'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as file:
                    yield json.load(file)
        for dir_ in dirs:
            results_iter(os.path.join(root, dir_))

def update_data(results, data: dict[str, list[str | float]]):
    """Append one result record to the column lists in ``data`` (in place).

    Args:
        results: Mapping with the keys ``'datacleaner'``, ``'dataset'``,
            ``'vectorizer'``, ``'params_name'`` and ``'metrics'``; the value
            under ``'metrics'`` must support ``.get``.
        data: Column-oriented accumulator holding a list under each of the
            four identifying keys and under every name in ``METRICS``.

    Raises:
        KeyError: If ``results`` lacks one of the required keys. All required
            keys are read before anything is appended, so ``data`` is left
            untouched in that case.
    """
    # Read every required field up front, before mutating `data`.
    identity = {
        key: results[key]
        for key in ('datacleaner', 'dataset', 'vectorizer', 'params_name')
    }
    scores: dict[str, float] = results['metrics']

    for column, value in identity.items():
        data[column].append(value)

    # A metric missing from this record is stored as None so that all columns
    # keep the same length.
    for name in METRICS:
        data[name].append(scores.get(name))

In [7]:
# Work on a private copy of the template so that re-running this cell starts
# from empty columns instead of appending to a previous run's rows.
data = deepcopy(DATA_DICT)
for results in results_iter(RESULTS_DIR):
    update_data(results, data)

# Column-oriented dict of equal-length lists -> one row per result record.
results_df = pd.DataFrame(data)

In [8]:
# Preview the first five aggregated rows.
results_df.head(n=5)

Unnamed: 0,dataset,datacleaner,vectorizer,params_name,accuracy,f1_score,recall,precision,silhouette,davies_bouldin,calinski_harabasz,bcubed_precission,bcubed_recall,bcubed_f1
0,TweeterCyberbullying,DummyDatacleaner,SpacyMorphTagVectorizer,MLP1,0.915423,0.477922,0.5,0.457711,,,,,,
1,TweeterCyberbullying,DummyDatacleaner,SpacyMorphTagVectorizer,MLP2,0.915423,0.477922,0.5,0.457711,,,,,,
2,TweeterCyberbullying,DummyDatacleaner,SpacyMorphTagVectorizer,MLP3,0.915423,0.500348,0.510678,0.708541,,,,,,
3,TweeterCyberbullying,DummyDatacleaner,SpacyMorphTagVectorizer,RandomForest1,0.916418,0.489798,0.505882,0.958167,,,,,,
4,TweeterCyberbullying,DummyDatacleaner,SpacyMorphTagVectorizer,LR1,0.915423,0.477922,0.5,0.457711,,,,,,
