This notebook computes the mean and standard deviation of each test metric, reported as $\text{average} \pm \text{stddev}$, for every dataset.

In [1]:
# for each dataset dir, open 'test_results.csv', and for each column calculate the mean and std of the values (except the first column)
# then print the results in a table
import os
import csv
import numpy as np

In [2]:
# Collect the raw metric values of every dataset into `results`:
#   results[dataset][column_name] -> list of value lists, one inner list per CSV
#   column carrying that header name. Values are kept as the strings read from
#   the CSV; conversion to numbers happens when the table is printed.
fe_path = '/data/home/gabrielg/Bounded_Future_from_GIT/output/feature_extractor'
fe_model = '2D-EfficientNetV2-m'
eval_scheme = 'LOUO'
results = {}
for dataset in os.listdir(fe_path):
    dataset_path = os.path.join(fe_path, dataset, fe_model, eval_scheme)
    if not os.path.isdir(dataset_path):
        continue
    results_file = os.path.join(dataset_path, 'test_results.csv')
    if not os.path.isfile(results_file):
        # A dataset without results should not abort the whole summary.
        print(f'Skipping {dataset}: {results_file} not found')
        continue
    # newline='' is what the csv module requires for correct newline handling.
    with open(results_file, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        # csv.reader yields [] for blank lines; drop them so indexing below is safe.
        rows_data = [row for row in reader if row]
    if header is None:
        print(f'Skipping {dataset}: {results_file} is empty')
        continue
    results[dataset] = {}
    for i, col in enumerate(header):
        if i == 0:
            # The first column is excluded from the statistics.
            continue
        # setdefault + append keeps one value list per column, even if two
        # columns share the same header name.
        results[dataset].setdefault(col, []).append([row[i] for row in rows_data])

In [3]:
# Print a Markdown table: one row per dataset, one "$mean \pm std$" cell per
# metric column stored in `results`.
# NOTE(review): the header row is hard-coded; it assumes the CSV metric columns
# appear in exactly this order — confirm against test_results.csv.
print("| Dataset | Accuracy | F1-Macro | Edit |  F1@10 | F1@25 | F1@50 |")
print("|---|---|---|---|---|---|---|")
for dataset in results:
    row_to_print = f'| {dataset} |'
    for col in results[dataset]:
        for values in results[dataset][col]:
            # Values were stored as strings; convert them for the statistics.
            values = np.array(values, dtype=np.float32)
            mean = np.mean(values)
            std = np.std(values)  # NumPy default ddof=0, i.e. population std
            # '\\pm' emits a literal "\pm"; a bare '\p' is an invalid escape
            # sequence and triggers a warning on recent Python versions.
            row_to_print += f' ${mean:.4f} \\pm {std:.4f}$ |'
    print(row_to_print)

| Dataset | Accuracy | F1-Macro | Edit |  F1@10 | F1@25 | F1@50 |
|---|---|---|---|---|---|---|
| SAR_RARP50 | $60.4776 \pm 0.7397$ | $46.2786 \pm 2.4125$ | $10.3804 \pm 0.3365$ | $16.1927 \pm 0.5903$ | $12.3187 \pm 0.6615$ | $6.5077 \pm 0.5732$ |
| JIGSAWS | $77.0940 \pm 5.9372$ | $70.1518 \pm 7.4780$ | $16.8955 \pm 4.6744$ | $27.3134 \pm 6.9781$ | $25.7459 \pm 7.1540$ | $20.4230 \pm 6.2379$ |


| Dataset | Accuracy | F1-Macro | Edit |  F1@10 | F1@25 | F1@50 |
|---|---|---|---|---|---|---|
| SAR_RARP50 | $60.4776 \pm 0.7397$ | $46.2786 \pm 2.4125$ | $10.3804 \pm 0.3365$ | $16.1927 \pm 0.5903$ | $12.3187 \pm 0.6615$ | $6.5077 \pm 0.5732$ |
| JIGSAWS | $77.0940 \pm 5.9372$ | $70.1518 \pm 7.4780$ | $16.8955 \pm 4.6744$ | $27.3134 \pm 6.9781$ | $25.7459 \pm 7.1540$ | $20.4230 \pm 6.2379$ |