This notebook calculates the test results ($average \pm stddev$ of each metric) for each dataset.

In [1]:
# for each dataset dir, open 'test_results.csv', and for each column calculate the mean and std of the values (except the first column)
# then print the results in a table
import os
import csv
import numpy as np

In [2]:
# Location of the feature-extractor outputs and the model / evaluation scheme to summarize.
# Expected layout: <fe_path>/<dataset>/<fe_model>/<eval_scheme>/<task>/test_results.csv
fe_path = '/data/home/gabrielg/Bounded_Future_from_GIT/output/feature_extractor'
fe_model = '2D-EfficientNetV2-m'
eval_scheme = 'LOUO'
# Task sub-directories whose `test_results.csv` is collected.
task_dirs = ('Suturing', 'gesture', 'steps', 'phases')


def _read_metric_columns(csv_path):
    """Read a results CSV and return its columns, skipping the first one.

    Args:
        csv_path: Path of the CSV file; its first row is treated as the header.

    Returns:
        A dict mapping each header name (except the first column) to a list
        holding one list of the raw string values of that column, one value
        per data row. A header name that appears more than once contributes
        one inner list per occurrence. An empty file yields an empty dict.
    """
    # newline='' is what the csv module recommends for files handed to csv.reader.
    with open(csv_path, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            return {}
        # csv.reader yields [] for blank lines; drop them so indexing below is safe.
        rows = [row for row in reader if row]

    columns = {}
    for i, col in enumerate(header):
        if i == 0:
            # The first column is not a metric.
            continue
        columns.setdefault(col, []).append([row[i] for row in rows])
    return columns


def _collect_results(root, model, scheme, tasks=task_dirs):
    """Gather the per-column test results of every dataset/task under `root`.

    Args:
        root: Directory containing one sub-directory per dataset.
        model: Name of the model sub-directory inside each dataset directory.
        scheme: Name of the evaluation-scheme sub-directory inside the model directory.
        tasks: Names of the task sub-directories to read.

    Returns:
        A dict keyed by dataset name. Each value is a dict with the task name
        under 'Task' followed by the columns returned by `_read_metric_columns`.
        Datasets whose name contains 'MultiBypass140' are keyed as
        '<name before the first underscore>_<upper-cased first letter of the task>',
        so that several tasks of that dataset do not overwrite each other.

    Raises:
        FileNotFoundError: if `root` does not exist, or a selected task
            directory has no `test_results.csv`.
    """
    collected = {}
    for dataset in os.listdir(root):
        dataset_path = os.path.join(root, dataset, model, scheme)
        if not os.path.isdir(dataset_path):
            # Stray file, or a dataset without results for this model/scheme.
            continue
        for task in os.listdir(dataset_path):
            task_path = os.path.join(dataset_path, task)
            if task not in tasks or not os.path.isdir(task_path):
                continue
            if 'MultiBypass140' in dataset:
                key = dataset.split('_')[0] + f'_{task[0].upper()}'
            else:
                key = dataset
            entry = {'Task': task}
            entry.update(_read_metric_columns(os.path.join(task_path, 'test_results.csv')))
            collected[key] = entry
    return collected


results = _collect_results(fe_path, fe_model, eval_scheme)

In [3]:
# Print a Markdown table: one row per dataset/task in `results`, one "mean ± std" cell per
# metric column. Raw strings are used wherever the LaTeX `\pm` appears, because `\p` is an
# invalid escape sequence in a normal string literal.
print("# **Feature Extractor Results**")
# NOTE(review): the header is fixed, while the row cells follow the column order of each
# CSV — assumes the CSV columns match this header; TODO confirm.
print("| Dataset | Task | Accuracy | F1-Macro | Edit |  F1@10 | F1@25 | F1@50 |")
print("|---|---|---|---|---|---|---|---|")
# Hard-coded row: these VTS numbers are not computed from `results`.
print(r"| VTS | _Gesture_ | $82.66 \pm 6.03$ | $79.46 \pm 8.10$ | - | - | - | - |")
task_str    = 'Task'
task_lst    = ['gesture', 'phases', 'steps']
task_dflt   = 'Gesture'
for dataset in results:
    entry = results[dataset]
    # Keys containing 'MultiBypass140' are shown without what follows the first underscore.
    dataset_name = dataset.split('_')[0] if 'MultiBypass140' in dataset else dataset
    # Any task name outside `task_lst` is labelled with the default.
    task = entry[task_str]
    task_label = task.capitalize() if task in task_lst else task_dflt
    row_to_print = f'| {dataset_name} | _{task_label}_ |'
    for col, value_lists in entry.items():
        if col == task_str:
            continue
        for values in value_lists:
            values = np.array(values, dtype=np.float32)
            if values.size == 0:
                # No data rows: print a placeholder instead of "nan ± nan".
                row_to_print += ' - |'
                continue
            mean = np.mean(values)
            # np.std defaults to ddof=0, i.e. the population standard deviation.
            std = np.std(values)
            row_to_print += rf' ${mean:.2f} \pm {std:.2f}$ |'
    print(row_to_print)

# **Feature Extractor Results**
| Dataset | Task | Accuracy | F1-Macro | Edit |  F1@10 | F1@25 | F1@50 |
|---|---|---|---|---|---|---|---|
| VTS | _Gesture_ | $82.66 \pm 6.03$ | $79.46 \pm 8.10$ | - | - | - | - |
| SAR_RARP50 | _Gesture_ | $60.48 \pm 0.74$ | $46.28 \pm 2.41$ | $10.38 \pm 0.34$ | $16.19 \pm 0.59$ | $12.32 \pm 0.66$ | $6.51 \pm 0.57$ |
| JIGSAWS | _Gesture_ | $77.09 \pm 5.94$ | $70.15 \pm 7.48$ | $16.90 \pm 4.67$ | $27.31 \pm 6.98$ | $25.75 \pm 7.15$ | $20.42 \pm 6.24$ |
| MultiBypass140 | _Steps_ | $68.19 \pm 1.43$ | $54.60 \pm 0.67$ | $3.53 \pm 0.07$ | $5.30 \pm 0.15$ | $3.63 \pm 0.17$ | $1.66 \pm 0.09$ |
| MultiBypass140 | _Phases_ | $79.19 \pm 0.41$ | $66.79 \pm 0.78$ | $1.56 \pm 0.08$ | $2.47 \pm 0.11$ | $1.48 \pm 0.13$ | $0.49 \pm 0.06$ |


# **Feature Extractor Results**
| Dataset | Task | Accuracy | F1-Macro | Edit |  F1@10 | F1@25 | F1@50 |
|---|---|---|---|---|---|---|---|
| VTS | _Gesture_ | $82.66 \pm 6.03$ | $79.46 \pm 8.10$ | - | - | - | - |
| SAR_RARP50 | _Gesture_ | $60.48 \pm 0.74$ | $46.28 \pm 2.41$ | $10.38 \pm 0.34$ | $16.19 \pm 0.59$ | $12.32 \pm 0.66$ | $6.51 \pm 0.57$ |
| JIGSAWS | _Gesture_ | $77.09 \pm 5.94$ | $70.15 \pm 7.48$ | $16.90 \pm 4.67$ | $27.31 \pm 6.98$ | $25.75 \pm 7.15$ | $20.42 \pm 6.24$ |
| MultiBypass140 | _Steps_ | $68.19 \pm 1.43$ | $54.60 \pm 0.67$ | $3.53 \pm 0.07$ | $5.30 \pm 0.15$ | $3.63 \pm 0.17$ | $1.66 \pm 0.09$ |
| MultiBypass140 | _Phases_ | $79.19 \pm 0.41$ | $66.79 \pm 0.78$ | $1.56 \pm 0.08$ | $2.47 \pm 0.11$ | $1.48 \pm 0.13$ | $0.49 \pm 0.06$ |