In [1]:
import re
import csv
import glob

# Paths of the ten run logs to parse (run_1.log through run_10.log).
log_files = [f"logging/logs/run_{i}.log" for i in range(1, 11)]

# Matches one pipe-delimited table row anchored at the start of the line:
# two word-like cells followed by four cells made only of digits and dots.
# Lines that do not fit this shape are skipped by the loop below.
row_pattern = re.compile(
    r"\|\s*(\w+)\s*\|\s*(\w+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*\|"
)

# Name of the CSV file for the combined table (not written to in this cell).
output_csv = "combined_log_table.csv"

# One tuple of six captured strings per matching line, in file order.
all_rows = []

for log_file in log_files:
    with open(log_file, "r") as f:
        # Try the pattern on every line, then keep only the successful matches.
        parsed = (row_pattern.match(line) for line in f)
        all_rows.extend(hit.groups() for hit in parsed if hit)

In [2]:
# One-sided, one-sample t-test on tta_val_acc:
#   H0: mean tta_val_acc <= 0.9400    vs.    H1: mean tta_val_acc > 0.9400
from scipy import stats

# Extract tta_val_acc values (column 4 of each matched row), skipping warmup rows
tta_val_acc_values = [float(row[4]) for row in all_rows if row[0] != "warmup"]

# Request the one-sided p-value directly from SciPy. Halving the two-sided
# p-value is only correct when t_stat is positive; if the sample mean were
# below 0.9400 the halved value would be misleadingly small (the right answer
# there is 1 - p/2). alternative="greater" handles both signs correctly.
t_stat, p_value = stats.ttest_1samp(
    tta_val_acc_values, 0.9400, alternative="greater"
)

print(f"P-value for the hypothesis that tta_val_acc > 0.9400: {p_value}")

P-value for the hypothesis that tta_val_acc > 0.9400: 1.0778607131360659e-06


In [3]:
# Sample mean and sample standard deviation (n - 1 denominator) of the
# accuracy values and of the time column, both over non-warmup rows.
n_runs = len(tta_val_acc_values)

mean_acc = sum(tta_val_acc_values) / n_runs
acc_sq_dev = sum((acc - mean_acc) ** 2 for acc in tta_val_acc_values)
std_acc = (acc_sq_dev / (n_runs - 1)) ** 0.5

# Column 5 of each matched row, parsed once; the filter is the same one used
# to build tta_val_acc_values, so both lists have n_runs entries.
run_times = [float(row[5]) for row in all_rows if row[0] != "warmup"]
mean_time = sum(run_times) / n_runs
time_sq_dev = sum((t - mean_time) ** 2 for t in run_times)
std_time = (time_sq_dev / (n_runs - 1)) ** 0.5

print("Accuracies: Mean: %.6f    Std: %.6f" % (mean_acc, std_acc))
print("Times (s):  Mean: %.6f    Std: %.6f" % (mean_time, std_time))

Accuracies: Mean: 0.940139    Std: 0.001310
Times (s):  Mean: 2.030108    Std: 0.001226
