In [5]:
folder_path = '../results/phet/expanded'

In [6]:
import os
import json
import math

def read_json_files(folder_path):
    results = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('.json'):
            disease_name = filename.split('_')[0].replace('-', ' ').title()
            with open(os.path.join(folder_path, filename), 'r') as f:
                data = json.load(f)
                results[disease_name] = data
    return results

def format_metric(mean, lower, upper):
    return f"{mean:.2f} ({lower:.2f}-{upper:.2f})"

def create_latex_rows(results, metric_prefix):
    sorted_diseases = sorted(results.keys())
    mid_point = math.ceil(len(sorted_diseases) / 2)
    
    first_half = sorted_diseases[:mid_point]
    second_half = sorted_diseases[mid_point:]
    
    rows = []
    for half in [first_half, second_half]:
        cells = []
        for disease in half:
            metrics = results[disease]
            mean = metrics[f'mean_{metric_prefix}']
            lower = metrics[f'lower_{metric_prefix}']
            upper = metrics[f'upper_{metric_prefix}']
            cells.append(format_metric(mean, lower, upper))
        rows.append(' & '.join(cells))
    
    return rows

# Read JSON files
results = read_json_files(folder_path)

# Create and print column names (diseases)
sorted_diseases = sorted(results.keys())
mid_point = math.ceil(len(sorted_diseases) / 2)
first_half_diseases = sorted_diseases[:mid_point]
second_half_diseases = sorted_diseases[mid_point:]

print("Column names for LaTeX table (first half):")
print(' & '.join(first_half_diseases))
print("\nColumn names for LaTeX table (second half):")
print(' & '.join(second_half_diseases))

# Create and print AUROC rows
auroc_rows = create_latex_rows(results, 'auroc')
print("\nAUROC rows for LaTeX table:")
print("First half:", auroc_rows[0])
print("Second half:", auroc_rows[1])

# Create and print AUPRC rows
auprc_rows = create_latex_rows(results, 'auprc')
print("\nAUPRC rows for LaTeX table:")
print("First half:", auprc_rows[0])
print("Second half:", auprc_rows[1])

Column names for LaTeX table (first half):
Asthma.Json & Cataract.Json & Diabetes.Json & Gerd.Json & Hay Fever & Eczema.Json

Column names for LaTeX table (second half):
Major Depression.Json & Myocardial Infarction.Json & Osteoarthritis.Json & Pneumonia.Json & Stroke.Json

AUROC rows for LaTeX table:
First half: 0.56 (0.55-0.58) & 0.75 (0.74-0.76) & 0.82 (0.80-0.83) & 0.62 (0.60-0.63) & 0.57 (0.56-0.59)
Second half: 0.63 (0.62-0.65) & 0.75 (0.73-0.77) & 0.69 (0.68-0.71) & 0.66 (0.64-0.68) & 0.75 (0.72-0.77)

AUPRC rows for LaTeX table:
First half: 0.19 (0.17-0.20) & 0.34 (0.31-0.36) & 0.39 (0.36-0.42) & 0.27 (0.25-0.28) & 0.26 (0.25-0.28)
Second half: 0.21 (0.19-0.23) & 0.20 (0.18-0.23) & 0.45 (0.43-0.47) & 0.11 (0.09-0.13) & 0.09 (0.07-0.11)
