In [1]:
import os
import json


In [2]:
# Where the experiment outputs live: `base_folder` is the root directory and
# `summary_path` is the summary.json file stored directly inside it.
base_folder = 'models'  # Adjust the path if necessary
summary_path = os.path.join(base_folder, 'summary.json')


In [3]:
# Display the configured root folder as a quick sanity check
base_folder

'models'

In [4]:
# Display the resolved path of the summary file as a quick sanity check
summary_path

'models/summary.json'

In [24]:
# Load the summary file; JSON is UTF-8 by specification, so do not rely on the
# platform's default text encoding.
with open(summary_path, 'r', encoding='utf-8') as f:
    summary_data = json.load(f)

# Print the input id of every entry under "models".
# The loop variable is deliberately not called `id`: at notebook top level that
# would shadow the builtin id() for the rest of the kernel session.
for model_entry in summary_data["models"]:
    print(model_entry["input_id"])

f449b7cf681b2e22
45eb22c628894821
04cc4bd0161c9867
367a6803fa831e29
5deb64590c6ca9ba
bfe2b0559a199d63
3e5c102bdabf616f
c118d8f7c7e90b73
90dd9b3228c9286c
39c3e308b3eda53c
c290558657e056ca
4c643cf359c3e311
62117ec5e76d027e
ac86ef191eb2280c
934b9d626fea0f44
8cb0235feafe3b8f
a73897880fb27823
707bd5849a85b9aa
9a00d9457d314c01
cf4d797e030fa96f
384fd49572c91514
1eb1fc2e2684d6fd
f7dc685776059b00
fb0b79d598cd52c6
9d84cedd21a440f2
17b14b7200069d2a
6461585725747027
729b398b6557c3ef
bb3f86763870846e
1291497db699c2c7
760a372701a87083
b89e3b79ff8389c9
1b530698396bcd64
59ad2abd01c4e2fe
89f8eb7d670503d9
5a8d9f01914a67c9
c58047a20be8f56c
9651de7942d64c3f
3f81254d74aceb2a
26500b23fff148d6
a0a97e46e71f898c
46a6845581ae64d3
9e17eb50e893a147
e28c03a79010639a
f3d506dad340fa6b
b3375e931dca7220
05b557b8142a6322
642839130ec6c668


In [101]:
# Helper that counts, over all input ids, how often a "sparse" model scores better than "dense"

def _load_json_or_none(path):
    """Return the parsed JSON content of `path`, or None when the file does not exist."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return None


def retrieve_table(model_name, callback_type, metric, summary=None, root=None):
    """Count how often `model_name` scores lower than the dense baseline on `metric`.

    For every entry in ``summary["models"]`` two files are read:
    ``<root>/<input_id>/<model_name>/output.json`` and
    ``<root>/<input_id>/dense/output.json``. The value
    ``model[callback_type][metric]`` is compared against
    ``dense["NonCallBack"][metric]`` (the baseline is always read from
    its "NonCallBack" section, whatever `callback_type` is).

    Per-input rules:
      * both values present  -> counted; a win when the model value is strictly lower
      * dense value is None, model value present -> counted as a win
      * model value is None, dense value present -> counted, not a win
      * both values are None -> not counted
      * either output.json is missing -> not counted (nothing to compare)

    Parameters
    ----------
    model_name : str
        Sub-folder name of the model to compare against "dense".
    callback_type : str
        Top-level key looked up in the model's output.json.
    metric : str
        Key looked up inside the callback section of both files.
    summary : dict, optional
        Mapping with a "models" list of ``{"input_id": ...}`` entries.
        Defaults to the notebook-level ``summary_data``.
    root : str, optional
        Folder containing one sub-folder per input id.
        Defaults to the notebook-level ``base_folder``.

    Returns
    -------
    tuple
        ``(sparse_better_count, total_exp, ratio)`` where ``ratio`` is
        ``sparse_better_count / total_exp`` formatted with four decimals,
        or ``"nan"`` when no input could be compared.

    Raises
    ------
    KeyError
        If an existing output.json lacks `callback_type`, "NonCallBack" or `metric`.
    """
    if summary is None:
        summary = summary_data
    if root is None:
        root = base_folder

    sparse_better_count = 0
    total_exp = 0

    for entry in summary["models"]:
        input_dir = os.path.join(root, entry["input_id"])
        sparse_data = _load_json_or_none(os.path.join(input_dir, model_name, 'output.json'))
        dense_data = _load_json_or_none(os.path.join(input_dir, 'dense', 'output.json'))

        # Without both result files there is nothing to compare for this input.
        if sparse_data is None or dense_data is None:
            continue

        sp = sparse_data[callback_type][metric]
        de = dense_data["NonCallBack"][metric]

        # Neither run produced a value: leave this input out of the total.
        if sp is None and de is None:
            continue

        total_exp += 1
        if sp is not None and (de is None or sp < de):
            sparse_better_count += 1

    # Guard the ratio so an empty comparison set does not raise ZeroDivisionError.
    ratio = sparse_better_count / total_exp if total_exp else float("nan")
    return sparse_better_count, total_exp, f"{ratio:.4f}"

In [87]:
# Spot-check a single configuration. retrieve_table returns
# (sparse_better_count, total_exp, ratio).
# NOTE(review): the saved output of this cell shows the ratio as a raw float, while
# retrieve_table as defined in this notebook returns it as a 4-decimal string; the
# output appears to predate that definition — re-run the cell to refresh it.
retrieve_table("sparse_0.5", "NonCallBack", "MostNegativeValue")

(22, 48, 0.4583333333333333)

In [102]:
# Build one results table per callback type and export them together as one CSV
import pandas as pd

model_names = ["sparse_0.5", "sparse_0.8", "sparse_0.9", "structure_0.2", "structure_0.3"]
callback_types = ["NonCallBack", "RemoveRegion"]
metrics = ["MostNegativeValue", "TimeFirstNegative"]

# results_by_callback[callback_type][model_name][metric] holds the tuple returned
# by retrieve_table. The loops are driven by the three lists above, so the
# callback names are no longer repeated as separate hard-coded literals.
results_by_callback = {
    callback_type: {
        model_name: {
            metric: retrieve_table(model_name, callback_type, metric)
            for metric in metrics
        }
        for model_name in model_names
    }
    for callback_type in callback_types
}

# One DataFrame per callback type: rows are model names, columns are metrics
tables_by_callback = {
    callback_type: pd.DataFrame.from_dict(per_model, orient='index')
    for callback_type, per_model in results_by_callback.items()
}

# Names from the earlier version of this cell, kept for any later cell that uses them
non_callback_data = results_by_callback.get("NonCallBack")
callback_data = results_by_callback.get("RemoveRegion")
non_callback_df = tables_by_callback.get("NonCallBack")
callback_df = tables_by_callback.get("RemoveRegion")

# Stack the per-callback tables under a (Callback Type, Model Name) row index
combined_df = pd.concat(
    [tables_by_callback[callback_type] for callback_type in callback_types],
    keys=callback_types,
    names=['Callback Type', 'Model Name'],
)

# Export the combined DataFrame to a single CSV file
combined_df.to_csv('model_comparison_tables.csv')