In [1]:
import json
import numpy as np
import pandas as pd
import os
import re

# Root of one run group: every sub-directory is handled as one competition run.
root_path = '/home/v-yuanteli/aide_gpt_4o_our_results/2025-03-04T01-11-04-GMT_run-group_aide'

# Column order of every per-competition frame and of the combined frame.
RESULT_COLUMNS = ['step', 'id', 'metric', 'is_buggy', 'times',
                  'sota', 'score_loop', 'score', 'competition']

# Group 1 is the bracketed timestamp of a matching log line; the i-th match is
# used as the timestamp of the i-th step row.
STEP_START_PATTERN = r'\[(.*?)\] INFO: Agent is generating code, parent node type'


def _read_journal(file_path):
    """Return the parsed journal JSON, or None (after printing why) if unusable."""
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read().strip()

    if not content:
        print(f"Error: {file_path} is empty, skipping this competition.")
        return None

    try:
        return json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Error: read {file_path} failed, error info is : {e}")
        return None


def _node_records(data):
    """Turn the journal's 'nodes' list into one plain record per node."""
    return [
        {
            'step': node.get('step'),
            'id': node.get('id'),
            # A node may carry "metric": null; normalise it to an empty dict so
            # later .get() lookups cannot fail.
            'metric': node.get('metric') or {},
            'is_buggy': node.get('is_buggy'),
        }
        for node in data.get('nodes', [])
    ]


def _mark_sota(metrics):
    """Flag, per step, whether its metric value is a new best so far.

    The optimisation direction is taken from the first metric whose
    'maximize' entry is not None. Returns one entry per metric:
    None when the direction is unknown or the step has no value,
    True when the value strictly improves on the running best, else False.
    """
    maximize = next(
        (m.get('maximize') for m in metrics if m.get('maximize') is not None),
        None,
    )
    if maximize is None:
        return [None] * len(metrics)

    best = -np.inf if maximize else np.inf
    flags = []
    for m in metrics:
        value = m.get('value')
        if value is None:
            flags.append(None)
            continue
        improved = value > best if maximize else value < best
        if improved:
            best = value
        flags.append(bool(improved))
    return flags


def _read_grading_scores(base_path):
    """Map the numeric suffix of each grading_output_<n> folder to its score.

    Each folder must contain exactly one JSON file; the score is read from the
    first entry of its 'competition_reports' list (None if the list is empty).
    """
    score_by_step = {}
    for folder in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, folder)
        if not (folder.startswith('grading_output_') and os.path.isdir(folder_path)):
            continue

        folder_number = int(folder.split('_')[-1])
        json_files = [f for f in os.listdir(folder_path) if f.endswith('.json')]
        assert len(json_files) == 1, f"Expected exactly one JSON file in {folder_path}, but found {len(json_files)}"

        with open(os.path.join(folder_path, json_files[0]), 'r', encoding='utf-8') as file:
            report = json.load(file)
        competition_reports = report.get('competition_reports', [])
        score_by_step[folder_number] = (
            competition_reports[0].get('score') if competition_reports else None
        )
    return score_by_step


def _carry_forward_scores(score_loop, sota):
    """Return scores where every non-SOTA row (except row 0) repeats the row above."""
    scores = list(score_loop)
    for i in range(1, len(scores)):
        if sota[i] is not True:
            scores[i] = scores[i - 1]
    return scores


def load_competition(competition_path, competition):
    """Build the per-step table for one competition directory.

    Reads logs/journal.json (nodes), logs/aide.log (step timestamps) and the
    grading_output_* folders (scores) below `competition_path`.

    Returns a DataFrame with the RESULT_COLUMNS columns, or None when the
    journal or the log is missing, or the journal is empty / not valid JSON.
    """
    file_path = os.path.join(competition_path, 'logs/journal.json')
    log_path = os.path.join(competition_path, 'logs/aide.log')
    if not os.path.exists(file_path) or not os.path.exists(log_path):
        return None

    data = _read_journal(file_path)
    if data is None:
        return None
    records = _node_records(data)

    # errors='replace' keeps a stray undecodable byte in the log from aborting the run.
    with open(log_path, 'r', encoding='utf-8', errors='replace') as file:
        times = re.findall(STEP_START_PATTERN, file.read())

    # More timestamps than nodes: add a single placeholder row for the extra
    # step (step number continues after the last node, or starts at 0).
    if len(times) > len(records):
        next_step = records[-1]['step'] + 1 if records else 0
        records.append({'step': next_step, 'id': '', 'metric': {}, 'is_buggy': None})

    df = pd.DataFrame(records, columns=['step', 'id', 'metric', 'is_buggy'])

    # Truncate surplus timestamps and pad missing ones so the column always
    # matches the number of rows.
    times = times[:len(df)]
    df['times'] = times + [None] * (len(df) - len(times))

    sota = _mark_sota([record['metric'] for record in records])
    df['sota'] = sota

    score_by_step = _read_grading_scores(competition_path)
    if score_by_step:
        df['score_loop'] = df['step'].map(score_by_step)
    else:
        df['score_loop'] = None

    df['score'] = _carry_forward_scores(df['score_loop'].tolist(), sota)
    df['competition'] = competition
    return df


all_dfs = []

if os.path.isdir(root_path):
    # Sorted so the row order of the combined frame is reproducible.
    for competition in sorted(os.listdir(root_path)):
        competition_path = os.path.join(root_path, competition)
        if not os.path.isdir(competition_path):
            continue
        competition_df = load_competition(competition_path, competition)
        if competition_df is not None:
            all_dfs.append(competition_df)
else:
    print(f"Error: {root_path} is not a directory, no competitions were loaded.")

# pd.concat raises on an empty list, so fall back to an empty, correctly
# labelled frame when every competition was skipped.
if all_dfs:
    final_df = pd.concat(all_dfs, ignore_index=True)
else:
    final_df = pd.DataFrame(columns=RESULT_COLUMNS)

Error: /home/v-yuanteli/aide_gpt_4o_our_results/2025-03-04T01-11-04-GMT_run-group_aide/mlsp-2013-birds_16fd152f-acde-4a45-b7df-7f99a20e6564/logs/journal.json is empty, skipping this competition.


  final_df = pd.concat(all_dfs, ignore_index=True)


In [2]:
# Bare last expression: shows the combined per-step frame as this cell's output.
final_df

Unnamed: 0,step,id,metric,is_buggy,times,sota,score_loop,score,competition
0,0,ed089e98eed84be9aa1e70f64927cead,"{'value': None, 'maximize': None}",True,"2025-03-04 01:11:21,664",,,,ranzcr-clip-catheter-line-classification_7a58e...
1,1,429ca97d583843afb4a659fa588520ad,"{'value': None, 'maximize': None}",True,"2025-03-04 01:12:20,154",,,,ranzcr-clip-catheter-line-classification_7a58e...
2,2,8e653d6943aa4a78b5a6e76350b0bf6f,"{'value': None, 'maximize': None}",True,"2025-03-04 01:12:46,384",,,,ranzcr-clip-catheter-line-classification_7a58e...
3,3,d202234020434cc49f26933b0668f177,"{'value': None, 'maximize': None}",True,"2025-03-04 01:13:07,254",,,,ranzcr-clip-catheter-line-classification_7a58e...
4,4,6471e75cb59142d7b0ae4b40d89180c5,"{'value': None, 'maximize': None}",True,"2025-03-04 01:13:49,200",,,,ranzcr-clip-catheter-line-classification_7a58e...
...,...,...,...,...,...,...,...,...,...
1042,14,df59c60d8e2d4026ac38878d260f296f,"{'value': None, 'maximize': None}",True,"2025-03-04 03:30:51,819",,,0.99004,text-normalization-challenge-english-language_...
1043,15,c8b69be615b946948f1359264d435295,"{'value': None, 'maximize': None}",True,"2025-03-04 04:11:16,654",,,0.99004,text-normalization-challenge-english-language_...
1044,16,cb086d4062ec4f49b4b6a7bfeb3b7426,"{'value': None, 'maximize': None}",True,"2025-03-04 13:11:39,469",,,0.99004,text-normalization-challenge-english-language_...
1045,17,b07b9f5930f34310aac6d8f38901f193,"{'value': None, 'maximize': None}",True,"2025-03-04 22:12:03,529",,,0.99004,text-normalization-challenge-english-language_...


In [None]:
# Write the combined per-step table to CSV in the working directory;
# index=False leaves the row index out of the file.
output_csv = 'aide_gpt_4o_our_results.csv'
final_df.to_csv(output_csv, index=False)