In [43]:
import json
import re

import pandas as pd

import utils.qpl.paths as p

# Directory whose *.json files are loaded and scored by the results loop
# further down in this cell.
OUTPUTS_DIR = p.TYPES_OUTPUT_DIR

def accuracy(data):
    """Return the fraction of records whose predicted type equals the actual type.

    Each record is indexed with ``'actual_type'`` and ``'predicted_type'``;
    the two strings are compared after stripping surrounding whitespace and
    lowercasing. Empty ``data`` yields ``1``.
    """
    if not data:
        return 1

    def normalize(label):
        # Comparison ignores case and leading/trailing whitespace.
        return label.strip().lower()

    hits = sum(
        1
        for record in data
        if normalize(record['actual_type']) == normalize(record['predicted_type'])
    )
    return hits / len(data)


# Known filename layouts, each an ordered list of field markers.
# A marker ending in '=' carries a value (``key=value``); a marker without
# '=' (``cot``) is a bare flag. Consecutive fields are joined by '_' and the
# last one is followed by '.json'.
fields_options = [
    ['model=', 'system=', 'train=', 'test=', 'fewshot=', 'cot', 'seed='],
    ['model=', 'system=', 'train=', 'test=', 'fewshot=', 'seed='],
    ['model=', 'system=', 'dataset=', 'split_train_ratio=', 'fewshot=', 'cot', 'seed='],
    ['model=', 'system=', 'dataset=', 'split_train_ratio=', 'fewshot=', 'seed=']
]


def _match_layout(filename, fields):
    """Match *filename* against one layout; return the parsed dict or ``None``."""
    # The first marker may be preceded by an arbitrary prefix; every later
    # marker is anchored right after the '_' that ended the previous value.
    start = filename.find(fields[0])
    if start < 0:
        return None

    result = {}
    for field, nextfield in zip(fields, fields[1:] + [None]):
        value_start = start + len(field)
        delimiter = '_' + nextfield if nextfield else '.json'
        # Search only to the right of the current marker so that text inside
        # an earlier value (e.g. a model name containing "cot") cannot be
        # mistaken for a later marker.
        value_end = filename.find(delimiter, value_start)
        if value_end < 0:
            return None
        value = filename[value_start:value_end]
        if not field.endswith('='):
            # Bare flag: it must be immediately followed by the next
            # delimiter, otherwise this is some longer token and the layout
            # does not apply.
            if value:
                return None
            value = True
        result[field.replace('=', '')] = value
        start = value_end + 1  # skip the '_' separator
    return result


def parse_filename(filename: str) -> dict:
    """Parse the run settings encoded in a result filename.

    The filename is matched against each layout in ``fields_options``,
    longest layout first (layouts of equal length keep their declaration
    order), and the first layout that matches wins.

    Args:
        filename: File name such as
            ``model=m_system=s_train=a_test=b_fewshot=3_cot_seed=42.json``.

    Returns:
        A dict mapping each field name (marker without ``=``) to its value.
        Values are the raw substrings from the filename; the ``cot`` flag is
        ``True`` when present and absent otherwise. A ``dataset`` field is
        replaced by ``train`` and ``test`` entries holding the same value.

    Raises:
        ValueError: If no layout matches the filename.
    """
    for fields in sorted(fields_options, key=len, reverse=True):
        result = _match_layout(filename, fields)
        if result is None:
            continue
        if 'dataset' in result:
            # A single-dataset run trains and tests on the same dataset.
            dataset = result.pop('dataset')
            result['train'] = result['test'] = dataset
        return result
    raise ValueError(f"Failed to parse filename: {filename!r}")
    
# Build one row per JSON result file: the settings parsed from its filename
# plus the accuracy computed from its records.
json_paths = [path for path in sorted(OUTPUTS_DIR.iterdir()) if path.suffix == '.json']

results = []
for output_file in json_paths:
    data = json.loads(output_file.read_text())
    acc = accuracy(data)
    fields = {**parse_filename(output_file.name), 'accuracy': acc}
    results.append(fields)

df = pd.DataFrame(results)
df


Unnamed: 0,model,system,split_train_ratio,fewshot,seed,train,test,accuracy,cot
0,ollama_chat-qwen3:4b,simple,0.0,0,42,concert_singer,concert_singer,0.893939,
1,openai-gpt-4.1-mini,simple,0.15,3,42,concert_singer,concert_singer,0.876106,True
2,openai-gpt-4.1-mini,simple,0.1,3,42,concert_singer,concert_singer,0.840336,True
3,openai-gpt-4.1-mini,simple,0.0,0,42,concert_singer,concert_singer,0.878788,True
4,openai-gpt-4.1-mini,simple,,3,42,battle_death,concert_singer,0.840909,True
5,openai-gpt-4.1-mini,simple,,3,42,concert_singer,battle_death,0.829787,True
6,openai-gpt-4.1-mini,verbose,0.15,3,42,concert_singer,concert_singer,0.902655,True
7,openai-gpt-4.1-mini,verbose,0.1,3,42,concert_singer,concert_singer,0.848739,True
8,openai-gpt-4.1-mini,verbose,0.0,0,42,concert_singer,concert_singer,0.704545,True
9,openai-gpt-4.1-mini,verbose,,3,42,battle_death,concert_singer,0.795455,True
