In [54]:
import json
import re

import pandas as pd

import utils.qpl.paths as p

OUTPUTS_DIR = p.TYPES_OUTPUT_DIR

def accuracy(data):
    correct = 0
    for d in data:
        actual_type = set([t.strip().lower() for t in d['actual_type'].split(',')])
        predicted_type = set([t.strip().lower() for t in d['predicted_type'].split(',')])
        if actual_type == predicted_type:
            correct += 1
    return correct / len(data) if data else 1


fields_options = [
    ['model=', 'system=', 'train=', 'test=', 'fewshot=', 'cot', 'seed='],
    ['model=', 'system=', 'train=', 'test=', 'fewshot=', 'seed='],
    ['model=', 'system=', 'dataset=', 'split_train_ratio=', 'fewshot=', 'cot', 'seed='],
    ['model=', 'system=', 'dataset=', 'split_train_ratio=', 'fewshot=', 'seed=']
]

def parse_filename(filename: str) -> dict:
    for fields in sorted(fields_options, key=lambda x: len(x), reverse=True):
        result = {}
        is_valid = True
        for field, nextfield in zip(fields, fields[1:] + [None]):
            try:
                i = filename.index(field)
                j = filename.index('_' + nextfield) if nextfield else filename.index('.json')
            except ValueError as e:
                is_valid = False
                break
            value = filename[i + len(field): j]
            if value == '':
                # boolean field
                value = True
            result[field.replace('=', '')] = value
        if is_valid:
            if 'dataset' in result:
                dataset = result['dataset']
                del result['dataset']
                result['train'] = result['test'] = dataset
            return result
    raise ValueError(f"Failed to parse filename: {filename!r}")
    
results = []
for output_file in sorted(OUTPUTS_DIR.iterdir()):
    if output_file.suffix != '.json':
        continue
    data = json.loads(output_file.read_text())
    acc = accuracy(data)
    fields = parse_filename(output_file.name)
    fields['accuracy'] = acc
    results.append(fields)

df = pd.DataFrame(results)
df


Unnamed: 0,model,system,split_train_ratio,fewshot,cot,seed,train,test,accuracy
0,ollama_chat-gemma3:4b,simple,0.0,0,True,42,concert_singer,concert_singer,0.583333
1,ollama_chat-qwen3:4b,simple,0.0,0,,42,battle_death,battle_death,0.914894
2,ollama_chat-qwen3:4b,simple,0.0,0,True,42,concert_singer,concert_singer,0.863636
3,ollama_chat-qwen3:4b,simple,0.0,0,,42,concert_singer,concert_singer,0.909091
4,openai-gpt-4.1-mini,simple,0.0,0,True,42,battle_death,battle_death,0.893617
5,openai-gpt-4.1-mini,simple,0.15,3,True,42,concert_singer,concert_singer,0.876106
6,openai-gpt-4.1-mini,simple,0.1,3,True,42,concert_singer,concert_singer,0.840336
7,openai-gpt-4.1-mini,simple,0.0,0,True,42,concert_singer,concert_singer,0.909091
8,openai-gpt-4.1-mini,simple,,3,True,42,battle_death,concert_singer,0.840909
9,openai-gpt-4.1-mini,simple,,3,True,42,concert_singer,battle_death,0.87234


In [55]:
IDX = 1
with open(sorted(OUTPUTS_DIR.iterdir())[IDX], 'r') as f:
    data = json.load(f)
print(json.dumps([d for d in data if d['actual_type'] != d['predicted_type']], indent=2))

[
  {
    "db_id": "concert_singer",
    "question": "What are the ids of the singers and number of concerts for each person?",
    "actual_type": "singer, Aggregated[concert]",
    "reasoning": "The question asks for the IDs of singers and the number of concerts for each person. We can achieve this by joining the `singer` and `singer_in_concert` tables, and then joining with the `concert` table. The `singer_in_concert` table links singers to concerts. We can then count the number of concerts for each singer.\nThe query would involve selecting `singer.Singer_ID` and the count of concerts for each singer.",
    "predicted_type": "[\"singer\", \"concert\"]"
  },
  {
    "db_id": "concert_singer",
    "question": "What are the ids of the concerts and the ids of singers who performed in the concerts?",
    "actual_type": "concert, singer",
    "reasoning": "The question asks for the ids of the concerts and the ids of singers who performed in the concerts. To achieve this, we need to join t