In [None]:
import json
from pathlib import Path

import evaluate
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

In [None]:
# Register `progress_apply` on pandas objects so row-wise applies can show a tqdm progress bar.
tqdm.pandas()

# Load each metric once at module level; the loaded objects are reused by the code below.
sacrebleu = evaluate.load('sacrebleu')
meteor = evaluate.load('meteor')
# NOTE(review): the Hugging Face Hub publishes this metric as 'bertscore'; confirm that
# 'bert_score' resolves in this environment (e.g. to a local metric script).
bertscore = evaluate.load('bert_score')

def compute_metrics(references, predictions, lang='en'):
    """Compute SacreBLEU, METEOR and BERTScore for aligned reference/prediction texts.

    Args:
        references: Sequence of ground-truth texts, one per prediction.
        predictions: Sequence of generated texts, aligned with ``references``.
        lang: Language code forwarded to BERTScore. The Hugging Face BERTScore
            metric raises ``ValueError`` when neither ``lang`` nor ``model_type``
            is supplied, so a value is always passed. Defaults to ``'en'``
            (assumes English text -- confirm for this dataset).

    Returns:
        A flat dict containing:
          * ``'sacrebleu'``: the SacreBLEU score divided by 100,
          * ``'bleu-<i>'``: each entry of SacreBLEU's ``precisions`` divided by
            100, numbered from 1,
          * every key returned by the METEOR metric, unchanged,
          * ``'bertscore-precision'`` / ``'bertscore-recall'`` / ``'bertscore-f1'``:
            the mean of the per-sample BERTScore values.
    """
    sacrebleu_score = sacrebleu.compute(references=references, predictions=predictions)
    meteor_score = meteor.compute(references=references, predictions=predictions)
    # BERTScore needs `lang` (or `model_type`) to choose its underlying model.
    bertscore_score = bertscore.compute(
        references=references,
        predictions=predictions,
        lang=lang,
    )

    return {
        'sacrebleu': sacrebleu_score['score'] / 100,
        **{
            f'bleu-{i}': s / 100
            for i, s in enumerate(sacrebleu_score['precisions'], start=1)
        },
        **meteor_score,
        # BERTScore returns one value per sample; report the corpus mean.
        'bertscore-precision': np.mean(bertscore_score['precision']),
        'bertscore-recall': np.mean(bertscore_score['recall']),
        'bertscore-f1': np.mean(bertscore_score['f1']),
    }

In [None]:
# Parse the annotation file; only its 'test' split is used.
with open('annotation.json') as f:
    annotation = json.load(f)

test = annotation['test']

# Parallel lists holding the id and the labels of every test sample, in file order.
test_ids = [sample['id'] for sample in test]
labels = [sample['labels'] for sample in test]

In [None]:
# Input/output locations. The output folders are created up front because neither
# open(..., 'w') nor DataFrame.to_csv creates missing parent directories.
results_dir = Path('results')
metrics_dir = results_dir / 'metrics'
texts_dir = results_dir / 'texts'
metrics_dir.mkdir(parents=True, exist_ok=True)
texts_dir.mkdir(parents=True, exist_ok=True)

# The glob is non-recursive, so files written to results/metrics are never picked
# up as inputs. sorted() makes the processing order deterministic.
for p in sorted(results_dir.glob('*.json')):
    with open(p) as f:
        texts = pd.DataFrame(json.load(f))

    # Rows are matched to the annotation test split purely by position, so fail
    # early (before the expensive metric computation) if the sizes disagree.
    if len(texts) != len(test_ids):
        raise ValueError(
            f'{p}: {len(texts)} result rows but {len(test_ids)} test annotations'
        )

    # 'original' metrics: computed on the texts exactly as stored in the file.
    # (Previously this ran after lower-casing, which made 'original' and
    # 'cleaned' identical by construction.)
    metrics = compute_metrics(
        references=texts['ground_truth'].values,
        predictions=texts['inference'].values,
    )

    # Cleaning step: case-fold both sides.
    texts['ground_truth'] = texts['ground_truth'].str.lower()
    texts['inference'] = texts['inference'].str.lower()

    # 'cleaned' metrics: computed on the lower-cased texts.
    cleaned_metrics = compute_metrics(
        references=texts['ground_truth'].values,
        predictions=texts['inference'].values,
    )

    texts['image_id'] = test_ids
    texts['labels'] = labels
    # .copy() detaches the selection from the parent frame so that adding the
    # 'bleu' column below does not raise SettingWithCopyWarning.
    texts = texts[['image_id', 'labels', 'ground_truth', 'inference']].copy()

    # Per-sample SacreBLEU score (left as returned, not divided by 100), used to
    # rank samples from best to worst.
    texts['bleu'] = texts.progress_apply(
        lambda row: sacrebleu.compute(
            references=[row['ground_truth']],
            predictions=[row['inference']],
        )['score'],
        axis=1,
    )
    texts = texts.sort_values(by=['bleu'], ascending=False)

    with open(metrics_dir / f'{p.stem}_metrics.json', 'w') as f:
        json.dump({'original': metrics, 'cleaned': cleaned_metrics}, f, indent=4)

    texts.to_csv(texts_dir / f'{p.stem}_texts.csv', index=False)