# In [None]:
import json
from collections import Counter
from pathlib import Path
import numpy as np

import nltk.translate.bleu_score as bleu
from nltk.translate import bleu_score as nltkbleu


def eval_dist_from_json(json_file):
    """Print distinct-1/2/3 diversity ratios for the texts in a JSON file.

    The file must contain a JSON array of objects, each holding a
    ``"beam_text"`` string. Every text is stripped and split on whitespace;
    the number of distinct unigrams, bigrams and trigrams seen across all
    texts is divided by the total token count and printed with three
    decimals.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        None. The three ratios are written to stdout on a single line,
        followed by a blank line.

    Raises:
        KeyError: If an entry has no ``"beam_text"`` key.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, "r", encoding="utf-8") as f:
        datas = json.load(f)

    unigrams = Counter()
    bigrams = Counter()
    trigrams = Counter()
    tot_tokens = 0
    for data in datas:
        words = data["beam_text"].strip().split()
        unigrams.update(words)
        bigrams.update(zip(words[:-1], words[1:]))
        trigrams.update(zip(words[:-2], words[1:-1], words[2:]))
        tot_tokens += len(words)

    def ratio(ngrams):
        # With no tokens at all (empty file or only blank texts) there is
        # nothing to divide by; report 0 instead of raising.
        return len(ngrams) / tot_tokens if tot_tokens else 0.0

    print(
        f"{ratio(unigrams):.3f}",
        f"{ratio(bigrams):.3f}",
        f"{ratio(trigrams):.3f}",
        "\n",
    )

def bleu_from_json(json_file, k=2):
    """Compute one sentence-level BLEU score per entry of a JSON file.

    The file must contain a JSON array of objects. For each entry,
    ``"beam_text"`` is scored as the hypothesis against the first element of
    ``"candidate"``, which is used as the single reference. Both strings are
    tokenized by splitting on single spaces.

    Args:
        json_file: Path of the JSON file to read.
        k: Number of n-gram weights passed to NLTK; each weight is ``1 / k``.

    Returns:
        A list of BLEU scores, one per entry, in file order.

    Raises:
        KeyError: If an entry lacks ``"beam_text"`` or ``"candidate"``.
    """
    weights = [1 / k for _ in range(k)]
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, "r", encoding="utf-8") as f:
        datas = json.load(f)

    # The smoothing callable does not depend on the entry being scored, so
    # build it once instead of once per sentence.
    smoothing = nltkbleu.SmoothingFunction(epsilon=1e-12).method1

    return [
        bleu.sentence_bleu(
            [data["candidate"][0].split(" ")],
            data["beam_text"].split(" "),
            smoothing_function=smoothing,
            weights=weights,
        )
        for data in datas
    ]