In [1]:
from metrics import DistinctNgrams, EntropyNgrams, SelfBLEU

# Sampling configuration; both values are the defaults of get_metrics below.
MAX_TOKEN_LEN = 7012  # default for `m`, forwarded to the scorers as max_token_length
N_ITERATION = 100  # default for `n`, forwarded to the scorers as iterations

# One scorer per reported metric: Distinct-1, Distinct-2, Entropy-4 and Self-BLEU (n=4).
dist1 = DistinctNgrams(n=1)
dist2 = DistinctNgrams(n=2)
ent4 = EntropyNgrams(n=4)
sbleu = SelfBLEU(n=4)

In [2]:
import pandas as pd
import json
import time

def _mean_as_str(scores, digits):
    """Return the mean of `scores` rounded to `digits` decimals, as a string.

    An empty score sequence yields "nan" instead of raising ZeroDivisionError.
    """
    if len(scores) == 0:
        return "nan"
    return str(round(sum(scores) / len(scores), digits))


def get_metrics(all_messages, assistant_messages, user_messages, significant_digits=3, dataset_name='no-name', m=MAX_TOKEN_LEN, n=N_ITERATION):
    """Build a one-row table of Distinct-1/2, Entropy-4 and Self-BLEU averages.

    Each metric is computed with the module-level scorers (``dist1``, ``dist2``,
    ``ent4``, ``sbleu``) for three message groups, in the order 'Both', 'Asst',
    'User'. Every scorer is called with ``return_avg=False`` and the returned
    scores are averaged here.

    Args:
        all_messages: messages reported under the 'Both' columns.
        assistant_messages: messages reported under the 'Asst' columns.
        user_messages: messages reported under the 'User' columns.
        significant_digits: number of decimals passed to ``round``. Trailing
            zeros are not padded, so a value may render as ``0.65``.
        dataset_name: label used as the index of the single result row.
        m: forwarded to every scorer as ``max_token_length``.
        n: forwarded to every scorer as ``iterations``.

    Returns:
        pandas.DataFrame with one row and columns keyed by (metric, group)
        tuples: 'Dist-1/2', 'Ent-4' and 'Self-BLEU', each for 'Both', 'Asst'
        and 'User'. All cell values are strings; 'Dist-1/2' cells have the
        form ``"<dist-1> / <dist-2>"``.

    Side effects:
        Prints the elapsed time of each metric to stdout.
    """
    groups = (
        ('Both', all_messages),
        ('Asst', assistant_messages),
        ('User', user_messages),
    )
    scorers = (
        ('Dist-1', dist1.calculate_truncated_distinct_n),
        ('Dist-2', dist2.calculate_truncated_distinct_n),
        ('Ent-4', ent4.calculate_truncated_entropy_n),
        ('Self-BLEU', sbleu.calculate_truncated_self_bleu),
    )

    # Metric-by-metric, group-by-group: the same call order as before, which
    # matters if the scorers draw from a shared random state.
    averages = {}
    for metric, score in scorers:
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments the way time.time() differences can.
        started = time.perf_counter()
        raw_scores = [
            (group, score(messages, max_token_length=m, return_avg=False, iterations=n))
            for group, messages in groups
        ]
        print(f"{metric}: {time.perf_counter() - started} seconds")

        for group, values in raw_scores:
            averages[metric, group] = _mean_as_str(values, significant_digits)

    # Column order: Dist-1/2, Ent-4, Self-BLEU, each split into Both/Asst/User.
    metrics_dict = {}
    for group, _ in groups:
        metrics_dict['Dist-1/2', group] = [f"{averages['Dist-1', group]} / {averages['Dist-2', group]}"]
    for metric in ('Ent-4', 'Self-BLEU'):
        for group, _ in groups:
            metrics_dict[metric, group] = [averages[metric, group]]

    df = pd.DataFrame(metrics_dict)
    df.index = [dataset_name]

    return df

import data_loader as dl

In [3]:
# Score the 'recipe' topic and keep the one-row result as df_laps_recipe.
# NOTE(review): presumably OursPublished loads the published LAPS conversations
# for the given topic — confirm in data_loader.
# `ours` and the three message lists are notebook globals that the 'movie'
# cell rebinds, so they only describe the recipe data until that cell runs.
# The recorded run below spent ~449 s in Self-BLEU; the other metrics took
# seconds.
ours = dl.OursPublished(topic='recipe')
all_messages, assistant_messages, user_messages = ours.get_messages()
df_laps_recipe = get_metrics(all_messages, assistant_messages, user_messages, significant_digits=3, dataset_name='LAPS-Recipe')
df_laps_recipe

Dist-1: 2.427013635635376 seconds
Dist-2: 2.400749921798706 seconds
Ent-4: 13.937196493148804 seconds
Self-BLEU: 448.69724321365356 seconds


Unnamed: 0_level_0,Dist-1/2,Dist-1/2,Dist-1/2,Ent-4,Ent-4,Ent-4,Self-BLEU,Self-BLEU,Self-BLEU
Unnamed: 0_level_1,Both,Asst,User,Both,Asst,User,Both,Asst,User
LAPS-Recipe,0.207 / 0.65,0.207 / 0.61,0.188 / 0.641,8.563,8.506,8.575,0.955,0.958,0.956


In [4]:
# Score the 'movie' topic and keep the one-row result as df_laps_movie.
# NOTE(review): presumably OursPublished loads the published LAPS conversations
# for the given topic — confirm in data_loader.
# This rebinds `ours` and the three message lists that the 'recipe' cell set,
# so from here on they refer to the movie data.
# The recorded run below spent ~430 s in Self-BLEU; the other metrics took
# seconds.
ours = dl.OursPublished(topic='movie')
all_messages, assistant_messages, user_messages = ours.get_messages()
df_laps_movie = get_metrics(all_messages, assistant_messages, user_messages, significant_digits=3, dataset_name='LAPS-Movie')
df_laps_movie

Dist-1: 1.7006545066833496 seconds
Dist-2: 1.7576024532318115 seconds
Ent-4: 13.080648183822632 seconds
Self-BLEU: 429.71686911582947 seconds


Unnamed: 0_level_0,Dist-1/2,Dist-1/2,Dist-1/2,Ent-4,Ent-4,Ent-4,Self-BLEU,Self-BLEU,Self-BLEU
Unnamed: 0_level_1,Both,Asst,User,Both,Asst,User,Both,Asst,User
LAPS-Movie,0.222 / 0.666,0.225 / 0.625,0.202 / 0.661,8.593,8.534,8.59,0.954,0.957,0.954
