In [15]:
import glob
import pandas as pd
from IPython import display as ICD

In [2]:
def highlight_max(s):
    """Return per-cell CSS that bolds the largest entry of a "mean (std)" column.

    Intended to be passed to ``DataFrame.style.apply``, which calls it once
    per column.

    Parameters
    ----------
    s : pandas.Series
        Cells formatted as ``"<mean> (<std>)"``. Only the text before the
        first space is parsed and compared. Missing cells (for example the
        NaN rows produced by reindexing onto a technique that is absent from
        the data) are accepted and are never highlighted.

    Returns
    -------
    list of str
        One entry per cell: ``"font-weight: bold"`` for every cell whose mean
        equals the column maximum (ties are all bolded), ``''`` otherwise.

    Raises
    ------
    ValueError
        If a string cell does not start with a parseable number.
    """
    def _leading_value(cell):
        # "12.3 (0.4)" -> 12.3
        if isinstance(cell, str):
            return float(cell.split(" ")[0])
        # Missing values have no text to split; map them to NaN so that
        # Series.max() skips them and the equality test below is False.
        if pd.isna(cell):
            return float("nan")
        return float(cell)

    values = s.apply(_leading_value)
    is_max = values == values.max()
    return ["font-weight: bold" if cell else '' for cell in is_max]

def drop_columns_with_name(df, name):
    """Drop every column whose label contains ``name``, ignoring case.

    The frame is modified in place and the same object is returned, so the
    function can be used either for its side effect or for its return value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to prune (mutated in place).
    name : str
        Substring to look for in the column labels. Both the labels and
        ``name`` are lowercased before comparing.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, without the matching columns.
    """
    needle = name.lower()
    # Only string labels can contain the substring; non-string labels
    # (ints, tuples, ...) are kept rather than crashing on ``.lower()``.
    matches = [c for c in df.columns if isinstance(c, str) and needle in c.lower()]
    # De-duplicate so a repeated label is only requested once, and drop
    # everything in a single call instead of once per column.
    to_drop = list(dict.fromkeys(matches))
    if to_drop:
        df.drop(columns=to_drop, inplace=True)
    return df

def postprocess_results(results_path, precision=2, filter_cols=None, final_order=None):
    """Summarise a results CSV as a styled "mean (std)" table, one row per technique.

    The CSV is grouped by its ``technique`` column; for every numeric column
    the per-technique mean and standard deviation are rounded and merged into
    a single ``"<mean> (<std>)"`` string.

    Parameters
    ----------
    results_path : str
        Path of the CSV file to read. It must contain a ``technique`` column.
    precision : int, default 2
        Number of decimals passed to ``round`` for both mean and std.
    filter_cols : str or list of str, optional
        Column(s) to keep in the final table. Applied before styling, so the
        highlighting only considers the kept columns.
    final_order : list of str, optional
        Technique names in the desired row order. Rows are reindexed onto
        this list, so a technique that is not in the data becomes a row of
        missing values.

    Returns
    -------
    pandas.io.formats.style.Styler
        The merged table with ``highlight_max`` applied to every column.
        Note that the *largest* mean is emboldened in every column,
        including ones where a smaller value may be preferable.

    Raises
    ------
    KeyError
        If the CSV has no ``technique`` column, or ``filter_cols`` names a
        column that is not in the summary.
    """
    # read results; columns whose name contains "unnamed" are discarded
    # (presumably the index columns pandas labels "Unnamed: N" on read)
    df = pd.read_csv(results_path)
    df = drop_columns_with_name(df, "unnamed")

    # Aggregate numeric/bool columns only: recent pandas versions raise on
    # text columns instead of silently dropping them from mean()/std().
    value_cols = [
        c for c in df.select_dtypes(include=["number", "bool"]).columns
        if c != "technique"
    ]
    df_grouped = df.groupby(by=['technique'])[value_cols]
    means = df_grouped.mean()
    stds = df_grouped.std()

    # merge the means and stds into "mean (std)" strings, cell by cell
    merged_df = pd.DataFrame(
        [
            [f"{round(m, precision)} ({round(s, precision)})"
             for m, s in zip(mean_row, std_row)]
            for mean_row, std_row in zip(means.to_numpy(), stds.to_numpy())
        ],
        index=means.index,
        columns=means.columns,
    )

    # resort for final order
    if final_order is not None:
        merged_df = merged_df.reindex(final_order)

    # filter columns -- this has to happen on the DataFrame, *before* styling:
    # a Styler cannot be indexed with a list of column names.
    if filter_cols is not None:
        if isinstance(filter_cols, str):
            # keep a 2-D frame (and therefore .style) for a single column
            filter_cols = [filter_cols]
        merged_df = merged_df[filter_cols]

    # embolden the highest value in each column
    return merged_df.style.apply(highlight_max)

## Prepare data for paper

In [3]:
# Row order for the tables below; handed to postprocess_results(final_order=...).
final_order = [
    'orig',
    'random',
    'beam',
    'diverse_beam',
    'dips',
    'qcpg',
    'textdiv',
    'qcpgpp',
]

### Intrinsic

In [19]:
# glob.glob returns matches in arbitrary, filesystem-dependent order;
# sorting keeps the tables in the same order on every run.
intrinsic_paths = sorted(glob.glob("intrinsic_*"))

for intrinsic_path in intrinsic_paths:
    print(intrinsic_path)
    ICD.display(postprocess_results(intrinsic_path, final_order=final_order))

intrinsic_results_banking77.csv


Unnamed: 0_level_0,distinct_1-grams,distinct_2-grams,distinct_3-grams,distinct_4-grams,TokenSemantics,DocumentSemantics,PartOfSpeechSequence,Rhythmic,Phonemic,DependencyParse,Sacrebleu,Meteor,BLEURT,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
orig,11.05 (4.91),11.72 (5.14),11.72 (5.14),11.72 (5.14),13.48 (4.26),1.0 (0.0),1.0 (0.0),1.0 (0.0),7.41 (0.71),1.0 (0.0),100.0 (0.0),1.0 (0.0),1.0 (0.04),49.5 (29.01),1.49 (0.15)
random,18.57 (4.3),26.74 (6.77),32.41 (8.67),36.89 (10.23),23.48 (3.23),1.06 (0.04),1.19 (0.07),1.25 (0.11),8.78 (0.58),2.56 (0.94),29.91 (10.86),0.68 (0.04),0.29 (0.4),49.5 (29.01),5.35 (1.42)
beam,13.58 (4.11),18.09 (4.85),21.6 (5.15),25.11 (5.97),20.88 (3.3),1.04 (0.03),1.09 (0.06),1.16 (0.06),8.2 (0.88),1.99 (0.87),37.29 (15.43),0.73 (0.05),0.34 (0.36),49.5 (29.01),5.35 (1.43)
diverse_beam,13.76 (3.93),18.41 (4.42),22.07 (4.42),25.73 (5.01),20.94 (3.25),1.04 (0.03),1.1 (0.07),1.15 (0.06),8.24 (0.9),1.82 (0.59),38.68 (16.8),0.74 (0.05),0.34 (0.38),49.5 (29.01),3.62 (0.78)
dips,23.21 (3.95),34.71 (5.14),41.23 (7.81),45.71 (11.33),24.66 (4.02),1.07 (0.06),1.29 (0.12),1.33 (0.11),8.68 (0.39),2.99 (0.93),28.03 (13.93),0.68 (0.07),0.28 (0.35),49.5 (29.01),6.11 (1.92)
qcpg,26.19 (9.5),35.19 (13.63),38.83 (14.71),40.64 (14.1),27.58 (5.64),1.24 (0.07),1.64 (0.36),1.55 (0.17),8.82 (1.0),3.42 (1.15),8.63 (7.84),0.33 (0.1),-0.32 (0.21),49.5 (29.01),2.4 (0.26)
textdiv,18.23 (4.18),25.42 (5.68),30.44 (7.31),34.13 (8.5),23.3 (3.9),1.07 (0.06),1.25 (0.19),1.4 (0.19),8.3 (1.07),2.37 (0.7),24.07 (10.01),0.63 (0.06),0.22 (0.24),49.5 (29.01),67.66 (2.89)
qcpgpp,27.42 (10.38),36.53 (14.02),40.48 (15.33),42.28 (15.46),27.02 (6.68),1.19 (0.08),1.56 (0.12),1.71 (0.16),9.58 (0.77),3.52 (0.78),10.46 (13.21),0.36 (0.13),-0.12 (0.3),49.5 (29.01),2.37 (0.21)


intrinsic_results_snips_built_in_intents.csv


Unnamed: 0_level_0,distinct_1-grams,distinct_2-grams,distinct_3-grams,distinct_4-grams,TokenSemantics,DocumentSemantics,PartOfSpeechSequence,Rhythmic,Phonemic,DependencyParse,Sacrebleu,Meteor,BLEURT,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
orig,9.23 (3.65),9.99 (4.67),9.99 (4.67),9.99 (4.67),12.57 (3.77),1.0 (0.0),1.0 (0.0),1.0 (0.0),7.94 (1.47),1.0 (0.0),99.0 (10.0),1.0 (0.0),1.02 (0.02),49.5 (29.01),1.55 (0.18)
random,15.63 (3.25),23.24 (4.68),28.13 (5.37),31.98 (7.09),23.54 (4.44),1.04 (0.04),1.26 (0.08),1.31 (0.15),8.94 (0.85),3.23 (0.88),41.3 (18.95),0.74 (0.07),0.46 (0.21),49.5 (29.01),5.1 (1.18)
beam,12.33 (2.56),16.98 (4.18),20.61 (4.95),23.99 (5.66),19.55 (4.1),1.03 (0.03),1.13 (0.09),1.17 (0.09),8.32 (1.13),2.16 (0.86),49.39 (14.5),0.77 (0.08),0.54 (0.28),49.5 (29.01),5.12 (1.21)
diverse_beam,12.42 (2.94),17.18 (4.58),20.83 (5.38),24.22 (6.15),19.44 (4.07),1.02 (0.03),1.15 (0.09),1.16 (0.08),8.27 (1.16),2.32 (0.75),49.46 (14.61),0.77 (0.08),0.53 (0.28),49.5 (29.01),3.49 (0.71)
dips,20.43 (3.1),29.64 (5.45),35.63 (7.97),40.15 (10.53),24.97 (4.5),1.06 (0.05),1.4 (0.16),1.36 (0.13),8.87 (0.85),4.01 (0.64),38.06 (12.89),0.73 (0.08),0.33 (0.28),49.5 (29.01),5.73 (1.5)
qcpg,20.01 (5.32),29.12 (8.41),33.48 (9.79),34.91 (11.05),25.2 (5.79),1.17 (0.06),1.56 (0.16),1.56 (0.19),8.93 (0.99),3.75 (0.97),12.19 (5.93),0.44 (0.1),-0.19 (0.25),49.5 (29.01),2.32 (0.29)
textdiv,14.7 (2.33),20.36 (3.7),24.35 (4.45),27.61 (5.08),22.22 (3.66),1.04 (0.04),1.29 (0.22),1.28 (0.2),8.56 (0.99),3.22 (0.97),27.83 (17.57),0.66 (0.1),0.27 (0.21),49.5 (29.01),65.86 (3.42)
qcpgpp,22.31 (7.86),31.71 (11.93),35.59 (13.8),37.33 (14.54),26.74 (6.84),1.14 (0.04),1.64 (0.21),1.58 (0.19),8.64 (1.24),3.64 (0.82),9.91 (8.76),0.42 (0.12),-0.29 (0.27),49.5 (29.01),2.36 (0.28)


intrinsic_results_trec_coarse_label.csv


Unnamed: 0_level_0,distinct_1-grams,distinct_2-grams,distinct_3-grams,distinct_4-grams,TokenSemantics,DocumentSemantics,PartOfSpeechSequence,Rhythmic,Phonemic,DependencyParse,Sacrebleu,Meteor,BLEURT,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
orig,6.0 (0.0),6.0 (0.0),6.0 (0.0),6.0 (0.0),7.93 (0.08),1.0 (0.0),1.0 (0.0),1.0 (0.0),6.84 (0.11),1.0 (0.0),100.0 (0.0),1.0 (0.0),1.03 (0.0),49.5 (29.01),2.88 (2.0)
random,10.98 (0.2),14.98 (0.2),17.98 (0.2),18.0 (0.0),18.01 (0.46),1.11 (0.01),1.23 (0.01),1.32 (0.01),7.14 (0.03),2.58 (0.05),9.24 (3.09),0.88 (0.0),0.45 (0.03),49.5 (29.01),6.84 (4.89)
beam,7.02 (0.2),11.02 (0.2),14.02 (0.2),17.01 (0.1),16.05 (0.38),1.04 (0.0),1.09 (0.0),1.18 (0.06),7.28 (0.06),1.96 (0.03),9.63 (4.2),1.0 (0.01),0.72 (0.01),49.5 (29.01),7.05 (5.45)
diverse_beam,7.02 (0.2),11.02 (0.2),14.02 (0.2),17.01 (0.1),16.05 (0.38),1.04 (0.0),1.09 (0.0),1.18 (0.06),7.28 (0.06),1.96 (0.03),9.63 (4.2),1.0 (0.01),0.72 (0.01),49.5 (29.01),4.61 (3.37)
dips,15.01 (0.1),21.01 (0.1),22.03 (0.3),21.06 (0.6),20.68 (0.2),1.07 (0.0),1.32 (0.01),1.18 (0.04),7.97 (0.03),2.59 (0.06),7.19 (2.73),0.9 (0.02),0.22 (0.01),49.5 (29.01),8.02 (5.66)
qcpg,19.96 (0.4),22.98 (0.2),22.99 (0.1),22.99 (0.1),23.12 (0.44),1.27 (0.02),1.86 (0.05),1.85 (0.02),8.03 (0.1),4.76 (0.15),5.67 (0.21),0.61 (0.01),-0.49 (0.04),49.5 (29.01),3.7 (2.63)
textdiv,9.05 (0.5),14.05 (0.5),15.08 (0.8),16.1 (1.0),19.14 (0.17),1.05 (0.0),1.38 (0.02),1.13 (0.04),7.96 (0.14),4.83 (0.14),9.07 (2.79),0.84 (0.0),0.1 (0.03),49.5 (29.01),238.65 (532.12)
qcpgpp,16.94 (0.6),19.95 (0.5),21.94 (0.6),20.95 (0.5),22.77 (0.54),1.22 (0.01),2.2 (0.07),1.79 (0.02),8.02 (0.06),3.29 (0.07),6.44 (0.16),0.6 (0.02),-0.47 (0.03),49.5 (29.01),3.85 (2.76)


intrinsic_results_trec_fine_label.csv


Unnamed: 0_level_0,distinct_1-grams,distinct_2-grams,distinct_3-grams,distinct_4-grams,TokenSemantics,DocumentSemantics,PartOfSpeechSequence,Rhythmic,Phonemic,DependencyParse,Sacrebleu,Meteor,BLEURT,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
orig,8.44 (1.55),8.93 (2.05),8.93 (2.05),8.93 (2.05),9.83 (1.75),1.0 (0.0),1.0 (0.0),1.0 (0.0),7.2 (1.27),1.0 (0.0),100.0 (0.0),1.0 (0.0),0.98 (0.02),49.5 (29.01),2.44 (0.3)
random,14.48 (2.49),20.44 (3.51),24.41 (4.52),27.37 (5.55),20.17 (1.68),1.05 (0.01),1.36 (0.07),1.5 (0.09),8.28 (0.34),3.49 (0.76),29.93 (6.48),0.62 (0.04),0.33 (0.02),49.5 (29.01),7.6 (1.33)
beam,10.48 (1.5),14.98 (1.99),17.99 (1.99),20.95 (2.02),16.56 (1.66),1.03 (0.01),1.09 (0.06),1.33 (0.15),8.31 (0.32),1.62 (0.25),53.01 (11.61),0.79 (0.02),0.58 (0.04),49.5 (29.01),7.49 (1.39)
diverse_beam,11.46 (2.5),15.46 (2.5),18.95 (3.01),22.39 (3.57),16.68 (1.81),1.03 (0.01),1.15 (0.08),1.35 (0.13),8.39 (0.39),1.93 (0.29),48.94 (7.79),0.75 (0.03),0.54 (0.08),49.5 (29.01),4.7 (0.78)
dips,18.04 (2.03),25.98 (2.0),29.93 (3.07),31.39 (4.58),21.54 (2.94),1.04 (0.01),1.22 (0.05),1.45 (0.12),8.61 (0.37),3.04 (0.64),39.04 (5.01),0.73 (0.04),0.55 (0.08),49.5 (29.01),8.61 (1.78)
qcpg,16.45 (5.49),22.4 (7.5),24.42 (6.5),25.39 (5.53),18.54 (4.3),1.07 (0.01),1.62 (0.54),1.62 (0.37),8.51 (0.82),2.77 (0.8),22.0 (13.02),0.45 (0.15),0.04 (0.1),49.5 (29.01),3.61 (0.6)
textdiv,12.48 (0.56),18.44 (2.53),21.4 (2.59),21.89 (3.08),18.15 (0.91),1.04 (0.0),1.29 (0.04),1.7 (0.31),8.07 (0.61),2.4 (0.77),19.32 (8.11),0.54 (0.1),0.37 (0.17),49.5 (29.01),155.47 (10.93)
qcpgpp,20.92 (4.04),26.88 (5.08),27.88 (6.07),28.85 (7.09),20.29 (2.42),1.19 (0.11),1.6 (0.03),1.58 (0.07),9.05 (0.25),3.72 (0.09),22.21 (18.11),0.48 (0.25),0.05 (0.27),49.5 (29.01),3.68 (0.68)


intrinsic_results_tweet_eval_emotion.csv


Unnamed: 0_level_0,distinct_1-grams,distinct_2-grams,distinct_3-grams,distinct_4-grams,TokenSemantics,DocumentSemantics,PartOfSpeechSequence,Rhythmic,Phonemic,DependencyParse,Sacrebleu,Meteor,BLEURT,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
orig,8.55 (2.36),9.51 (2.29),9.51 (2.29),9.51 (2.29),12.64 (2.11),1.0 (0.0),1.0 (0.0),1.0 (0.0),7.76 (0.37),1.0 (0.0),100.0 (0.0),1.0 (0.0),0.95 (0.01),49.5 (29.01),1.53 (0.17)
random,15.54 (2.43),23.49 (2.42),30.42 (2.46),36.38 (2.65),23.35 (1.53),1.05 (0.01),1.19 (0.04),1.23 (0.03),7.38 (0.54),4.17 (0.59),67.28 (9.1),0.9 (0.05),0.43 (0.09),49.5 (29.01),5.31 (1.01)
beam,12.48 (2.38),16.53 (2.76),20.53 (2.94),24.48 (2.95),20.91 (1.6),1.02 (0.0),1.12 (0.03),1.08 (0.02),8.42 (0.28),3.41 (0.44),73.96 (10.56),0.94 (0.06),0.6 (0.11),49.5 (29.01),5.32 (1.12)
diverse_beam,12.47 (2.35),16.52 (2.69),20.52 (2.82),23.54 (2.92),21.04 (1.63),1.03 (0.01),1.07 (0.03),1.1 (0.02),8.43 (0.31),3.89 (0.56),72.27 (10.34),0.93 (0.06),0.58 (0.11),49.5 (29.01),3.56 (0.54)
dips,18.58 (2.55),28.63 (2.9),35.72 (3.55),40.85 (4.4),24.6 (1.64),1.04 (0.01),1.3 (0.05),1.29 (0.03),8.53 (0.27),4.7 (0.58),55.36 (5.94),0.88 (0.05),0.37 (0.09),49.5 (29.01),5.98 (1.29)
qcpg,20.88 (3.7),27.3 (5.46),29.45 (6.17),29.53 (6.54),28.32 (1.49),1.26 (0.03),1.74 (0.08),1.88 (0.09),8.66 (0.23),4.78 (0.3),8.59 (1.43),0.35 (0.03),-0.72 (0.07),49.5 (29.01),2.41 (0.16)
textdiv,13.53 (2.59),17.65 (3.14),21.71 (3.47),25.75 (3.75),15.01 (2.99),1.02 (0.01),1.06 (0.06),1.16 (0.03),9.12 (0.3),1.99 (0.34),23.61 (6.27),0.69 (0.03),-0.35 (0.18),49.5 (29.01),67.48 (2.34)
qcpgpp,25.7 (3.65),33.98 (5.12),36.12 (5.69),35.27 (6.15),31.0 (1.41),1.21 (0.03),2.11 (0.14),1.86 (0.09),8.71 (0.35),4.72 (0.36),12.32 (2.3),0.45 (0.06),-0.57 (0.05),49.5 (29.01),2.57 (0.15)


### Training

In [22]:
# Use dedicated names for the training files so this cell does not overwrite
# the list of intrinsic result paths built in the previous section.
# Sorted because glob.glob returns matches in arbitrary order.
train_paths = sorted(glob.glob("train_*"))

for train_path in train_paths:
    print(train_path)
    ICD.display(postprocess_results(train_path, precision=3, final_order=final_order))

train_results_banking77.csv


Unnamed: 0_level_0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
orig,0.45 (0.057),0.939 (0.01),5.616 (0.25),274.611 (12.533),17.297 (0.79),9.99 (0.0),1.0 (1.0),2034.596 (8.205)
random,0.557 (0.0),0.936 (0.0),5.737 (0.01),268.436 (0.458),16.908 (0.029),10.0 (0.0),1.0 (1.0),7632.155 (33.965)
beam,0.621 (0.0),0.936 (0.0),5.762 (0.046),267.276 (2.146),16.835 (0.135),10.0 (0.0),1.0 (1.0),7606.109 (39.62)
diverse_beam,0.549 (0.0),0.932 (0.0),5.784 (0.087),266.298 (3.964),16.773 (0.249),10.0 (0.0),1.0 (1.0),7634.032 (44.048)
dips,0.632 (0.007),0.93 (0.0),5.586 (0.273),276.145 (13.883),17.393 (0.875),10.0 (0.0),1.0 (1.0),7603.505 (12.819)
qcpg,0.518 (0.0),0.932 (0.0),5.782 (0.11),266.43 (5.016),16.782 (0.316),10.0 (0.0),1.0 (1.0),7513.686 (31.405)
textdiv,0.554 (0.0),0.935 (0.0),5.652 (0.041),272.463 (1.999),17.162 (0.126),10.0 (0.0),1.0 (1.0),7607.493 (21.487)
qcpgpp,0.456 (0.0),0.936 (0.0),5.811 (0.051),265.031 (2.32),16.694 (0.146),10.0 (0.0),1.0 (1.0),7513.282 (30.755)


train_results_glue_mrpc.csv


Unnamed: 0_level_0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
orig,0.796 (0.104),0.851 (0.008),11.745 (0.092),146.873 (1.158),9.195 (0.073),9.99 (0.0),1.0 (1.0),1096.426 (33.025)
random,1.252 (0.231),0.852 (0.009),11.812 (0.076),146.039 (0.942),9.143 (0.059),9.333 (1.155),1.0 (1.0),4039.752 (526.125)
beam,0.718 (0.186),0.842 (0.014),11.825 (0.135),145.889 (1.676),9.134 (0.105),8.0 (2.0),1.0 (1.0),3453.71 (870.588)
diverse_beam,0.97 (0.131),0.85 (0.012),11.805 (0.106),146.135 (1.317),9.149 (0.083),9.0 (1.732),1.0 (1.0),3884.486 (726.528)
dips,0.967 (0.502),0.842 (0.01),12.052 (0.406),143.236 (4.761),8.968 (0.298),8.667 (2.309),1.0 (1.0),3824.913 (1037.091)
qcpg,0.746 (0.164),0.723 (0.044),11.836 (0.178),145.763 (2.173),9.126 (0.136),7.667 (2.082),1.0 (1.0),3310.945 (924.711)
textdiv,0.992 (0.142),0.851 (0.006),11.95 (0.13),144.358 (1.574),9.038 (0.098),9.333 (1.155),1.0 (1.0),4087.811 (502.379)
qcpgpp,0.856 (0.358),0.718 (0.057),11.799 (0.07),146.207 (0.863),9.154 (0.054),7.333 (2.309),1.0 (1.0),3174.695 (1024.712)


train_results_snips_built_in_intents.csv


Unnamed: 0_level_0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
orig,0.312 (0.046),0.938 (0.0),0.078 (0.054),265.392 (131.493),16.587 (8.218),7.457 (0.56),1.0 (1.0),61.442 (13.413)
random,0.045 (0.074),0.99 (0.017),0.262 (0.032),254.76 (29.314),19.3 (2.221),7.283 (0.577),1.0 (1.0),132.382 (14.452)
beam,0.001 (0.001),1.0 (0.0),0.23 (0.008),287.523 (10.496),21.782 (0.795),7.613 (1.149),1.0 (1.0),139.564 (24.017)
diverse_beam,0.019 (0.03),0.995 (0.009),0.272 (0.036),245.718 (30.132),18.615 (2.283),6.62 (0.572),1.0 (1.0),121.505 (9.972)
dips,0.025 (0.035),0.995 (0.009),0.297 (0.028),223.247 (19.895),16.913 (1.507),7.613 (1.149),1.0 (1.0),138.076 (16.452)
qcpg,0.23 (0.18),0.955 (0.015),0.297 (0.056),228.339 (47.203),17.298 (3.576),8.61 (1.143),1.0 (1.0),158.108 (25.048)
textdiv,0.005 (0.005),1.0 (0.0),0.208 (0.009),317.241 (13.349),24.033 (1.011),6.95 (0.0),1.0 (1.0),129.349 (1.142)
qcpgpp,0.226 (0.115),0.955 (0.0),0.25 (0.125),322.678 (183.305),24.445 (13.887),9.283 (1.155),1.0 (1.0),169.499 (24.555)


train_results_snips_built_in_intents_distilbert.csv


Unnamed: 0_level_0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
orig,0.615 (0.295),0.917 (0.095),0.04 (0.042),741.212 (486.533),46.326 (30.408),7.457 (0.56),1.0 (1.0),27.351 (1.967)
random,0.009 (0.004),1.0 (0.0),0.103 (0.034),683.483 (194.116),51.779 (14.706),7.283 (0.577),1.0 (1.0),66.191 (4.267)
beam,0.119 (0.196),1.0 (0.0),0.075 (0.006),878.475 (67.087),66.551 (5.083),6.953 (0.995),1.0 (1.0),64.054 (7.153)
diverse_beam,0.008 (0.006),1.0 (0.0),0.071 (0.008),935.357 (106.853),70.86 (8.095),7.283 (0.577),1.0 (1.0),64.961 (5.392)
dips,0.063 (0.033),0.985 (0.0),0.129 (0.054),582.994 (260.252),44.166 (19.716),6.95 (0.0),1.0 (1.0),66.881 (1.892)
qcpg,0.174 (0.089),0.939 (0.015),0.084 (0.036),876.701 (303.696),66.417 (23.007),8.61 (1.143),1.0 (1.0),79.809 (9.627)
textdiv,0.004 (0.003),1.0 (0.0),0.079 (0.0),840.03 (0.634),63.639 (0.048),7.617 (0.577),1.0 (1.0),68.85 (4.377)
qcpgpp,0.222 (0.068),0.944 (0.017),0.126 (0.095),713.826 (386.756),54.078 (29.3),8.277 (1.149),1.0 (1.0),75.549 (12.418)


train_results_tweet_eval_emotion.csv


Unnamed: 0_level_0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,run_num,run_time
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
orig,1.149 (0.111),0.811 (0.004),10.316 (0.793),138.279 (10.245),8.661 (0.642),9.98 (0.0),1.0 (1.0),1749.626 (207.253)
random,1.97 (0.0),0.777 (0.0),10.792 (0.799),132.175 (10.163),8.278 (0.637),10.0 (0.0),1.0 (1.0),8101.543 (437.1)
beam,1.872 (0.0),0.789 (0.0),11.005 (1.298),130.33 (15.351),8.163 (0.961),10.0 (0.0),1.0 (1.0),8094.111 (591.067)
diverse_beam,1.667 (0.0),0.787 (0.0),11.074 (1.072),129.17 (13.23),8.09 (0.829),10.0 (0.0),1.0 (1.0),8229.643 (579.571)
dips,1.959 (0.0),0.787 (0.0),11.28 (0.6),126.202 (6.508),7.904 (0.407),10.0 (0.0),1.0 (1.0),8437.534 (87.692)
qcpg,1.879 (0.0),0.768 (0.0),10.373 (0.634),137.333 (8.357),8.601 (0.524),10.0 (0.0),1.0 (1.0),5347.69 (294.37)
textdiv,1.245 (0.0),0.754 (0.0),10.97 (0.201),129.559 (2.398),8.115 (0.15),7.0 (0.0),1.0 (1.0),5793.086 (35.966)
qcpgpp,1.906 (0.0),0.775 (0.0),10.866 (0.239),130.821 (2.91),8.194 (0.182),10.0 (0.0),1.0 (1.0),5343.963 (310.947)
