# Analysis of calibration results

In [1]:
import os
import pandas as pd

In [2]:
def loss(row, penalty_weight=0.35):
    """Score one calibration run; the notebook ranks runs by ascending value.

    The score is the negated product of the average Sentence-BERT similarity
    and the non-contradiction rate (``1 - attack_contradiction_rate``), plus a
    penalty proportional to the share of failed attacks among all failed and
    successful ones.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys ``fail``, ``succ``, ``avg_sentence_bert_similarity`` and
            ``attack_contradiction_rate``.
        penalty_weight: Multiplier applied to the failure rate. Defaults to
            0.35, the value previously hard-coded in this function.

    Returns:
        The loss value. NaN is returned when ``fail + succ`` is zero, because
        the failure rate is undefined in that case.
    """
    attempts = row["fail"] + row["succ"]
    if attempts == 0:
        # Undefined failure rate: return NaN explicitly instead of raising
        # ZeroDivisionError (plain numbers) or emitting a RuntimeWarning
        # (numpy floats). pandas sorts NaN last by default.
        return float("nan")

    # compute penalty for failed attacks
    penalty = penalty_weight * (row["fail"] / attempts)

    # loss
    return -(row['avg_sentence_bert_similarity'] * (1-row['attack_contradiction_rate'])) + penalty

#### Calibration performed on the `rotten tomatoes` dataset

In [3]:
# Load every calibration result file for the rotten tomatoes dataset.
dir_rotten = "./rotten-calibration"

# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden files (e.g. .ipynb_checkpoints). Keep regular, non-hidden files
# only and sort them so the concatenated row order is reproducible.
rotten_files = sorted(
    file for file in os.listdir(dir_rotten)
    if not file.startswith(".") and os.path.isfile(os.path.join(dir_rotten, file))
)
rotten_results = [pd.read_csv(os.path.join(dir_rotten, file)) for file in rotten_files]

# merge dataframes vertically
rotten_result = pd.concat(rotten_results, axis=0, ignore_index=True)

# score every run with the notebook's loss function
rotten_result['objective'] = rotten_result.apply(loss, axis=1)

# show the 15 runs with the lowest objective
rotten_result.sort_values(by=['objective']).head(15)

Unnamed: 0,w1,w2,w3,succ,fail,skip,attack_contradiction_rate,avg_sentence_bert_similarity,loss,objective
62,0.713061,0.081928,0.205011,300.0,106.0,94.0,0.13,0.905,-0.735133,-0.695971
80,0.813817,0.005211,0.180972,296.0,110.0,94.0,0.128,0.904,-0.734101,-0.69346
149,0.735816,0.05759,0.206594,298.0,108.0,94.0,0.131,0.905,-0.733243,-0.693342
51,0.256207,0.253033,0.49076,301.0,105.0,94.0,0.136,0.907,-0.731924,-0.693131
132,0.748351,0.063436,0.188213,297.0,109.0,94.0,0.131,0.905,-0.73275,-0.692479
123,0.672501,0.045504,0.281995,301.0,105.0,94.0,0.136,0.906,-0.73106,-0.692267
28,0.322098,0.191489,0.486413,302.0,104.0,94.0,0.139,0.908,-0.730556,-0.692133
45,0.703666,0.062213,0.234121,301.0,105.0,94.0,0.136,0.905,-0.730196,-0.691403
1,0.264381,0.247054,0.488564,300.0,106.0,94.0,0.137,0.907,-0.730524,-0.691362
117,0.754889,0.06698,0.17813,296.0,110.0,94.0,0.132,0.905,-0.731353,-0.690712


#### Calibration performed on the `yelp-polarity` dataset

In [4]:
# Load every calibration result file for the yelp-polarity dataset.
dir_yelp = "./yelp-calibration"

# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden files (e.g. .ipynb_checkpoints). Keep regular, non-hidden files
# only and sort them so the concatenated row order is reproducible.
yelp_files = sorted(
    file for file in os.listdir(dir_yelp)
    if not file.startswith(".") and os.path.isfile(os.path.join(dir_yelp, file))
)
yelp_results = [pd.read_csv(os.path.join(dir_yelp, file)) for file in yelp_files]

# merge dataframes vertically
yelp_result = pd.concat(yelp_results, axis=0, ignore_index=True)

# score every run with the notebook's loss function
yelp_result['objective'] = yelp_result.apply(loss, axis=1)

# show the 15 runs with the lowest objective
yelp_result.sort_values(by=['objective']).head(15)

Unnamed: 0,w1,w2,w3,succ,fail,skip,attack_contradiction_rate,avg_sentence_bert_similarity,loss,objective
12,0.284066,0.107318,0.608615,431.0,58.0,11.0,0.501,0.915,-0.426933,-0.415072
8,0.247249,0.104278,0.648473,431.0,58.0,11.0,0.501,0.915,-0.426933,-0.415072
18,0.488513,0.463658,0.047829,356.0,133.0,11.0,0.441,0.908,-0.439576,-0.412378
0,0.365364,0.057758,0.576878,429.0,60.0,11.0,0.503,0.916,-0.424577,-0.412307
23,0.238912,0.215388,0.5457,430.0,59.0,11.0,0.505,0.915,-0.422761,-0.410696
1,0.323198,0.150616,0.526185,431.0,58.0,11.0,0.506,0.915,-0.422358,-0.410497
22,0.376606,0.116121,0.507273,428.0,61.0,11.0,0.505,0.916,-0.422234,-0.409759
21,0.495187,0.486649,0.018164,356.0,133.0,11.0,0.444,0.907,-0.436296,-0.409098
20,0.487175,0.480805,0.03202,357.0,132.0,11.0,0.445,0.907,-0.4359,-0.408906
38,0.008272,0.21589,0.775839,399.0,90.0,11.0,0.489,0.924,-0.426152,-0.407747


In [10]:
# LaTeX source for the ten yelp runs with the lowest objective (index column omitted)
(
    yelp_result
    .sort_values(by="objective")
    .head(10)
    .to_latex(index=False)
)

  yelp_result.sort_values(by=['objective']).head(10).to_latex(index=False)


'\\begin{tabular}{rrrrrrrrrr}\n\\toprule\n      w1 &       w2 &       w3 &  succ &  fail &  skip &  attack\\_contradiction\\_rate &  avg\\_sentence\\_bert\\_similarity &      loss &  objective \\\\\n\\midrule\n0.284066 & 0.107318 & 0.608615 & 431.0 &  58.0 &  11.0 &                      0.501 &                         0.915 & -0.426933 &  -0.415072 \\\\\n0.247249 & 0.104278 & 0.648473 & 431.0 &  58.0 &  11.0 &                      0.501 &                         0.915 & -0.426933 &  -0.415072 \\\\\n0.488513 & 0.463658 & 0.047829 & 356.0 & 133.0 &  11.0 &                      0.441 &                         0.908 & -0.439576 &  -0.412378 \\\\\n0.365364 & 0.057758 & 0.576878 & 429.0 &  60.0 &  11.0 &                      0.503 &                         0.916 & -0.424577 &  -0.412307 \\\\\n0.238912 & 0.215388 & 0.545700 & 430.0 &  59.0 &  11.0 &                      0.505 &                         0.915 & -0.422761 &  -0.410696 \\\\\n0.323198 & 0.150616 & 0.526185 & 431.0 &  58.0 &  11.0 