In [7]:
import plotly.graph_objects as go
import plotly.io as pio
import numpy as np
def get_mean_and_error(scores):
    multiplier_scores = []
    for rand_trial_scores in scores:
        if len(multiplier_scores) == 0:
            for idx, score in enumerate(rand_trial_scores):
                multiplier_scores.append([score])
        else:
            for idx, score in enumerate(rand_trial_scores):
                multiplier_scores[idx].append(score)
    multiplier_scores_mean = [np.mean(scores) for scores in multiplier_scores]
    multiplier_scores_err = [1.96*np.std(scores, ddof=1)/np.sqrt(len(scores)) for scores in multiplier_scores]
    return multiplier_scores_mean, multiplier_scores_err


def visualize_scores(all_scores, labels, metric_name, out_path):
    fig = go.Figure()
    for scores, label in zip(all_scores, labels):
        
        if not isinstance(scores[0], list):
            multiplier = [i for i in range(len(scores))]
            fig.add_trace(go.Scatter(
                x=multiplier, y=scores,
                mode="lines+markers",
                name=label
            ))
        else:
            scores_mean, scores_err = get_mean_and_error(scores)
            multiplier = [i for i in range(len(scores_mean))]
            fig.add_trace(go.Scatter(
                x=multiplier, y=scores_mean,
                mode="lines+markers",
                error_y=dict(
                    type="data",
                    array=scores_err,  # Error magnitude
                    visible=True
                ),
                name=label
            ))

    fig.update_layout(xaxis_title="Intervention Multiplier Constant", yaxis_title=f"{metric_name}")
    fig.show()
        
    pio.full_figure_for_development(fig, warn=False)

    fig.write_image(f'{out_path}.pdf', engine="kaleido")
    fig.write_image(f'{out_path}.png', engine="kaleido")

In [8]:
# BLEU
llama_sw = [0.266, 0.255, 0.045, 0.046, 0.045, 0.046]
llama_rand1 = [0.253, 0.255, 0.253,0.250, 0.243, 0.241]
llama_rand2 = [0.259, 0.255, 0.244, 0.212, 0.169, 0.241]
llama_rand3 = [0.250, 0.255, 0.251, 0.244, 0.225, 0.209]
llama_rand4 = [0.253, 0.255, 0.247, 0.241, 0.223, 0.205]
llama_rand5 = [0.182, 0.255, 0.270, 0.264, 0.260, 0.258]
llama_rand = [llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]

qwen_sw = [0.011, 0.064, 0.064, 0.004, 0.042, 0.042]
qwen_rand1 = [0.003, 0.064, 0.064,0.059, 0.062, 0.060]
qwen_rand2 = [0.062, 0.064, 0.062, 0.063, 0.067, 0.068]
qwen_rand3 = [0.061, 0.064, 0.063, 0.058, 0.058, 0.055]
qwen_rand4 = [0.063, 0.064, 0.062, 0.063, 0.063, 0.064]
qwen_rand5 = [0.063, 0.064, 0.067, 0.060, 0.064, 0.064]
qwen_rand = [qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

labels = ['llama-3.2-3b-instruct_sw-points', 'llama-3.2-3b-instruct_random', 'qwen-7b-instruct_sw-points', 'qwen-7b-instruct_random']
all_scores = [llama_sw, llama_rand, qwen_sw, qwen_rand]
metric_name = 'BLEU'
out_path = '../figures/bleu-multiplier-effects'
visualize_scores(all_scores, labels, metric_name, out_path)


In [9]:
# COMET
llama_sw = [0.76,0.73,0.69,0.68,0.67,0.62]
llama_rand1 = [0.72,0.73,0.73,0.73,0.72,0.72]
llama_rand2 = [0.73,0.73, 0.72,0.69,0.66,0.63]
llama_rand3 = [0.73,0.73,0.72,0.71,0.7,0.68]
llama_rand4 = [0.73,0.73,0.72,0.71,0.69,0.67]
llama_rand5 = [0.65,0.73,0.75,0.75,0.74,0.74]
llama_rand = [llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]

qwen_sw = [0.4,0.53,0.55,0.56,0.57,0.57]
qwen_rand1 = [0.37,0.53,0.54,0.54,0.54,0.53]
qwen_rand2 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand3 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand4 = [0.54,0.53,0.55,0.55,0.55,0.55]
qwen_rand5 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand = [qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

labels = ['llama-3.2-3b-instruct_sw-points', 'llama-3.2-3b-instruct_random', 'qwen-7b-instruct_sw-points', 'qwen-7b-instruct_random']
all_scores = [llama_sw, llama_rand, qwen_sw, qwen_rand]
metric_name = 'COMET'
out_path = '../figures/comet-multiplier-effects'
visualize_scores(all_scores, labels, metric_name, out_path)


In [10]:
# CS Proba
llama_sw = [0.01,0.02,0.38,0.47,0.39,0.31]
llama_rand1 = [0.04,0.02,0.03,0.02,0.03,0.05]
llama_rand2 = [0.03,0.02,0.03,0.07,0.1,0.22]
llama_rand3 = [0.04,0.02,0.04,0.06,0.07,0.05]
llama_rand4 = [0.02,0.02,0.03,0.05,0.06,0.08]
llama_rand5 = [0.12,0.02,0.02,0.03,0.04,0.05]
llama_rand = [llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]

qwen_sw = [0.3,0.40,0.45,0.42,0.47,0.35]
qwen_rand1 = [0.4,0.4,0.37,0.38,0.4,0.37]
qwen_rand2 = [0.4,0.4,0.39,0.38,0.4,0.37]
qwen_rand3 = [0.42,0.4,0.44,0.39,0.39,0.4]
qwen_rand4 = [0.43,0.4,0.39,0.37,0.41,0.43]
qwen_rand5 = [0.37,0.4,0.39,0.39,0.45,0.43]
qwen_rand = [qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

labels = ['llama-3.2-3b-instruct_sw-points', 'llama-3.2-3b-instruct_random', 'qwen-7b-instruct_sw-points', 'qwen-7b-instruct_random']
all_scores = [llama_sw, llama_rand, qwen_sw, qwen_rand]
metric_name = 'CS Probability'
out_path = '../figures/cs-proba-multiplier-effects'
visualize_scores(all_scores, labels, metric_name, out_path)


In [4]:
import numpy as np
import scipy.stats as stats
from sklearn.linear_model import LinearRegression
def get_corr(all_var1_scores, all_var2_scores, x_title, y_title, out_path):
    agg_var1 = []
    agg_var2 = []
    for var1_scores, var2_scores in zip(all_var1_scores, all_var2_scores):
        var1_scores = var1_scores[1:]
        var1_scores_diff = []
        for idx, ele in enumerate(var1_scores):
            if idx==0:
                continue
            var1_scores_diff.append(var1_scores[idx]-var1_scores[0])
        
        agg_var1.extend(var1_scores_diff)
        agg_var2.extend(var2_scores)
        assert len(agg_var1) == len (agg_var2)
    agg_var1 = np.array(agg_var1)
    agg_var2 = np.array(agg_var2)
    print(agg_var1)
    print(agg_var2)
    correlation, p_value = stats.spearmanr(agg_var2, agg_var1)
    print(f"Spearman Rho Coefficient: {correlation:.4f}")
    print(f"P-value: {p_value:.4f}")

    
    

In [24]:
# JS Divergence Llama vs BLEU

llama_sw = [0.76,0.73,0.69,0.68,0.67,0.62]
llama_rand1 = [0.72,0.73,0.73,0.73,0.72,0.72]
llama_rand2 = [0.73,0.73, 0.72,0.69,0.66,0.63]
llama_rand3 = [0.73,0.73,0.72,0.71,0.7,0.68]
llama_rand4 = [0.73,0.73,0.72,0.71,0.69,0.67]
llama_rand5 = [0.65,0.73,0.75,0.75,0.74,0.74]
llama_comet = [llama_sw, llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(llama_comet, js_div_llama, x_title, y_title, out_path)


[-0.04 -0.05 -0.06 -0.11  0.    0.   -0.01 -0.01 -0.01 -0.04 -0.07 -0.1
 -0.01 -0.02 -0.03 -0.05 -0.01 -0.02 -0.04 -0.06  0.02  0.02  0.01  0.01]
[0.34 0.45 0.5  0.62 0.01 0.04 0.08 0.14 0.02 0.07 0.16 0.3  0.01 0.04
 0.08 0.14 0.01 0.05 0.12 0.23 0.03 0.07 0.11 0.13]
Spearman Rho Coefficient: -0.7015
P-value: 0.0001


In [6]:
# JS Divergence Llama vs COMET

js_div_llama_sw_point = [0.34, 0.45, 0.50, 0.62]
js_div_llama_rand1 = [0.01,0.04,0.08,0.14]
js_div_llama_rand2 = [0.02,0.07,0.16,0.30]
js_div_llama_rand3 = [0.01,0.04,0.08,0.14]
js_div_llama_rand4 = [0.01,0.05,0.12,0.23]
js_div_llama_rand5 = [0.03,0.07,0.11,0.13]
js_div_llama = [js_div_llama_sw_point, js_div_llama_rand1, js_div_llama_rand2, js_div_llama_rand3, js_div_llama_rand4, js_div_llama_rand5]

llama_sw = [0.266, 0.255, 0.045, 0.046, 0.045, 0.046]
llama_rand1 = [0.253, 0.255, 0.253,0.250, 0.243, 0.241]
llama_rand2 = [0.259, 0.255, 0.244, 0.212, 0.169, 0.241]
llama_rand3 = [0.250, 0.255, 0.251, 0.244, 0.225, 0.209]
llama_rand4 = [0.253, 0.255, 0.247, 0.241, 0.223, 0.205]
llama_rand5 = [0.182, 0.255, 0.270, 0.264, 0.260, 0.258]
llama_bleu = [llama_sw, llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(llama_bleu, js_div_llama, x_title, y_title, out_path)


[-0.21  -0.209 -0.21  -0.209 -0.002 -0.005 -0.012 -0.014 -0.011 -0.043
 -0.086 -0.014 -0.004 -0.011 -0.03  -0.046 -0.008 -0.014 -0.032 -0.05
  0.015  0.009  0.005  0.003]
[0.34 0.45 0.5  0.62 0.01 0.04 0.08 0.14 0.02 0.07 0.16 0.3  0.01 0.04
 0.08 0.14 0.01 0.05 0.12 0.23 0.03 0.07 0.11 0.13]
Spearman Rho Coefficient: -0.7505
P-value: 0.0000


In [25]:
# JS Divergence Llama vs  CS Proba


llama_sw = [0.01,0.02,0.38,0.47,0.39,0.31]
llama_rand1 = [0.04,0.02,0.03,0.02,0.03,0.05]
llama_rand2 = [0.03,0.02,0.03,0.07,0.1,0.22]
llama_rand3 = [0.04,0.02,0.04,0.06,0.07,0.05]
llama_rand4 = [0.02,0.02,0.03,0.05,0.06,0.08]
llama_rand5 = [0.12,0.02,0.02,0.03,0.04,0.05]
llama_proba = [llama_sw, llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(llama_proba, js_div_llama, x_title, y_title, out_path)


[0.36 0.45 0.37 0.29 0.01 0.   0.01 0.03 0.01 0.05 0.08 0.2  0.02 0.04
 0.05 0.03 0.01 0.03 0.04 0.06 0.   0.01 0.02 0.03]
[0.34 0.45 0.5  0.62 0.01 0.04 0.08 0.14 0.02 0.07 0.16 0.3  0.01 0.04
 0.08 0.14 0.01 0.05 0.12 0.23 0.03 0.07 0.11 0.13]
Spearman Rho Coefficient: 0.8201
P-value: 0.0000


In [5]:
# JS Divergence Qwen vs  BLEU
js_div_qwen_sw_point = [0.07, 0.29, 0.51, 0.61]
js_div_qwen_rand1 = [0.01,0.05,0.13,0.24]
js_div_qwen_rand2 = [0.005,0.020,0.048,0.094]
js_div_qwen_rand3 = [0.009,0.038,0.091,0.168]
js_div_qwen_rand4 = [0.004,0.015,0.030,0.046]
js_div_qwen_rand5 = [0.008,0.025,0.045,0.067]
js_div_qwen = [js_div_qwen_sw_point, js_div_qwen_rand1, js_div_qwen_rand2, js_div_qwen_rand3, js_div_qwen_rand4, js_div_qwen_rand5]

qwen_sw = [0.011, 0.064, 0.064, 0.004, 0.042, 0.042]
qwen_rand1 = [0.003, 0.064, 0.064,0.059, 0.062, 0.060]
qwen_rand2 = [0.062, 0.064, 0.062, 0.063, 0.067, 0.068]
qwen_rand3 = [0.061, 0.064, 0.063, 0.058, 0.058, 0.055]
qwen_rand4 = [0.063, 0.064, 0.062, 0.063, 0.063, 0.064]
qwen_rand5 = [0.063, 0.064, 0.067, 0.060, 0.064, 0.064]
qwen_bleu = [qwen_sw, qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(qwen_bleu, js_div_qwen, x_title, y_title, out_path)

[ 0.    -0.06  -0.022 -0.022  0.    -0.005 -0.002 -0.004 -0.002 -0.001
  0.003  0.004 -0.001 -0.006 -0.006 -0.009 -0.002 -0.001 -0.001  0.
  0.003 -0.004  0.     0.   ]
[0.07  0.29  0.51  0.61  0.01  0.05  0.13  0.24  0.005 0.02  0.048 0.094
 0.009 0.038 0.091 0.168 0.004 0.015 0.03  0.046 0.008 0.025 0.045 0.067]
Spearman Rho Coefficient: -0.4432
P-value: 0.0301


In [None]:
qwen_sw = [0.011, 0.064, 0.064, 0.004, 0.042, 0.042]
qwen_rand1 = [0.003, 0.064, 0.064,0.059, 0.062, 0.060]
qwen_rand2 = [0.062, 0.064, 0.062, 0.063, 0.067, 0.068]
qwen_rand3 = [0.061, 0.064, 0.063, 0.058, 0.058, 0.055]
qwen_rand4 = [0.063, 0.064, 0.062, 0.063, 0.063, 0.064]
qwen_rand5 = [0.063, 0.064, 0.067, 0.060, 0.064, 0.064]
qwen_bleu = [qwen_sw, qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(qwen_bleu, js_div_qwen, x_title, y_title, out_path)

In [27]:
qwen_sw = [0.4,0.53,0.55,0.56,0.57,0.57]
qwen_rand1 = [0.37,0.53,0.54,0.54,0.54,0.53]
qwen_rand2 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand3 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand4 = [0.54,0.53,0.55,0.55,0.55,0.55]
qwen_rand5 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_comet = [qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(qwen_comet, js_div_qwen, x_title, y_title, out_path)

[0.01 0.01 0.01 0.   0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.02 0.02
 0.02 0.02 0.01 0.01 0.01 0.01]
[0.07  0.29  0.51  0.61  0.01  0.05  0.13  0.24  0.01  0.02  0.05  0.09
 0.01  0.04  0.09  0.17  0.004 0.01  0.03  0.05 ]
Spearman Rho Coefficient: -0.1711
P-value: 0.4709


In [31]:
qwen_sw = [0.4,0.53,0.55,0.56,0.57,0.57]
qwen_rand1 = [0.37,0.53,0.54,0.54,0.54,0.53]
qwen_rand2 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand3 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand4 = [0.54,0.53,0.55,0.55,0.55,0.55]
qwen_rand5 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_comet = [qwen_sw, qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(qwen_comet, js_div_qwen, x_title, y_title, out_path)

[0.02 0.03 0.04 0.04 0.01 0.01 0.01 0.   0.01 0.01 0.01 0.01 0.01 0.01
 0.01 0.01 0.02 0.02 0.02 0.02 0.01 0.01 0.01 0.01]
[0.07  0.29  0.51  0.61  0.01  0.05  0.13  0.24  0.005 0.02  0.048 0.094
 0.009 0.038 0.091 0.168 0.004 0.015 0.03  0.046 0.008 0.025 0.045 0.067]
Spearman Rho Coefficient: 0.1906
P-value: 0.3723


In [32]:
qwen_sw = [0.3,0.40,0.45,0.42,0.47,0.35]
qwen_rand1 = [0.4,0.4,0.37,0.38,0.4,0.37]
qwen_rand2 = [0.4,0.4,0.39,0.38,0.4,0.37]
qwen_rand3 = [0.42,0.4,0.44,0.39,0.39,0.4]
qwen_rand4 = [0.43,0.4,0.39,0.37,0.41,0.43]
qwen_rand5 = [0.37,0.4,0.39,0.39,0.45,0.43]
qwen_proba = [qwen_sw, qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(qwen_proba, js_div_qwen, x_title, y_title, out_path)

[ 0.05  0.02  0.07 -0.05 -0.03 -0.02  0.   -0.03 -0.01 -0.02  0.   -0.03
  0.04 -0.01 -0.01  0.   -0.01 -0.03  0.01  0.03 -0.01 -0.01  0.05  0.03]
[0.07  0.29  0.51  0.61  0.01  0.05  0.13  0.24  0.005 0.02  0.048 0.094
 0.009 0.038 0.091 0.168 0.004 0.015 0.03  0.046 0.008 0.025 0.045 0.067]
Spearman Rho Coefficient: 0.1011
P-value: 0.6381


In [7]:
# Ent Diff Llama vs BLEU
entropy_llama_sw_point = [3.28, 5.51, 7.34, 12.39]
entropy_llama_rand1 = [0.12,0.59,1.36,2.31]
entropy_llama_rand2 = [0.52,1.34,2.63,4.75]
entropy_llama_rand3 = [0.14,0.53,1.11,1.87]
entropy_llama_rand4 = [-0.01,0.33,1.11,2.75]
entropy_llama_rand5 = [-0.18,0.022,0.107,0.895]
entropy_llama = [entropy_llama_sw_point, entropy_llama_rand1, entropy_llama_rand2, entropy_llama_rand3, entropy_llama_rand4, entropy_llama_rand5]

llama_sw = [0.266, 0.255, 0.045, 0.046, 0.045, 0.046]
llama_rand1 = [0.253, 0.255, 0.253,0.250, 0.243, 0.241]
llama_rand2 = [0.259, 0.255, 0.244, 0.212, 0.169, 0.241]
llama_rand3 = [0.250, 0.255, 0.251, 0.244, 0.225, 0.209]
llama_rand4 = [0.253, 0.255, 0.247, 0.241, 0.223, 0.205]
llama_rand5 = [0.182, 0.255, 0.270, 0.264, 0.260, 0.258]
llama_bleu = [llama_sw, llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(llama_bleu, entropy_llama, x_title, y_title, out_path)


[-0.21  -0.209 -0.21  -0.209 -0.002 -0.005 -0.012 -0.014 -0.011 -0.043
 -0.086 -0.014 -0.004 -0.011 -0.03  -0.046 -0.008 -0.014 -0.032 -0.05
  0.015  0.009  0.005  0.003]
[ 3.280e+00  5.510e+00  7.340e+00  1.239e+01  1.200e-01  5.900e-01
  1.360e+00  2.310e+00  5.200e-01  1.340e+00  2.630e+00  4.750e+00
  1.400e-01  5.300e-01  1.110e+00  1.870e+00 -1.000e-02  3.300e-01
  1.110e+00  2.750e+00 -1.800e-01  2.200e-02  1.070e-01  8.950e-01]
Spearman Rho Coefficient: -0.8759
P-value: 0.0000


In [36]:
# Ent Diff Llama vs Comet
llama_sw = [0.76,0.73,0.69,0.68,0.67,0.62]
llama_rand1 = [0.72,0.73,0.73,0.73,0.72,0.72]
llama_rand2 = [0.73,0.73, 0.72,0.69,0.66,0.63]
llama_rand3 = [0.73,0.73,0.72,0.71,0.7,0.68]
llama_rand4 = [0.73,0.73,0.72,0.71,0.69,0.67]
llama_rand5 = [0.65,0.73,0.75,0.75,0.74,0.74]
llama_comet = [llama_sw, llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(llama_comet, entropy_llama, x_title, y_title, out_path)

[-0.04 -0.05 -0.06 -0.11  0.    0.   -0.01 -0.01 -0.01 -0.04 -0.07 -0.1
 -0.01 -0.02 -0.03 -0.05 -0.01 -0.02 -0.04 -0.06  0.02  0.02  0.01  0.01]
[ 3.280e+00  5.510e+00  7.340e+00  1.239e+01  1.200e-01  5.900e-01
  1.360e+00  2.310e+00  5.200e-01  1.340e+00  2.630e+00  4.750e+00
  1.400e-01  5.300e-01  1.110e+00  1.870e+00 -1.000e-02  3.300e-01
  1.110e+00  2.750e+00 -1.800e-01  2.200e-02  1.070e-01  8.950e-01]
Spearman Rho Coefficient: -0.8413
P-value: 0.0000


In [37]:
# Ent Diff Llama vs CS Proba
llama_sw = [0.01,0.02,0.38,0.47,0.39,0.31]
llama_rand1 = [0.04,0.02,0.03,0.02,0.03,0.05]
llama_rand2 = [0.03,0.02,0.03,0.07,0.1,0.22]
llama_rand3 = [0.04,0.02,0.04,0.06,0.07,0.05]
llama_rand4 = [0.02,0.02,0.03,0.05,0.06,0.08]
llama_rand5 = [0.12,0.02,0.02,0.03,0.04,0.05]
llama_proba = [llama_sw, llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(llama_proba, entropy_llama, x_title, y_title, out_path)

[0.36 0.45 0.37 0.29 0.01 0.   0.01 0.03 0.01 0.05 0.08 0.2  0.02 0.04
 0.05 0.03 0.01 0.03 0.04 0.06 0.   0.01 0.02 0.03]
[ 3.280e+00  5.510e+00  7.340e+00  1.239e+01  1.200e-01  5.900e-01
  1.360e+00  2.310e+00  5.200e-01  1.340e+00  2.630e+00  4.750e+00
  1.400e-01  5.300e-01  1.110e+00  1.870e+00 -1.000e-02  3.300e-01
  1.110e+00  2.750e+00 -1.800e-01  2.200e-02  1.070e-01  8.950e-01]
Spearman Rho Coefficient: 0.8386
P-value: 0.0000


In [8]:
# Ent Diff Qwen vs BLEU
entropy_qwen_sw_point = [1.09, 8.097, 15.472, 18.55]
entropy_qwen_rand1 = [0.164,0.550,1.310,2.479]
entropy_qwen_rand2 = [0.069,0.196,0.430,0.875]
entropy_qwen_rand3 = [0.130,0.397,0.873,1.611]
entropy_qwen_rand4 = [-0.021,-0.012,0.026,0.073]
entropy_qwen_rand5 = [0.0151,0.101,0.256,0.508]
entropy_qwen = [entropy_qwen_sw_point, entropy_qwen_rand1, entropy_qwen_rand2, entropy_qwen_rand3, entropy_qwen_rand4, entropy_qwen_rand5]

qwen_sw = [0.011, 0.064, 0.064, 0.004, 0.042, 0.042]
qwen_rand1 = [0.003, 0.064, 0.064,0.059, 0.062, 0.060]
qwen_rand2 = [0.062, 0.064, 0.062, 0.063, 0.067, 0.068]
qwen_rand3 = [0.061, 0.064, 0.063, 0.058, 0.058, 0.055]
qwen_rand4 = [0.063, 0.064, 0.062, 0.063, 0.063, 0.064]
qwen_rand5 = [0.063, 0.064, 0.067, 0.060, 0.064, 0.064]
qwen_bleu = [qwen_sw, qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(qwen_bleu, entropy_qwen, x_title, y_title, out_path)


[ 0.    -0.06  -0.022 -0.022  0.    -0.005 -0.002 -0.004 -0.002 -0.001
  0.003  0.004 -0.001 -0.006 -0.006 -0.009 -0.002 -0.001 -0.001  0.
  0.003 -0.004  0.     0.   ]
[ 1.0900e+00  8.0970e+00  1.5472e+01  1.8550e+01  1.6400e-01  5.5000e-01
  1.3100e+00  2.4790e+00  6.9000e-02  1.9600e-01  4.3000e-01  8.7500e-01
  1.3000e-01  3.9700e-01  8.7300e-01  1.6110e+00 -2.1000e-02 -1.2000e-02
  2.6000e-02  7.3000e-02  1.5100e-02  1.0100e-01  2.5600e-01  5.0800e-01]
Spearman Rho Coefficient: -0.4739
P-value: 0.0193


In [39]:
# Ent Diff Qwen vs COMET

qwen_sw = [0.4,0.53,0.55,0.56,0.57,0.57]
qwen_rand1 = [0.37,0.53,0.54,0.54,0.54,0.53]
qwen_rand2 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand3 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_rand4 = [0.54,0.53,0.55,0.55,0.55,0.55]
qwen_rand5 = [0.55,0.53,0.54,0.54,0.54,0.54]
qwen_comet = [qwen_sw, qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]
x_title = 'JS Divergence with Clean Output (0-1)'
y_title = 'BLEU Difference(-1 - 1)'
out_path = '../figures/llama-js-div-bleu'
get_corr(qwen_comet, entropy_qwen, x_title, y_title, out_path)

[0.02 0.03 0.04 0.04 0.01 0.01 0.01 0.   0.01 0.01 0.01 0.01 0.01 0.01
 0.01 0.01 0.02 0.02 0.02 0.02 0.01 0.01 0.01 0.01]
[ 1.0900e+00  8.0970e+00  1.5472e+01  1.8550e+01  1.6400e-01  5.5000e-01
  1.3100e+00  2.4790e+00  6.9000e-02  1.9600e-01  4.3000e-01  8.7500e-01
  1.3000e-01  3.9700e-01  8.7300e-01  1.6110e+00 -2.1000e-02 -1.2000e-02
  2.6000e-02  7.3000e-02  1.5100e-02  1.0100e-01  2.5600e-01  5.0800e-01]
Spearman Rho Coefficient: 0.0598
P-value: 0.7812


In [40]:
# Ent Diff Qwen vs Proba

qwen_sw = [0.3,0.40,0.45,0.42,0.47,0.35]
qwen_rand1 = [0.4,0.4,0.37,0.38,0.4,0.37]
qwen_rand2 = [0.4,0.4,0.39,0.38,0.4,0.37]
qwen_rand3 = [0.42,0.4,0.44,0.39,0.39,0.4]
qwen_rand4 = [0.43,0.4,0.39,0.37,0.41,0.43]
qwen_rand5 = [0.37,0.4,0.39,0.39,0.45,0.43]
qwen_proba = [qwen_sw, qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

get_corr(qwen_proba, entropy_qwen, x_title, y_title, out_path)

[ 0.05  0.02  0.07 -0.05 -0.03 -0.02  0.   -0.03 -0.01 -0.02  0.   -0.03
  0.04 -0.01 -0.01  0.   -0.01 -0.03  0.01  0.03 -0.01 -0.01  0.05  0.03]
[ 1.0900e+00  8.0970e+00  1.5472e+01  1.8550e+01  1.6400e-01  5.5000e-01
  1.3100e+00  2.4790e+00  6.9000e-02  1.9600e-01  4.3000e-01  8.7500e-01
  1.3000e-01  3.9700e-01  8.7300e-01  1.6110e+00 -2.1000e-02 -1.2000e-02
  2.6000e-02  7.3000e-02  1.5100e-02  1.0100e-01  2.5600e-01  5.0800e-01]
Spearman Rho Coefficient: 0.0673
P-value: 0.7547


In [13]:
js_div_qwen[1:]

[[0.01, 0.05, 0.13, 0.24],
 [0.005, 0.02, 0.048, 0.094],
 [0.009, 0.038, 0.091, 0.168],
 [0.004, 0.015, 0.03, 0.046],
 [0.008, 0.025, 0.045, 0.067]]

In [16]:
# BLEU
llama_sw = [0.266, 0.255, 0.045, 0.046, 0.045, 0.046]
llama_rand1 = [0.253, 0.255, 0.253,0.250, 0.243, 0.241]
llama_rand2 = [0.259, 0.255, 0.244, 0.212, 0.169, 0.241]
llama_rand3 = [0.250, 0.255, 0.251, 0.244, 0.225, 0.209]
llama_rand4 = [0.253, 0.255, 0.247, 0.241, 0.223, 0.205]
llama_rand5 = [0.182, 0.255, 0.270, 0.264, 0.260, 0.258]
llama_rand = [llama_rand1, llama_rand2, llama_rand3, llama_rand4, llama_rand5]

qwen_sw = [0.011, 0.064, 0.064, 0.004, 0.042, 0.042]
qwen_rand1 = [0.003, 0.064, 0.064,0.059, 0.062, 0.060]
qwen_rand2 = [0.062, 0.064, 0.062, 0.063, 0.067, 0.068]
qwen_rand3 = [0.061, 0.064, 0.063, 0.058, 0.058, 0.055]
qwen_rand4 = [0.063, 0.064, 0.062, 0.063, 0.063, 0.064]
qwen_rand5 = [0.063, 0.064, 0.067, 0.060, 0.064, 0.064]
qwen_rand = [qwen_rand1, qwen_rand2, qwen_rand3, qwen_rand4, qwen_rand5]

labels = ['llama-3.2-3b-instruct_sw-points', 'llama-3.2-3b-instruct_random', 'qwen-7b-instruct_sw-points', 'qwen-7b-instruct_random']
all_scores = [entropy_llama[0], entropy_llama[1:], entropy_qwen[0], entropy_qwen[1:]]
metric_name = 'Tokens Entropy Difference After Intervention'
out_path = '../figures/entropy-multiplier-effects'
visualize_scores(all_scores, labels, metric_name, out_path)
