# Persuasiveness: one-way ANOVA and Tukey HSD across model groups

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import os 

# Score files to compare, one per model group. Every file is named
# "<group>_scores.csv" inside the same directory, so the paths are generated
# from the group tags instead of being spelled out one by one.
score_dir = "./Statistics/data"
group_tags = ["1b_vanilla", "8b_vanilla", "405b", "1b_distil", "8b_distil"]
csv_files = [f"{score_dir}/{tag}_scores.csv" for tag in group_tags]

# Load every score file, tag its rows with the model group parsed from the
# file name (e.g. "1b_vanilla_scores.csv" -> "1b_vanilla"), and stack the
# results into a single long frame with a fresh 0..n-1 index.
#
# The per-file frames are collected first and concatenated once: calling
# pd.concat inside the loop re-copies all previously accumulated rows on
# every iteration.
frames = []
for file in csv_files:
    group_name = os.path.basename(file).split('_scores')[0]
    df = pd.read_csv(file).assign(group=group_name)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame to keep
# the behaviour of the original accumulator when csv_files is empty.
data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

display(data)

# One-way ANOVA: does the mean score differ between model groups?
# `group` is a string column, so the formula treats it as a categorical factor.
score_by_group = ols('score ~ group', data=data)
model = score_by_group.fit()

anova_table = sm.stats.anova_lm(model)
print(anova_table)

Unnamed: 0,score,group
0,3.40,1b_vanilla
1,2.70,1b_vanilla
2,2.80,1b_vanilla
3,2.10,1b_vanilla
4,3.15,1b_vanilla
...,...,...
7516,4.30,8b_distil
7517,1.00,8b_distil
7518,1.55,8b_distil
7519,4.05,8b_distil


              df       sum_sq     mean_sq           F  PR(>F)
group        4.0  1096.512703  274.128176  600.678142     0.0
Residual  7516.0  3430.035530    0.456364         NaN     NaN


In [27]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import numpy as np

# Post-hoc pairwise comparison of group means (Tukey HSD) at a 5%
# family-wise error rate, following up on the ANOVA above.
# NOTE(review): the numpy print precision set here does not appear to affect
# the summary table below (it shows 4 decimals) - confirm it is still needed.
np.set_printoptions(precision=3)

tukey_result = pairwise_tukeyhsd(data['score'], data['group'], alpha=0.05)
print(tukey_result)

    Multiple Comparison of Means - Tukey HSD, FWER=0.05     
  group1     group2   meandiff p-adj   lower   upper  reject
------------------------------------------------------------
 1b_distil 1b_vanilla  -0.7025    0.0   -0.77  -0.635   True
 1b_distil       405b   0.2711    0.0  0.2021  0.3401   True
 1b_distil  8b_distil   0.3416    0.0  0.2713  0.4119   True
 1b_distil 8b_vanilla   0.0865 0.0046  0.0187  0.1543   True
1b_vanilla       405b   0.9736    0.0  0.9078  1.0393   True
1b_vanilla  8b_distil   1.0441    0.0   0.977  1.1112   True
1b_vanilla 8b_vanilla    0.789    0.0  0.7245  0.8535   True
      405b  8b_distil   0.0706 0.0402   0.002  0.1392   True
      405b 8b_vanilla  -0.1846    0.0 -0.2506 -0.1186   True
 8b_distil 8b_vanilla  -0.2551    0.0 -0.3225 -0.1877   True
------------------------------------------------------------


---

# Dominance: one-way ANOVA and Tukey HSD across model groups

In [13]:
# Load the price-label table and keep only the per-model dominance columns,
# in the order listed here.
dominance_columns = [
    "405b_dominance",
    "8b_zeroshot_dominance",
    "1b_zeroshot_dominance",
    "8b_distil_dominance",
    "1b_distil_dominance",
]
dominance = pd.read_csv("./Statistics/final_price_label.csv")[dominance_columns]
display(dominance.head())

Unnamed: 0,405b_dominance,8b_zeroshot_dominance,1b_zeroshot_dominance,8b_distil_dominance,1b_distil_dominance
0,0.5,,0.0,0.431877,0.06193
1,0.27427,0.180019,0.651272,0.36852,0.462771
2,0.436308,0.360031,0.239512,0.360031,
3,,0.097685,0.108538,0.314761,0.0
4,0.5,1.0,1.0,,1.0


In [16]:
# Number of non-missing dominance scores per model column (the columns
# contain NaNs, so the group sizes differ).
dominance.notna().sum()

405b_dominance           297
8b_zeroshot_dominance    317
1b_zeroshot_dominance    296
8b_distil_dominance      265
1b_distil_dominance      270
dtype: int64

In [14]:
# Reshape the wide dominance table (one column per model) into long form:
# one row per (group, score) observation. The "_dominance" suffix is stripped
# so the group labels are just the model names, and missing scores are dropped.
data_long = (
    dominance
    .melt(var_name='group', value_name='score')
    .assign(group=lambda d: d['group'].str.split('_dominance').str[0])
    .dropna()
)

print(data_long)

# One-way ANOVA: does the mean dominance score differ between model groups?
# NOTE(review): each row of `dominance` appears to hold one item scored under
# every model, so the groups may be paired rather than independent - confirm
# that a one-way ANOVA is the intended test.
model = ols('score ~ group', data=data_long).fit()
anova_table = sm.stats.anova_lm(model)

# Format floats to 4 decimals for this table only. Setting
# pd.options.display.float_format directly would change every later display
# in the kernel, making output depend on whether this cell has been run.
with pd.option_context('display.float_format', '{:.4f}'.format):
    print(anova_table)

          group     score
0          405b  0.500000
1          405b  0.274270
2          405b  0.436308
4          405b  0.500000
5          405b  0.500000
...         ...       ...
1628  1b_distil  0.033349
1629  1b_distil  0.217869
1630  1b_distil  0.287041
1632  1b_distil  0.000000
1634  1b_distil  0.551570

[1445 rows x 2 columns]
                df   sum_sq  mean_sq       F  PR(>F)
group       4.0000  21.5249   5.3812 53.5370  0.0000
Residual 1440.0000 144.7404   0.1005     NaN     NaN


In [41]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import numpy as np

# Post-hoc pairwise comparison of mean dominance between model groups
# (Tukey HSD) at a 5% family-wise error rate.
# NOTE(review): the numpy print precision set here does not appear to affect
# the summary table below (it shows 4 decimals) - confirm it is still needed.
np.set_printoptions(precision=3)

tukey_result = pairwise_tukeyhsd(data_long['score'], data_long['group'], alpha=0.05)
print(tukey_result)

     Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1      group2   meandiff p-adj   lower   upper  reject
--------------------------------------------------------------
  1b_distil 1b_zeroshot  -0.1184 0.0001 -0.1912 -0.0455   True
  1b_distil        405b   0.2129    0.0  0.1401  0.2857   True
  1b_distil   8b_distil   0.1365    0.0  0.0616  0.2113   True
  1b_distil 8b_zeroshot   0.1606    0.0  0.0889  0.2323   True
1b_zeroshot        405b   0.3313    0.0  0.2601  0.4024   True
1b_zeroshot   8b_distil   0.2548    0.0  0.1816  0.3281   True
1b_zeroshot 8b_zeroshot    0.279    0.0   0.209   0.349   True
       405b   8b_distil  -0.0764 0.0355 -0.1496 -0.0033   True
       405b 8b_zeroshot  -0.0523 0.2468 -0.1222  0.0177  False
  8b_distil 8b_zeroshot   0.0242 0.8909 -0.0479  0.0962  False
--------------------------------------------------------------
