In [1]:
import os
import json
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance
import helpers as ph
import seaborn as sns
import dataframe_image as dfi

# Short alias for the table styles exported by the `helpers` module.
# NOTE(review): the table cell below passes `ph.VIS_STYLES` directly rather
# than this alias — confirm whether `styles` is used elsewhere in the notebook.
styles = ph.VIS_STYLES



In [2]:
# Directory holding the per-survey distribution CSVs read by the loading cell.
# (Plain string literal: the previous f-string had no placeholders.)
RESULTS_DIR = './data/distributions/'

# Prompt-context variant; it is interpolated into the name of each
# `*_combined.csv` file that gets loaded.
CONTEXT = 'default'

# Toggle for writing figures to disk.
# NOTE(review): in the visible cells this is only referenced from a
# commented-out export line — confirm it is still honoured further down.
SAVEFIG = True

## Load human and LM opinion distributions

In [4]:
# Read, for every survey wave listed in `ph.PEW_SURVEY_LIST`, two CSVs from
# RESULTS_DIR:
#   * `<survey>_<CONTEXT>_combined.csv` -> rows collected into `combined_df`
#   * `<survey>_baseline.csv`           -> rows collected into `human_df`
# Each frame is tagged with a short `survey` label ("ATP <wave>") before being
# stacked. The per-wave frames live in their own lists so that `combined_df`
# and `human_df` only ever name DataFrames.
combined_frames, human_frames = [], []
for wave in ph.PEW_SURVEY_LIST:
    SURVEY_NAME = f'American_Trends_Panel_W{wave}'

    cdf = pd.read_csv(os.path.join(RESULTS_DIR, f'{SURVEY_NAME}_{CONTEXT}_combined.csv'))
    cdf['survey'] = f'ATP {wave}'
    combined_frames.append(cdf)

    hdf = pd.read_csv(os.path.join(RESULTS_DIR, f'{SURVEY_NAME}_baseline.csv'))
    hdf['survey'] = f'ATP {wave}'
    human_frames.append(hdf)

# Note: the original row indices are kept, so index labels repeat across waves.
combined_df, human_df = pd.concat(combined_frames), pd.concat(human_frames)

# Provider label: a model whose name contains 'j1-' (case-insensitive) is
# attributed to AI21 Labs, every other model to OpenAI. This is a vectorised
# replacement for a row-wise `apply`; `na=False` sends rows with a missing
# model name to the 'OpenAI' branch instead of raising.
is_ai21 = combined_df['model_name'].str.lower().str.contains('j1-', regex=False, na=False)
combined_df['Source'] = np.where(is_ai21, 'AI21 Labs', 'OpenAI')

## Compare refusals

In [5]:
KEYS = ['Source', 'model_name', 'attribute', 'model_order']


def _mean_as_percent(values):
    """Return the mean of `values` multiplied by 100."""
    return 100 * np.mean(values)


# Per-model refusal rate: restrict to the 'Overall' group, average `R_M`
# within each KEYS combination, and order the rows by `model_order`.
overall_rows = combined_df[combined_df['group'] == 'Overall']
model_refusals = (
    overall_rows
    .groupby(KEYS, as_index=False)
    .agg({'R_M': _mean_as_percent})
    .sort_values(by=['model_order'])
    .rename(columns={'R_M': 'Refusal'})
)

# Human refusal rate per group: average `R_H` within each (group, group_order)
# pair, then label the rows so they can be stacked with the model rows.
# NOTE(review): this reads `R_H` from `combined_df`, not from `human_df` —
# confirm that is the intended source.
human_refusals = (
    combined_df
    .groupby(['group', 'group_order'], as_index=False)
    .agg({'R_H': _mean_as_percent})
    .rename(columns={'R_H': 'Refusal'})
    .assign(Source='humans', model_name='overall')
)

In [11]:
# Shell escape: runs the Playwright CLI's `install-deps` command, which
# installs operating-system packages required by Playwright's browsers.
# This modifies the host environment and is not part of the analysis itself.
# NOTE(review): presumably needed for dataframe_image's PNG export of the
# styled table — confirm, and consider moving it to environment setup.
!playwright install-deps

In [14]:
# Stack the human 'Overall' row on top of the per-model rows. The model-name
# column is renamed to an empty string so that pivot level carries no label.
overall_humans = human_refusals[human_refusals['group'] == 'Overall']
refusal_rows = pd.concat([overall_humans, model_refusals]).rename(columns={'model_name': ''})

# Pivot to one column per (Source, model) pair holding the 'Refusal' value;
# `sort=False` keeps the columns in their order of appearance.
refusal_table = pd.pivot_table(
    refusal_rows,
    columns=['Source', ''],
    values="Refusal",
    sort=False,
)

# Style the table: red gradient applied across each row, the shared table
# styles from the helpers module, a smaller font, three-decimal formatting.
table_vis = (
    refusal_table.style
    .background_gradient("Reds", axis=1)
    .set_table_styles(ph.VIS_STYLES)
    .set_properties(**{"font-size": "0.7rem"})
    .format(precision=3)
)

# PNG export is currently disabled; the original line is kept for reference:
#if SAVEFIG: table_vis.hide().export_png(f'./figures/refusals.png')
display(table_vis)

Source,humans,AI21 Labs,AI21 Labs,AI21 Labs,OpenAI,OpenAI,OpenAI,OpenAI,OpenAI,OpenAI
Unnamed: 0_level_1,overall,j1-grande,j1-jumbo,j1-grande-v2-beta,ada,davinci,text-ada-001,text-davinci-001,text-davinci-002,text-davinci-003
Refusal,1.538,21.209,13.171,13.147,17.076,13.729,16.447,1.75,3.778,2.004
