In [25]:
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from statsmodels.stats.multitest import multipletests


In [26]:
REPLICATION = True

file_path_add = ''
if REPLICATION:
    file_path_add = 'replication/'

In [27]:
models = ['country_based','topic based','random']


In [28]:
index = 'token_pairs'

## Evaluating on WVS

In [30]:
list_rows = []
for model in models:
    pew_gpt2 = pd.read_csv(f'../data/{file_path_add}wvs_w7_gpt2_{model}_on_pew_{index}_in.csv')
    pew_gpt2_cultural = pew_gpt2.loc[pew_gpt2['country'] != 'universal']
    pew_gpt2_cultural = pew_gpt2_cultural.loc[~pd.isna(pew_gpt2_cultural['wvs_score'])]
    r, p = (scipy.stats.pearsonr(pew_gpt2_cultural['wvs_score'], pew_gpt2_cultural['log prob difference']))
    row = {'model':'gpt2','train_data' : 'PEW', 'eval_data': 'WVS',
          'strategy': model, 'r': r, 'p': p, 'n': len(pew_gpt2_cultural)}
    list_rows.append(row)


In [31]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,model,train_data,eval_data,strategy,r,p,n
0,gpt2,PEW,WVS,country_based,0.328298,8.829486000000001e-27,1028
1,gpt2,PEW,WVS,topic based,0.451231,3.1270420000000003e-52,1028
2,gpt2,PEW,WVS,random,0.463275,2.404414e-55,1028


## Evaluating on PEW
we have to remove seen pairs from evaluation set

In [32]:
pew_topics = {'use contraceptives':'using contraceptives',
           'get a divorce':'getting a divorce', 
            'have an abortion': 'having an abortion',
            'be homosexual': 'homosexuality', 
           'drink alcohol': 'drinking alcohol',
           'have an extramarital affair': 'married people having an affair' ,
             'gamble': 'gambling',
       'have sex between unmarried adults':'sex between unmarried adults'
                         }
pew_topics_list = list(pew_topics.keys())



In [33]:
def included_function(pairs):
    def func(row):
        return (row['country'], pew_topics[row['topic']]) in pairs
    return func

In [35]:
all_eval_pairs = pickle.load(open(f'../data/{file_path_add}pew_eval_pairs.p', 'rb'))
list_rows = []
for model in models:
    eval_pairs = all_eval_pairs[model]
    pew_gpt2 = pd.read_csv(f'../data/{file_path_add}pew_gpt2_{model}_on_pew_{index}.csv')
    pew_gpt2_cultural = pew_gpt2.loc[pew_gpt2['country'] != 'universal']
    
    pew_gpt2_cultural = pew_gpt2_cultural.loc[~pd.isna(pew_gpt2_cultural['pew_score'])]
    pew_gpt2_cultural['in_eval'] = pew_gpt2_cultural.apply(included_function(eval_pairs), axis = 1)
    pew_gpt2_cultural = pew_gpt2_cultural.loc[pew_gpt2_cultural.in_eval == True]

    r, p = scipy.stats.pearsonr(pew_gpt2_cultural['pew_score'], pew_gpt2_cultural['log prob difference'])
    row = {'model':'gpt2','train_data' : 'PEW', 'eval_data': 'PEW',
          'strategy': model, 'r': r, 'p': p, 'n': len(pew_gpt2_cultural)}
    list_rows.append(row)

In [36]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,model,train_data,eval_data,strategy,r,p,n
0,gpt2,PEW,PEW,country_based,0.737458,1.097395e-11,64
1,gpt2,PEW,PEW,topic based,0.683887,1.548872e-11,78
2,gpt2,PEW,PEW,random,0.840507,2.0963410000000002e-17,63


# Variation study

### WVS

In [37]:
topics = ['claiming government benefits to which you are not entitled',
 'avoiding a fare on public transport',
 'stealing property',
 'cheating on taxes',
 'someone accepting a bribe in the course of their duties',
 'homosexuality',
 'prostitution',
 'abortion',
 'divorce',
 'sex before marriage',
 'suicide',
 'euthanasia',
 'for a man to beat his wife',
 'parents beating children',
 'violence against other people',
 'terrorism as a political, ideological or religious mean',
 'having casual sex',
 'political violence',
 'death penalty']

In [38]:
variation_rows = []

for i, model in enumerate(models):
    if model == 'topic based':
        model_refined = 'topic_based'
    elif model == 'removed topics':
        model_refined = 'removed_topic'
    else:
        model_refined = model
    wvs_gpt2 = pd.read_csv(f'../data/{file_path_add}wvs_w7_gpt2_{model}_on_pew_{index}_in.csv')
    wvs_gpt2_cultural = wvs_gpt2.loc[wvs_gpt2['country'] != 'universal']
    df = wvs_gpt2_cultural.loc[~pd.isna(wvs_gpt2_cultural['wvs_score'])]

    for t in topics:

        pew_gpt2_t = df.loc[df.topic == t]
        user_var = np.var(pew_gpt2_t['wvs_score'])
        model_var = np.var(pew_gpt2_t['log prob difference'])
        row = {'model': model, 'user variation': user_var, 'model variation': model_var, 'topic': t}
        variation_rows.append(row)

df = pd.DataFrame(variation_rows)

   


In [39]:
list_rows = []
for i, model in enumerate(models):
    
    model_df = df.loc[df.model == model] 
    r, p = scipy.stats.pearsonr(model_df['model variation'],model_df['user variation'])
    
    row = {'strategy': model, 'r': r, 'p':p,
          'n': len(model_df)}
    list_rows.append(row)

In [40]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,strategy,r,p,n
0,country_based,0.930065,2.470066e-08,19
1,topic based,0.928679,2.904522e-08,19
2,random,0.952398,1.013767e-09,19


## PEW

In [41]:
pew_topics = {'using contraceptives':['using contraceptives', 'use contraceptives'],
           'getting a divorce': ['getting a divorce','get a divorce'], 
           'having an abortion': ['having an abortion','have an abortion'],
           'homosexuality': ['homosexuality','be homosexual'], 
           'drinking alcohol': ['drinking alcohol','drink alcohol'],
           'married people having an affair' : ['married people having an affair', 'have an extramarital affair'],
            'gambling': ['gambling','gamble'],
       'sex between unmarried adults': [ 'sex between unmarried adults','have sex between unmarried adults']
                         }

pew_topics_list = list(pew_topics.keys())

variation_rows = []

for i, model in enumerate(models):
    if model == 'topic based':
        model_refined = 'topic_based'
    elif model == 'removed topics':
        model_refined = 'removed_topic'
    else:
        model_refined = model
    pew_gpt2 = pd.read_csv(f'../data/{file_path_add}pew_gpt2_{model}_on_pew_{index}.csv')
    pew_gpt2_cultural = pew_gpt2.loc[wvs_gpt2['country'] != 'universal']
    df = pew_gpt2_cultural.loc[~pd.isna(pew_gpt2_cultural['pew_score'])]

    for t in pew_topics:

        pew_gpt2_t = df.loc[df.topic.isin(pew_topics[t])]
        user_var = np.var(pew_gpt2_t['pew_score'])
        model_var = np.var(pew_gpt2_t['log prob difference'])
        row = {'model': model, 'user variation': user_var, 'model variation': model_var, 'topic': t}
        variation_rows.append(row)

df = pd.DataFrame(variation_rows)

   




In [42]:
list_rows = []
for i, model in enumerate(models):
    
    model_df = df.loc[df.model == model]
    

    slope, intercept, r, p, std_err =\
    scipy.stats.linregress(model_df['model variation'],model_df['user variation'])
    
    
    row = {'strategy': model, 'r': r, 'p':p,'slope':slope,
          'n': len(model_df)}
    list_rows.append(row)
    


In [43]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,strategy,r,p,slope,n
0,country_based,0.927527,0.002702,0.311446,8
1,topic based,0.992855,3e-06,0.217214,8
2,random,0.948208,0.001002,0.179873,8
