In [1]:
# Based on the code from https://github.com/liang-jenny/nlpositionality

In [2]:
import pandas as pd
from scipy import stats

In [3]:
import value_aggregation as pm

In [4]:
# Demographic columns of the table that the analysis below iterates over.
demographics = [
    'country_longest',
    'education',
    'ethnicity',
    'gender',
    'native_language',
    'age',
    'country_residence',
    'religion',
]

# Load the processed social-acceptability CSV (relative path).
df = pd.read_csv('../../external_data/nlpositionality/social-acceptability_processed.csv')

### Finding the groups which are least similar

In [5]:
# For every demographic category, compare the mean rating ('litw') per action
# of the people inside the category with the mean rating of everyone else
# (the "negation" of the group) and record how strongly the two correlate.
# The least-correlated groups are the ones tested further below.

model = 'delphi'  # no longer needed for the correlation itself; kept for reference
correlations = {}


def _mean_litw_by_action(frame):
    """Return a frame with one row per action holding that action's mean 'litw'."""
    return frame.groupby(by=['action'])['litw'].mean().reset_index()


for demographic in demographics:
    correlations[demographic] = {}
    # NaN never compares equal to anything, so a missing value can never select
    # any rows; skip it up front instead of computing groupbys that get discarded.
    for category in df[demographic].dropna().unique():
        is_in_category = df[demographic] == category

        in_category_grouped = _mean_litw_by_action(df[is_in_category])
        # Rows whose demographic is missing count as "not in the category".
        not_in_category_grouped = _mean_litw_by_action(df[~is_in_category])

        # Join on the action only. Joining on the averaged model column as well
        # would require two independently computed float means to be
        # bit-identical, and would silently drop the action when they are not.
        merged = in_category_grouped.merge(not_in_category_grouped, on='action')

        # Pearson's r needs at least two actions rated by both sides.
        if len(merged) >= 2:
            correlations[demographic][category] = {
                'size': len(merged),
                'corr': stats.pearsonr(merged['litw_x'], merged['litw_y']),
            }


In [6]:
# For each demographic, find the group which has the least correlation with
# its negation.

for demographic in demographics:
    group_correlations = correlations[demographic]
    # A demographic can end up with no group that shares >= 2 actions with its
    # negation; min() would raise ValueError on the empty dict.
    if not group_correlations:
        print(f"{demographic}: no group with enough shared actions")
        continue
    # Rank on the correlation coefficient itself (first element of the
    # pearsonr result) rather than on the whole result object.
    min_group = min(group_correlations, key=lambda x: group_correlations[x]['corr'][0])
    print(f"{demographic}: {min_group} {group_correlations[min_group]}")

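# education: professional school vs. not professional school seems to be among the
# least correlated groups (country_residence: African-Islamic is lower still in the
# output of this cell, but over far fewer actions)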

country_longest: African-Islamic {'size': 74, 'corr': PearsonRResult(statistic=0.6951044458607152, pvalue=6.370247074591044e-12)}
education: professional school {'size': 189, 'corr': PearsonRResult(statistic=0.6575190998119482, pvalue=8.962644001682762e-25)}
ethnicity: black {'size': 237, 'corr': PearsonRResult(statistic=0.7740744803370317, pvalue=1.4827155219016302e-48)}
gender: non-binary {'size': 280, 'corr': PearsonRResult(statistic=0.7968998288815363, pvalue=8.468066924683591e-63)}
native_language: not english {'size': 264, 'corr': PearsonRResult(statistic=0.8484484719454016, pvalue=2.3349426316315795e-74)}
age: > 80 {'size': 25, 'corr': PearsonRResult(statistic=0.6589700650136484, pvalue=0.00034067898041322957)}
country_residence: African-Islamic {'size': 68, 'corr': PearsonRResult(statistic=0.6171432995658224, pvalue=2.085419867057657e-08)}
religion: buddhist {'size': 117, 'corr': PearsonRResult(statistic=0.7371188337149057, pvalue=2.6693808345252273e-21)}


### Putting those groups into our framework

In [7]:
def scenario_to_game(df, scenario, demographic, categories, min_responses=5):
    """
    Constructs a GameState for the given scenario, demographic and categories.

    Every category that has enough responses to `scenario` becomes one agent.

    Parameters
    ----------
    df : DataFrame with the columns 'action', 'litw' and `demographic`.
    scenario : value of the 'action' column to build the game for.
    demographic : name of the column that splits respondents into groups.
    categories : iterable of values of `df[demographic]` to consider.
    min_responses : minimum number of responses to `scenario` a category needs
        in order to be included (default 5; the threshold is arbitrary).
        Values below 1 are treated as 1, because a category without any
        response has no outcome distribution.

    Returns
    -------
    The result of `pm.VoteGameState(credences, outcomes)` where
      credences[category]        = rows of the category / rows of all `categories`
      outcomes[rating][category] = fraction of the category's responses to
                                   `scenario` that gave `rating`
    """
    actions = [-2.0, -1.0, 0.0, 1.0, 2.0]
    # -2 -1 0 1 2
    # "it's very bad" "it's bad" "it's okay" "it's good" "it's very good"

    credences = {}
    outcomes = {action: {} for action in actions}

    # Filter each category once and reuse the rows for both the total and the
    # per-category statistics. A list of pairs (not a dict) keeps the handling
    # of repeated categories exactly as before.
    rows_by_category = [(category, df[df[demographic] == category])
                        for category in categories]

    # NOTE(review): the denominator counts every listed category, including the
    # ones skipped below, so the credences only sum to 1 when nothing is
    # skipped. This is presumably what the original "rounding error" TODO was
    # about -- confirm whether the game expects normalised credences.
    sum_in_categories = sum(len(rows) for _, rows in rows_by_category)

    threshold = max(min_responses, 1)

    for category, agent_df in rows_by_category:
        agent_scenario_df = agent_df[agent_df['action'] == scenario]
        n_responses = len(agent_scenario_df)
        if n_responses < threshold:
            continue

        credences[category] = len(agent_df) / sum_in_categories

        for action in actions:
            # outcomes: the percentage of each group who labeled each outcome.
            # int(...) keeps the result a plain Python float.
            n_with_rating = int((agent_scenario_df['litw'] == action).sum())
            outcomes[action][category] = n_with_rating / n_responses

    gameState = pm.VoteGameState(credences, outcomes)
    return gameState


In [8]:
def count_disagreements(df, demographic, categories, size=1):
    """
    For every scenario in df['action'], reports whether the results of
    `pm.run_mec` and `pm.run_nash_bargain` differ by at least `size`.

    Returns a dict of the form {scenario: {'mec-bar': bool}}.
    """
    def _mec_vs_bargain(scenario):
        game = scenario_to_game(df, scenario, demographic, categories)
        mec_choice = pm.run_mec(game)
        bargain_choice = pm.run_nash_bargain(game)
        # pm.run_expectimax(game) was left out here: it took too long to run
        # inside a notebook.
        return abs(mec_choice - bargain_choice) >= size

    return {scenario: {'mec-bar': _mec_vs_bargain(scenario)}
            for scenario in df['action'].unique()}

In [23]:
# pre-processing disagreements for all demographics
# this takes a while

# Maps demographic -> {scenario: {'mec-bar': bool}} as returned by
# count_disagreements, using every distinct value of the column as a category.
demographic_disagreements = {}
for demographic in demographics:    
    # unique() also yields NaN when the column has missing values
    categories = df[demographic].unique()
    scenario_disagrees = count_disagreements(df, demographic, categories)
    demographic_disagreements[demographic] = scenario_disagrees
# NOTE: `demographic`, `categories` and `scenario_disagrees` stay bound at
# notebook level after the loop, holding the values of the last demographic.

In [24]:
def disagrees_only(the_dict, sub_key):
    """Return the keys of `the_dict` whose entry has a truthy `sub_key` value."""
    return [key for key in the_dict if the_dict[key][sub_key]]

def agrees_only(the_dict, sub_key):
    """Return the keys of `the_dict` whose entry has a falsy `sub_key` value."""
    return [key for key in the_dict if not the_dict[key][sub_key]]

def max_disagreement(demographic_disagreements, disagree_type):
    """
    Returns the demographic in which the formal models disagree most often.

    Parameters
    ----------
    demographic_disagreements : dict of the form
        {demographic: {scenario: {disagree_type: bool, ...}}}
    disagree_type : key of the per-scenario flag to count, e.g. 'mec-bar'.

    Returns
    -------
    The demographic with the largest share of scenarios whose `disagree_type`
    flag is truthy. Ties go to the demographic listed first. Returns None when
    `demographic_disagreements` is empty.
    """
    best_share = 0.0
    best_demographic = None

    for demographic, scenarios in demographic_disagreements.items():
        # The percentage of scenarios for these categories in which we can find
        # a disagreement in the models. The denominator is this demographic's
        # own scenario count, not a variable left over from another cell.
        n_disagreeing = sum(1 for flags in scenarios.values() if flags[disagree_type])
        # A demographic without scenarios has nothing to disagree about.
        share = n_disagreeing / len(scenarios) if scenarios else 0.0

        if best_demographic is None or best_share < share:
            best_share = share
            best_demographic = demographic
    return best_demographic

In [30]:
def disagree_stats(category):
    """
    Prints the share, count and total of 'mec-bar' disagreements.

    `category` is a key of the notebook-level `demographic_disagreements`.
    The "%" line is printed as a fraction; it is not multiplied by 100.
    """
    scenarios = demographic_disagreements[category]
    n_disagreeing = len(disagrees_only(scenarios, 'mec-bar'))
    n_scenarios = len(scenarios)
    share = n_disagreeing / n_scenarios
    print(f"%: {share}", f"count: {n_disagreeing}", f"n: {n_scenarios}", sep="\n")

### This is the stat which we estimate in our paper

In [28]:
# Demographic whose scenarios most often carry a 'mec-bar' disagreement.
max_disagreement(demographic_disagreements, 'mec-bar')

'education'

In [32]:
# Share, count and total number of scenarios with a 'mec-bar' disagreement for 'education'.
disagree_stats('education')

%: 0.010309278350515464
count: 3
n: 291


## Walking through a couple of examples of disagreement

In [143]:
# Demographics for which disagreements were pre-computed.
demographic_disagreements.keys()

dict_keys(['country_longest', 'education', 'ethnicity', 'gender', 'native_language', 'age', 'country_residence', 'religion'])

In [144]:
# Pick one demographic and collect the scenarios whose 'mec-bar' flag is set.
demographic = 'age'
scenario_disagrees = demographic_disagreements[demographic]
disagreeing_scenarios_only = disagrees_only(scenario_disagrees, 'mec-bar')
# Same expression the pre-processing cell used to build the category list.
categories = df[demographic].unique()

In [147]:
# First disagreeing scenario; the cells below rebuild its game.
disagreeing_scenarios_only[0]

'children doing risky things.'

In [148]:
# Rebuild the game for that scenario so that its contents can be inspected.
game = scenario_to_game(df, disagreeing_scenarios_only[0], demographic, categories)
game

{"beliefs" : {'20-30': 0.4253723677452491, '10-20': 0.35285053929121724}, "outcomes" : {-2.0: {'20-30': 0.0, '10-20': 0.23076923076923078}, -1.0: {'20-30': 0.42857142857142855, '10-20': 0.3076923076923077}, 0.0: {'20-30': 0.5714285714285714, '10-20': 0.15384615384615385}, 1.0: {'20-30': 0.0, '10-20': 0.23076923076923078}, 2.0: {'20-30': 0.0, '10-20': 0.07692307692307693}}, "actions_taken" : []}

In [None]:
{"beliefs" : {'20-30': 0.4253723677452491,
              '10-20': 0.35285053929121724},
 "outcomes" : {-2.0: {'20-30': 0.0,
                      '10-20': 0.23076923076923078},
               -1.0: {'20-30': 0.42857142857142855,
                      '10-20': 0.3076923076923077},
               0.0: {'20-30': 0.5714285714285714,
                     '10-20': 0.15384615384615385},
               1.0: {'20-30': 0.0,
                     '10-20': 0.23076923076923078},
               2.0: {'20-30': 0.0,
                     '10-20': 0.07692307692307693}},
 "actions_taken" : []}

In [149]:
# Result of pm.run_mec for the example game (trailing comment = expected value).
pm.run_mec(game) # 0.0

0.0

In [150]:
# Result of pm.run_nash_bargain for the same game (trailing comment = expected value).
pm.run_nash_bargain(game) # -1.0

-1.0

In [80]:
"""
{-2.0: {'20-30': 0.0, '10-20': 0.024390243902439025, '40-50': 0.0, '30-40': 0.1},
-1.0: {'20-30': 0.02127659574468085, '10-20': 0.024390243902439025, '40-50': 0.0, '30-40': 0.1},
0.0: {'20-30': 0.2765957446808511, '10-20': 0.3902439024390244, '40-50': 0.42857142857142855, '30-40': 0.7},
1.0: {'20-30': 0.2127659574468085, '10-20': 0.24390243902439024, '40-50': 0.14285714285714285, '30-40': 0.0},
2.0: {'20-30': 0.48936170212765956, '10-20': 0.3170731707317073, '40-50': 0.42857142857142855, '30-40': 0.1}}
"""

"\n{-2.0: {'20-30': 0.0, '10-20': 0.024390243902439025, '40-50': 0.0, '30-40': 0.1},\n-1.0: {'20-30': 0.02127659574468085, '10-20': 0.024390243902439025, '40-50': 0.0, '30-40': 0.1},\n0.0: {'20-30': 0.2765957446808511, '10-20': 0.3902439024390244, '40-50': 0.42857142857142855, '30-40': 0.7},\n1.0: {'20-30': 0.2127659574468085, '10-20': 0.24390243902439024, '40-50': 0.14285714285714285, '30-40': 0.0},\n2.0: {'20-30': 0.48936170212765956, '10-20': 0.3170731707317073, '40-50': 0.42857142857142855, '30-40': 0.1}}\n"