# Multi-Agent Simulations of (Argumentative) Belief Dynamics with LMs

## General settings

In [65]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
import logging

import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

In [66]:
!nvidia-smi

Thu Feb 25 16:26:41 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.06    Driver Version: 450.51.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:3A:00.0 Off |                    0 |
| N/A   31C    P0    51W / 300W |   2291MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Topic 

In [167]:
topics = [
    {
        'id': 'compulsory_voting',
        'intro': 'Let\'s discuss compulsory voting! Should citizens be free to abstain from voting? Or should voting be made compulsory?',
        'prompt': 'I more or less agree with what my peers are saying here. Regarding the idea of compulsory voting, I\'d just add the following thought:',
        'initial_posts': [
            { 
                'type':'pro',
                'text':'Turnout in British election is distressingly low. Compulsory voting might fix this problem and increase the legitimacy of our government.'
            },
            {
                'type':'pro',
                'text':'Voting is compulsory in other countries, for instance in Australia. We should adopt the same system to secure greater democratic involvement.'
            },
            {
                'type':'pro',
                'text':'Proxy voting and postal voting will be available for those who cannot pgysically get to the polling station – voting by the Internet could also be arranged for.'
            },
            { 
                'type':'con',
                'text':'There are many reasons why people do not vote. Up to 10 per cent of the population is not on the electoral register at any one time.'
            },
            {
                'type':'con',
                'text':'Many people cannot get away from their work, or find someone to look after their children. And that\'s the reason why they don\'t vote.',
            },
            {
                'type':'con',
                'text':'Some people who don\'t vote cannot get physically to the polling booth. Others are simply not interested in politics.'
            },
            {
                'type':'con',
                'text':'There are many reasons why people don\'t vote. None of these motivations can be affected by forcing people to vote – those who cannot will continue not to, and those who are not interested will vote randomly.'
            },
            {
                'type':'pro',
                'text':'In the 1998 local elections, turnout averaged no more than 30 per cent, and in some areas under 20 per cent. Even in the 1997 general election, almost 30 per cent of the population did not vote.'
            }
            
        ]
    }
]

### Model and Tokenizer

In [168]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-large')
model = GPT2LMHeadModel.from_pretrained('gpt2-large')
model.to("cuda")
logging.getLogger('transformers.tokenization_utils').setLevel(logging.ERROR)

In [169]:
NEWLINE_TOKENID = tokenizer("I\n")['input_ids'][-1]
NEWLINE_TOKENID

198

In [170]:
EOS_TOKENIDS = [tokenizer(c)['input_ids'][0] for c in "!?."]
EOS_TOKENIDS

[0, 30, 13]

In [171]:
ETC_TOKENID = tokenizer("...")['input_ids'][-1]
ETC_TOKENID

986

In [172]:
XA0_TOKENID = tokenizer("A\xa0")['input_ids'][-1]
XA0_TOKENID

1849

### Global parameters 

In [173]:
global_parameters = {
    'topic':'compulsory_voting',
    'n_agents':16,
    'max_t':15,
    'n_initial_posts':2, # number of initial posts, simulaion starts at t = n_initial_posts 
    'context_size':15, # max number of posts that are included in an agents context / prompt
    'relevance_deprecation':.5, # how much more likely (factor) is it the agents retains a post dating from (t-k-1) than one dating from (t-k)
    'self_confidence':4, # how much more likely (factor) is it the agents retains a post from her own than from a peer 
    'peer_selection_method':'closed_rooms'
}

peer_selection_parameters = [
    {
        'id':'closed_rooms',
        'by_roomsize': False, # Determine number of rooms by n_agents/agents_per_room
        'n_rooms':4,
        'agents_per_room':4
    }
]

decoding_parameters = {
    'do_sample':True, 
    'top_p':0.9, 
    'top_k':0,
    'max_length':35,
    'bad_words_ids':[[NEWLINE_TOKENID]]
}

## Define Dynamics

### Peer selection

In [174]:
peer_sel_param = next(p for p in peer_selection_parameters if p['id']==global_parameters['peer_selection_method'])

if peer_sel_param['id']=='closed_rooms':
    n_rooms = round(global_parameters['n_agents']/peer_sel_param['agents_per_room']) if peer_sel_param['by_roomsize'] else peer_sel_param['n_rooms']
    #assign each agent a room
    room_assignments = random.choices(list(range(n_rooms)),k=global_parameters['n_agents'])
    #print(room_assignments)
    peers = [[j for j,r in enumerate(room_assignments) if r==room_assignments[i]] for i in range(global_parameters['n_agents'])]
    #print(peers)
    
    def peer_selection(df,t):
        return peers

### Context creation

In [175]:
def context_creation(df,t):
    c_max = global_parameters['context_size']
    delta = global_parameters['relevance_deprecation']
    context_old = df.loc[t-1]['context'] # start with context from previous step
    context_old = context_old.rename('old')
    context_new = df.loc[t]['peers'].apply(lambda peers: [[t-1,p] for p in peers])# all the new stuff the peers have been saying at t
    context_new = context_new.rename('new')
    
    def merge_and_drop(row):
        con = row.new + row.old
        if len(con)>global_parameters['context_size']:
            con_sampled = []
            for i in range(global_parameters['context_size']):
                weights = [(global_parameters['self_confidence'] if x[1]==row.agent else 1) * (delta**(t-x[0])) for x in con]
                #print(weights)
                retain = random.choices(con,k=1,weights=weights)[0]
                con.remove(retain)
                con_sampled.append(retain)
            con = con_sampled
        return con
    contexts = pd.concat([context_old,context_new,pd.Series(agents,name='agent')],axis=1).apply(merge_and_drop, axis=1)
    return contexts

### Generation of posts

In [176]:
def posts_creation(df,t):
    def generate_post(row):
        # collect_and_glue_context_tokens
        tokens = topic['intro_tokens']
        tokens = tokens + [NEWLINE_TOKENID]
        for c in row.context:
            tokens = tokens + (df.loc[c[0],c[1]]['tokens'])
            tokens = tokens + [NEWLINE_TOKENID]
        tokens = tokens + topic['prompt_tokens']
        
        params = decoding_parameters.copy()
        
        params['max_length'] = params['max_length']+len(tokens)
        
        output = model.generate(
            torch.tensor([tokens]).to('cuda'),
            **params
        )
        
        gen_tokens = output.tolist()[0][len(tokens):]
        
        # post process: search for and stop at last EOS
        eos_idx = [i for i,token in enumerate(gen_tokens) if token in EOS_TOKENIDS]   
        if len(eos_idx)>0:
            gen_tokens = gen_tokens[:(eos_idx[-1]+1)]
        else:
            gen_tokens = gen_tokens + [ETC_TOKENID]
        
        # decode
        gen_text = {'text':tokenizer.decode(gen_tokens)}
        return pd.Series({'post':gen_text,'tokens':gen_tokens})
        
    df_posts_tokens = df.loc[t].apply(generate_post, axis=1)
    
    return df_posts_tokens

## Initialization

In [177]:
topic = next(t for t in topics if t['id']==global_parameters['topic'])
topic['intro_tokens'] = tokenizer(topic['intro'])['input_ids']
topic['prompt_tokens'] = tokenizer(topic['prompt'])['input_ids']

We store all the results in hiarchically indexed dataframe.

In [178]:
columns = ['post','peers','context','tokens']
steps = np.arange(global_parameters['max_t'])
agents = np.arange(global_parameters['n_agents'])
steps_agents = [
   steps,
   agents
]
index = pd.MultiIndex.from_product(steps_agents, names=["step", "agent"])
df = pd.DataFrame(np.zeros((global_parameters['max_t']*global_parameters['n_agents'], len(columns))), index=index, columns=columns)
df = df.astype(object)
df['context']=[[] for i in range(len(df))]
df['peers']=[[] for i in range(len(df))]

We initialize peer groups.

In [179]:
# initialize peers
for t in range(global_parameters['n_initial_posts']):
    df.loc[t]['peers']=peer_selection(df,t)

We initialize context.

In [180]:
for t in range(1,global_parameters['n_initial_posts']):
    df.loc[t]['context']=context_creation(df,t)

We initialize posts (from topic)

In [181]:
for t in range(global_parameters['n_initial_posts']):
    #new_series = pd.Series(
    #    random.choices(topic['initial_posts'],k=global_parameters['n_agents']),
    #    index = pd.RangeIndex(global_parameters['n_agents'], name='agent')
    #)
    df.loc[t]['post'] = random.choices(topic['initial_posts'],k=global_parameters['n_agents'])
    df.loc[t]['tokens'] = df.loc[t]['post'].apply(lambda p: tokenizer(p['text'])['input_ids'])

In [182]:
df.loc[:2]

Unnamed: 0_level_0,Unnamed: 1_level_0,post,peers,context,tokens
step,agent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,"{'type': 'con', 'text': 'There are many reason...","[0, 4, 5, 9, 11, 12, 14]",[],"[1858, 389, 867, 3840, 1521, 661, 466, 407, 30..."
0,1,"{'type': 'con', 'text': 'There are many reason...","[1, 6, 8, 13]",[],"[1858, 389, 867, 3840, 1521, 661, 836, 470, 30..."
0,2,"{'type': 'pro', 'text': 'Turnout in British el...","[2, 7, 15]",[],"[17278, 448, 287, 3517, 3071, 318, 17087, 4420..."
0,3,"{'type': 'con', 'text': 'There are many reason...","[3, 10]",[],"[1858, 389, 867, 3840, 1521, 661, 836, 470, 30..."
0,4,"{'type': 'pro', 'text': 'In the 1998 local ele...","[0, 4, 5, 9, 11, 12, 14]",[],"[818, 262, 7795, 1957, 7024, 11, 21666, 16449,..."
0,5,"{'type': 'con', 'text': 'Many people cannot ge...","[0, 4, 5, 9, 11, 12, 14]",[],"[7085, 661, 2314, 651, 1497, 422, 511, 670, 11..."
0,6,"{'type': 'con', 'text': 'Some people who don't...","[1, 6, 8, 13]",[],"[4366, 661, 508, 836, 470, 3015, 2314, 651, 10..."
0,7,"{'type': 'pro', 'text': 'Proxy voting and post...","[2, 7, 15]",[],"[44148, 6709, 290, 30793, 6709, 481, 307, 1695..."
0,8,"{'type': 'con', 'text': 'There are many reason...","[1, 6, 8, 13]",[],"[1858, 389, 867, 3840, 1521, 661, 836, 470, 30..."
0,9,"{'type': 'con', 'text': 'Many people cannot ge...","[0, 4, 5, 9, 11, 12, 14]",[],"[7085, 661, 2314, 651, 1497, 422, 511, 670, 11..."


# Loop

In [183]:
for t in tqdm(range(global_parameters['n_initial_posts'],global_parameters['max_t'])):
    # Determine peers
    df.loc[t]['peers']=peer_selection(df,t)
    # Determine contexts
    df.loc[t]['context']=context_creation(df,t)
    # Generate posts
    df_posts_tokens=posts_creation(df,t)
    df.loc[t]['post']=df_posts_tokens['post']
    df.loc[t]['tokens']=df_posts_tokens['tokens']

  0%|          | 0/13 [00:00<?, ?it/s]Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_

In [200]:
idx = pd.IndexSlice
df.loc[idx[:,[13]],:]['tokens'].apply(lambda p: print("\n"+str(p)))


[1858, 389, 867, 3840, 1521, 661, 836, 470, 3015, 13, 6045, 286, 777, 28140, 460, 307, 5676, 416, 10833, 661, 284, 3015, 784, 883, 508, 2314, 481, 2555, 407, 284, 11, 290, 883, 508, 389, 407, 4609, 481, 3015, 15456, 13]

[7085, 661, 2314, 651, 1497, 422, 511, 670, 11, 393, 1064, 2130, 284, 804, 706, 511, 1751, 13, 843, 326, 338, 262, 1738, 1521, 484, 836, 470, 3015, 13]

[1114, 4150, 30]

[2329, 4737, 986, 357, 345, 651, 262, 4286, 5145, 8, 887, 878, 345, 3280, 11, 314, 1183, 655, 910, 428, 25, 775, 821, 477, 3910, 326, 617, 661, 836, 470, 3015, 13]

[770, 3297, 286, 281, 4578, 318, 407, 649, 13]

[4362, 262, 4819, 318, 655, 326, 25, 4819, 13, 1406, 645, 530, 318, 705, 24326, 6, 422, 6709, 780, 705, 23149, 22711, 6, 318, 691, 257, 2372, 284, 883, 508, 1682, 765, 284, 3015, 13]

[554, 616, 1570, 11, 612, 389, 734, 2761, 351, 6709, 13, 3274, 11, 6709, 318, 407, 257, 5019, 5408, 286, 1964, 12741, 13]

[4362, 366, 883, 508, 466, 16552, 391, 422, 6709, 366, 532, 355, 345, 1234, 340, 532, 3

step  agent
0     13       None
1     13       None
2     13       None
3     13       None
4     13       None
5     13       None
6     13       None
7     13       None
8     13       None
9     13       None
10    13       None
11    13       None
12    13       None
13    13       None
14    13       None
Name: tokens, dtype: object

In [193]:
df.loc[idx[:,[5]],:]['post'].apply(lambda p: print("\n"+str(p['text'])))


Many people cannot get away from their work, or find someone to look after their children. And that's the reason why they don't vote.

Proxy voting and postal voting will be available for those who cannot pgysically get to the polling station – voting by the Internet could also be arranged for.

 What would citizens gain from compulsory voting? And what are the problems with it? These are all questions that we will be addressing in the next post in this series.

 What should be done to deal with people who do not vote? How can we make it possible to vote? And should there be any restrictions on some forms of voting?

 Is it really a democracy if citizens cannot vote in a free, the full and equal and representative democracy? Is it a democracy if they cannot change the election result?

 How does this get us closer to being able to achieve what we all want,...<|endoftext|>...

 If we have access to the internet, should we be able to vote? I think that should be the preference of the vo

step  agent
0     5        None
1     5        None
2     5        None
3     5        None
4     5        None
5     5        None
6     5        None
7     5        None
8     5        None
9     5        None
10    5        None
11    5        None
12    5        None
13    5        None
14    5        None
Name: post, dtype: object

# Tests

In [33]:
text = "Replace me by any text you'd like. It is really interesting. But I don't know where to start. I have no clue. Do you know where to go?"
encoded_input = tokenizer(text)
encoded_input

{'input_ids': [3041, 5372, 502, 416, 597, 2420, 345, 1549, 588, 13, 632, 318, 1107, 3499, 13, 887, 314, 836, 470, 760, 810, 284, 923, 13, 314, 423, 645, 18437, 13, 2141, 345, 760, 810, 284, 467, 30], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [37]:
len(encoded_input['input_ids'])

36

In [41]:
output = model.generate(
    torch.tensor([encoded_input['input_ids']]).to("cuda"),
    **decoding_parameters
)
output

RuntimeError: CUDA error: device-side assert triggered

In [243]:
output.tolist()[0][len(encoded_input['input_ids']):]

[1374,
 546,
 428,
 720,
 1120,
 2855,
 30,
 9022,
 286,
 262,
 1708,
 466,
 345,
 892,
 318,
 749,
 35778,
 30,
 198,
 198]

In [52]:
tokenizer.decode(output[:,len(encoded_input['input_ids']):].tolist()[0])

" I don't really wanna embarrass myself either, but"

In [108]:
l=[1,2,3,4]
idc = [i for i,e in enumerate(l) if e in [2,3]]
l[:idc[-1]+1]

[1, 2, 3]

In [65]:
arrays = [
   np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]),
   np.array(["one", "two", "one", "two", "one", "two", "one", "two"]),
]

In [60]:
pd.Series(agents)

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
dtype: int64

In [73]:
df.loc['bar'][1]

one    10.0
two     0.0
Name: 1, dtype: float64

In [68]:
df.loc['bar','one'][1]=10

In [70]:
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.0,10.0,0.0,0.0
bar,two,0.0,0.0,0.0,0.0
baz,one,0.0,0.0,0.0,0.0
baz,two,0.0,0.0,0.0,0.0
foo,one,0.0,0.0,0.0,0.0
foo,two,0.0,0.0,0.0,0.0
qux,one,0.0,0.0,0.0,0.0
qux,two,0.0,0.0,0.0,0.0


In [71]:
df.loc['bar'][2]=df.loc['bar'][1]

In [72]:
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.0,10.0,10.0,0.0
bar,two,0.0,0.0,0.0,0.0
baz,one,0.0,0.0,0.0,0.0
baz,two,0.0,0.0,0.0,0.0
foo,one,0.0,0.0,0.0,0.0
foo,two,0.0,0.0,0.0,0.0
qux,one,0.0,0.0,0.0,0.0
qux,two,0.0,0.0,0.0,0.0
