# Urn strategy for two agent signal learning game

Adapted from the model described in Argiento et al. (2009), 'Learning to Signal: Analysis of a micro-level reinforcement model'.

In [1]:
from numpy.random import choice
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from collections import Counter

In [2]:
def proportionOf(arr, element_type):
    """Return the fraction of items in ``arr`` that equal ``element_type``.

    Args:
        arr: Iterable of hashable tokens (e.g. the contents of one urn).
        element_type: The token whose share of ``arr`` is wanted.

    Returns:
        A float between 0 and 1. An empty ``arr`` yields ``0.0`` instead of
        raising ``ZeroDivisionError``.
    """
    counts = Counter(arr)
    total = sum(counts.values())
    if total == 0:
        # Nothing was counted, so no token can make up any share of it.
        return 0.0
    return counts[element_type] / total

## Initial Parameters

In [3]:
# Labels for the possible states of nature. The list is never modified; one
# entry is sampled from it at the start of every round.
nature_states = ['1', '2']

# How many rounds of the game to play.
total_rounds = 70

## Run the actual game over `total_rounds` rounds

In [10]:
# Per-round history. The model itself does not need any of this; it is kept
# only so the run can be graphed afterwards.
outcome = []
sender_proportions_1 = []
receiver_proportions_1 = []

# Each agent has one urn per possible input, keyed by that input's label:
# the sender's urns are keyed by state of nature, the receiver's by signal.
# An urn is a plain list of tokens, so a uniform draw from the list is
# weighted by how many copies of each token it holds, and learning is just
# appending another copy. (A DataFrame of token counts used as sampling
# weights would also work; the lists are kept for their simplicity.)
sender_urns = {'1': ['A', 'B'], '2': ['A', 'B']}
receiver_urns = {'A': ['1', '2'], 'B': ['1', '2']}

for _ in range(total_rounds):
    # Snapshot the urn make-up as it stands going into this round.
    sender_proportions_1.append(proportionOf(sender_urns['1'], 'A'))
    receiver_proportions_1.append(proportionOf(receiver_urns['A'], '1'))

    # Nature picks a state, the sender answers with a signal drawn from the
    # urn for that state, and the receiver acts by drawing from the urn for
    # that signal.
    state = choice(nature_states)
    signal = choice(sender_urns[state])
    action = choice(receiver_urns[signal])

    if action != state:
        # Miss: record the loss and leave both urns untouched.
        outcome.append(0)
        continue

    # Hit: reinforce the tokens that produced the success.
    outcome.append(1)
    sender_urns[state].append(signal)
    receiver_urns[signal].append(action)

## Graphing the outcomes

Histogram of cumulative wins, plotted against the proportions of As in Sender Urn 1, and of 1s in Receiver Urn A. (If I've understood it correctly, the hope is that these will correlate over time, both converging towards either 1 or 0.)

In [11]:
# One x value per round played.
rounds = list(range(total_rounds))

# Wins are drawn on the primary y axis and urn proportions on the secondary.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Summing the 0/1 outcomes per bin and accumulating them gives a running
# total of wins.
wins_trace = go.Histogram(
    x=rounds,
    y=outcome,
    nbinsx=total_rounds,
    histfunc="sum",
    cumulative=dict(enabled=True, direction='increasing', currentbin='half'),
    name="Cumulative wins",
    marker={"color": "#b0bec5"},
)
fig.add_trace(wins_trace, secondary_y=False)

# The two proportion series share the secondary axis and differ only in
# their data, legend label and colour.
proportion_series = (
    (sender_proportions_1, "Proportion of As in Sender Urn 1", "#d81b60"),
    (receiver_proportions_1, "Proportion of 1s in Receiver Urn A", "#3f51b5"),
)
for series, label, colour in proportion_series:
    line_trace = go.Scatter(
        x=rounds,
        y=series,
        name=label,
        marker={"color": colour},
    )
    fig.add_trace(line_trace, secondary_y=True)

fig.update_xaxes(title="Rounds")
fig.update_yaxes(title="Cumulative wins", showgrid=False, secondary_y=False)
fig.update_yaxes(
    range=[0, 1],
    title="Proportion of tokens in urns",
    showgrid=False,
    secondary_y=True,
)
fig.update_layout(plot_bgcolor="#f4f4f4")
fig.show()

# Generalising the code for other game structures

The game structure is fixed by passing in `states` and `signals` arrays. For example, you can have a 'two states, three signals' game by passing in `states=['1', '2']` and `signals=['a', 'b', 'c']`.

Choices are weighted just by urn contents, rather than having any content bias. In theory an initial bias could be introduced either by providing weight constants or by passing in unbalanced urns as starting conditions, e.g. `signals=['a', 'a', 'a', 'b']`. However, neither is possible at present. (Whilst the unbalanced starting-points method would work in the above script, it won't here because tokens are used as column names, which need to be unique.)

In [12]:
import pandas as pd

def signal_simulation(states, signals, total_rounds):
    """Simulate an urn-reinforcement signalling game of arbitrary shape.

    The sender has one urn per state (holding signal tokens) and the
    receiver has one urn per signal (holding state tokens). Every urn starts
    with exactly one of each token. In each game a state is drawn uniformly,
    the sender draws a signal from that state's urn, and the receiver draws
    an action from that signal's urn, both draws weighted by token counts.
    When the action equals the state, one token is added to each of the two
    urns that were used.

    Args:
        states: Sequence of unique state labels.
        signals: Sequence of unique signal labels.
        total_rounds: Number of rows in the returned histories. Row 1 is the
            initial urn contents, so ``total_rounds - 1`` games are actually
            played (none when ``total_rounds`` is 1 or less).

    Returns:
        A dict with three entries:

        * ``'sender_urns'``: DataFrame of token counts indexed from 1, with
          ``(urn, token)`` MultiIndex columns ordered as the product of
          ``states`` and ``signals``. Row ``i`` is the state of the urns
          after game ``i - 1``.
        * ``'receiver_urns'``: the same for the receiver, with columns
          ordered as the product of ``signals`` and ``states``.
        * ``'outcomes'``: list with one entry per game played, 1 for a win
          and 0 for a loss.

    Raises:
        ValueError: If ``states`` or ``signals`` is empty.
    """
    state_count = len(states)
    signal_count = len(signals)
    if state_count == 0 or signal_count == 0:
        raise ValueError("states and signals must each contain at least one label")

    sender_headers = pd.MultiIndex.from_product([states, signals], names=["urn", "token"])
    receiver_headers = pd.MultiIndex.from_product([signals, states], names=["urn", "token"])

    # Live token counts, addressed by position: sender[state][signal] and
    # receiver[signal][state]. Plain lists are used instead of growing a
    # DataFrame one row per round, which re-copies the whole frame each time.
    sender = [[1] * signal_count for _ in range(state_count)]
    receiver = [[1] * state_count for _ in range(signal_count)]

    def snapshot(urns):
        # Flatten urn-by-urn so the values line up with the product-ordered
        # column headers.
        return [count for urn in urns for count in urn]

    sender_history = [snapshot(sender)]
    receiver_history = [snapshot(receiver)]
    outcomes = []

    for _ in range(2, total_rounds + 1):
        # Nature picks a state uniformly at random.
        state = choice(state_count)

        # The sender draws a signal, weighted by the contents of that state's urn.
        sender_urn = sender[state]
        sender_total = sum(sender_urn)
        sender_weights = [count / sender_total for count in sender_urn]
        signal = choice(signal_count, 1, p=sender_weights)[0]

        # The receiver draws an action, weighted by the contents of that signal's urn.
        receiver_urn = receiver[signal]
        receiver_total = sum(receiver_urn)
        receiver_weights = [count / receiver_total for count in receiver_urn]
        action = choice(state_count, 1, p=receiver_weights)[0]

        # Reinforce both urns on a match and record the result either way.
        if state == action:
            sender_urn[signal] += 1
            receiver_urn[action] += 1
            outcomes.append(1)
        else:
            outcomes.append(0)

        sender_history.append(snapshot(sender))
        receiver_history.append(snapshot(receiver))

    row_labels = range(1, len(sender_history) + 1)
    sender_urns = pd.DataFrame(sender_history, index=row_labels, columns=sender_headers)
    receiver_urns = pd.DataFrame(receiver_history, index=row_labels, columns=receiver_headers)

    return {'sender_urns': sender_urns, 'receiver_urns': receiver_urns, 'outcomes': outcomes}

## Plotting results

In [13]:
# Run a three-state, two-signal game, then chart the token count of every
# sender urn over time: one line per (urn, token) column.
results = signal_simulation(['1', '2', '3'], ['a', 'b'], 1000)
s = results['sender_urns'].copy()

fig1 = go.Figure()
for urn_and_token, counts in s.items():
    # The column label is an (urn, token) pair; join it into a legend name.
    sender_trace = go.Scatter(x=s.index, y=counts, name="_".join(urn_and_token))
    fig1.add_trace(sender_trace)

fig1.show()

In [110]:
# Chart the token count of every receiver urn over time: one line per
# (urn, token) column.
r = results['receiver_urns'].copy()

fig2 = go.Figure()
for urn_and_token, counts in r.items():
    # The column label is an (urn, token) pair; join it into a legend name.
    receiver_trace = go.Scatter(x=r.index, y=counts, name="_".join(urn_and_token))
    fig2.add_trace(receiver_trace)

fig2.show()