# Urn strategy for a two-agent signal-learning game

Adapted from the model described in Argiento et al. (2009) 'Learning to Signal: Analysis of a micro-level reinforcement model' 

In [314]:
import random
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from collections import Counter

In [315]:
def proportionOf(arr, element_type):
    """Return the fraction of items in ``arr`` that equal ``element_type``.

    Args:
        arr: Iterable of hashable items (for example, the tokens in an urn).
        element_type: The item whose share of ``arr`` is wanted.

    Returns:
        A float between 0 and 1. An empty ``arr`` yields 0.0 rather than
        raising ``ZeroDivisionError``.
    """
    counts = Counter(arr)
    total = sum(counts.values())
    if total == 0:
        # Nothing to take a proportion of; report "none" instead of crashing.
        return 0.0
    # Counter returns 0 for a missing key, so an absent element gives 0.0.
    return counts[element_type] / total

## Initial Parameters

In [316]:
# Number of rounds the signalling game is played for.
total_rounds = 70

# The states nature can be in. This list is never modified; one entry is
# sampled from it at the start of every round.
nature_states = ["1", "2"]

## Run the actual game over `total_rounds` rounds

In [328]:
# Per-round history, kept purely so the run can be graphed afterwards;
# the learning step itself never reads these lists.
outcome = []
sender_proportions_1 = []
receiver_proportions_1 = []

# Each agent owns one urn per possible input, keyed by that input's label:
# the sender looks up an urn of signals by state of nature, the receiver
# looks up an urn of actions by signal. Every urn starts with one token of
# each kind and gains tokens as learning occurs.
# (Storing token counts and using them as sampling weights would be an
# alternative; plain lists keep the update step to a single append.)
sender_urns = {state_label: ["A", "B"] for state_label in ("1", "2")}
receiver_urns = {signal_label: ["1", "2"] for signal_label in ("A", "B")}

for _ in range(total_rounds):
    # Nature picks a state, the sender draws a signal from the urn for
    # that state, and the receiver draws an action from the urn for
    # that signal.
    state = random.choice(nature_states)
    sender_urn = sender_urns[state]
    signal = random.choice(sender_urn)
    receiver_urn = receiver_urns[signal]
    action = random.choice(receiver_urn)

    # Record the urn compositions as they stand before this round's
    # reinforcement is applied.
    sender_proportions_1.append(proportionOf(sender_urns["1"], "A"))
    receiver_proportions_1.append(proportionOf(receiver_urns["A"], "1"))

    # A round is won when the receiver's action matches the state.
    won = action == state
    outcome.append(1 if won else 0)
    if won:
        # Reinforce: each agent adds another copy of the token it just
        # drew to the urn it drew from. Nothing changes on a loss.
        sender_urn.append(signal)
        receiver_urn.append(action)

## Graphing the outcomes

Histogram of cumulative wins, plotted against the proportions of As in Sender Urn 1, and of 1s in Receiver Urn A. (If I've understood it correctly, the hope is that these will correlate over time, both converging towards either 1 or 0.)

In [329]:
# x positions shared by every trace: one entry per simulated round.
rounds = list(range(total_rounds))

# Single plot with two y axes: win counts on the primary axis, urn
# proportions on the secondary one.
fig = make_subplots(specs=[[dict(secondary_y=True)]])

# Histogram of the per-round outcomes, summed within each bin and
# accumulated in increasing direction.
wins_trace = go.Histogram(
    x=rounds,
    y=outcome,
    nbinsx=total_rounds,
    histfunc="sum",
    cumulative=dict(enabled=True, direction="increasing", currentbin="half"),
    name="Cumulative wins",
    marker=dict(color="#b0bec5"),
)
fig.add_trace(wins_trace, secondary_y=False)

# The two proportion series differ only in data, legend label and colour,
# so they are added from a small table (sender first, then receiver).
proportion_series = (
    (sender_proportions_1, "Proportion of As in Sender Urn 1", "#d81b60"),
    (receiver_proportions_1, "Proportion of 1s in Receiver Urn A", "#3f51b5"),
)
for series, label, colour in proportion_series:
    fig.add_trace(
        go.Scatter(x=rounds, y=series, name=label, marker=dict(color=colour)),
        secondary_y=True,
    )

# Axis titles and cosmetics; the proportion axis is pinned to [0, 1].
fig.update_xaxes(title="Rounds")
fig.update_yaxes(title="Cumulative wins", showgrid=False, secondary_y=False)
fig.update_yaxes(
    range=[0, 1],
    title="Proportion of tokens in urns",
    showgrid=False,
    secondary_y=True,
)
fig.update_layout(plot_bgcolor="#f4f4f4")
fig.show()