# Import empirical data

In [650]:
import functools
import itertools
import json

import pandas as pd
import numpy as np

from scipy.special import logsumexp
from numpy.random import choice
from collections import defaultdict


d = pd.read_json('../../../model/lib_learning_output/synthesis_output_cogsci_revised/ca_synthesis_cogsci_21_ppt_1.json')
alpha = 0.04
lexemes = ['blah', 'blab', 'bloop', 'bleep', 'floop']

## Constructing the lexicon

In [640]:
# we assume all agents start with a basic mapping 
# between 'h'/'v' in the DSL and 'horizontal'/'vertical' in language
class BlockLexicon(dict) :
    def __init__(self, primitives, lexemes):
        """
        initialize dictionary subclass
        """
        dict.__init__(self)
        self.__dict__ = self
        unassigned_lexemes = lexemes.copy()
        
        for primitive in primitives :
            if primitive in ['v', 'h'] :
                adjective = 'horizontal' if primitive == 'h' else 'vertical'
                self.update({primitive : f'place a {adjective} block.'})
            elif primitive[0] in ['l', 'r'] :
                distance = primitive.split('_')[1]
                direction = 'right' if primitive[0] == 'r' else 'left'
                self.update({primitive : f'move to the {direction} by {distance}'})
            else :
                self.update({primitive: f'place a {unassigned_lexemes.pop()}.'})
    def __hash__(self):
        return hash(json.dumps(self, sort_keys=True))

    def invert(self):
        """
        invert keys and values of a dictionary d
        """
        return {v: k for k, v in self.items()}
    
    def dsl_to_language(self, e) :
        # parse expression e written in DSL into language
        # if dsl element unrecognized, choose at random
        return self.get(e) if e in self else choice(lexemes)
    
    def language_to_dsl(self, e) :
        # parse expression e written in DSL into language
        # if language unrecognized, choose at random
        inverted_lexicon = self.invert()
        unassigned_primitives = [k for k in self.keys() if k[:5] == 'chunk']
        return inverted_lexicon.get(e) if e in inverted_lexicon else choice(unassigned_primitives)

Let's take this class out for a drive. 

We initialize it with the primitives of the agent's DSL on a given trial and an (ordered) list of available lexemes.

In [641]:
dsl = d['dsl'][10]
l = BlockLexicon(dsl, lexemes)
print(dsl[0], '->', l.dsl_to_language(dsl[0]))
print(dsl[10], '->', l.dsl_to_language(dsl[10]))
print(dsl[-1], '->', l.dsl_to_language(dsl[-1]))

h -> place a horizontal block.
l_8 -> move to the left by 8
chunk_C -> place a blab.


and we can also go in the other direction

In [642]:
print('place a horizontal block. ->', l.language_to_dsl('place a horizontal block.'))
print('move to the left by 8 ->', l.language_to_dsl('move to the left by 8'))
print('place a blah. ->', l.language_to_dsl('place a blah.'))
print('place a flomp. ->', l.language_to_dsl('place a womp.'))

place a horizontal block. -> h
move to the left by 8 -> l_8
place a blah. -> chunk_8
place a flomp. -> chunk_Pi


## Adding probabilities

If we were using a probabilistic programming language like WebPPL, we would be able to automatically construct probability distributions over lexicons. But to do this simple example in base python, we're going to manually construct a distribution as another dictionary. The keys will be possible lexicons and the values will be their probabilities. 

Because the only thing that varies across different lexicons in our example is the word to use for a given chunk, the support of the distribution only needs to be defined over the list of possible mappings (everything else is fixed across lexicons)

In [658]:
class Distribution() :
    def __init__(self, support, probabilities, log_space = False):
        self.log_space = log_space
        self.d = {}
        for element, probability in zip(support, probabilities) :
            self.d.update({element: probability})
    
    def __str__(self) :
        return str(self.d)
 
    def copy(self) :
        return Distribution(self.support(), [self.score(k) for k in self.support()], log_space = self.log_space)
    
    def update(self, element):
        for k, val in element.items():
            if k in self.d :
                # if it already exists in the distribution, aggregate probabilities
                self.d[k] = np.logaddexp(self.d[k], val) if self.log_space else self.d[k] + val
            else : 
               # otherwise add as a new element of the distribution
                self.d[k] = val
               
    def score(self, val) :
        return self.d[val] if val in self.d else self.epsilon()
    
    def epsilon(self) :
        return np.log(0.01) if self.log_space else 0.01
    
    def support(self) :
        return list(self.d.keys())
    
    def renormalize(self) :
        Z = logsumexp(list(self.d.values())) if self.log_space else sum(self.d.values()) 
        for k, prob in self.d.items():
            self.d[k] = prob - Z if self.log_space else prob / Z
          
    def marginalize(self, f) :
        d_new = defaultdict(float)
        for k, val in self.d.items():
            d_new[f(k)] = np.logaddexp(d_new[f(k)], val) if self.log_space else d_new[f(k)] + val
        return d_new
        
    def to_logspace(self) :
        self.log_space = True
        for k, prob in self.d.items():
            self.d[k] = np.log(prob)

    def from_logspace(self) :
        self.log_space = False
        for k, prob in self.d.items():
            self.d[k] = np.exp(prob)
            
class UniformDistribution(Distribution) :
    def __init__(self, support):
        uniform_probabilities = [ 1/len(support) ] * len(support)
        super().__init__(support, uniform_probabilities)
        
class EmptyDistribution(Distribution) :
    def __init__(self):
        super().__init__([], [])

We can now define a prior over lexicons:

In [663]:
possible_lexicons = [BlockLexicon(dsl, list(mapping)) for mapping in itertools.permutations(lexemes)]
prior = UniformDistribution(possible_lexicons)
print('consider lexicon =', json.dumps(prior.support()[0], indent = 4))
print('P(lexicon) =', prior.score(prior.support()[0]))
print('we can also marginalize to look at values of chunk_C = ', 
      json.dumps(prior.marginalize(lambda d : d['chunk_C']), indent = 4))

consider lexicon = {
    "h": "place a horizontal block.",
    "v": "place a vertical block.",
    "l_0": "move to the left by 0",
    "l_1": "move to the left by 1",
    "l_2": "move to the left by 2",
    "l_3": "move to the left by 3",
    "l_4": "move to the left by 4",
    "l_5": "move to the left by 5",
    "l_6": "move to the left by 6",
    "l_7": "move to the left by 7",
    "l_8": "move to the left by 8",
    "l_9": "move to the left by 9",
    "l_10": "move to the left by 10",
    "l_11": "move to the left by 11",
    "l_12": "move to the left by 12",
    "r_0": "move to the right by 0",
    "r_1": "move to the right by 1",
    "r_2": "move to the right by 2",
    "r_3": "move to the right by 3",
    "r_4": "move to the right by 4",
    "r_5": "move to the right by 5",
    "r_6": "move to the right by 6",
    "r_7": "move to the right by 7",
    "r_8": "move to the right by 8",
    "r_9": "move to the right by 9",
    "r_10": "move to the right by 10",
    "r_11": "move to t

# Create agents

Now we're ready to define our Architect and Builder.

In [645]:
class FixedAgent() :
    def __init__(self, role, trial) :
        self.role = role
        self.actions = trial['dsl']

        # initialize beliefs to uniform prior over lexicons
        self.possible_lexicons = set([BlockLexicon(self.actions, list(mapping)) 
                                      for mapping in itertools.permutations(lexemes)])
        self.beliefs = UniformDistribution(self.possible_lexicons)
        self.utterances = set(list(self.possible_lexicons)[0].values())
        
    def act(self, observation) :
        if self.role == 'architect' :
            # get P(utt | target) by marginalizing over lexicons 
            utt_dist = EmptyDistribution()
            for lexicon in self.beliefs.support() :
                utt_dist.update({lexicon.dsl_to_language(observation) : self.beliefs.score(lexicon)})
            return choice(a = [*utt_dist.support()], 
                          p = [utt_dist.score(u) for u in utt_dist.support()])

        if self.role == 'builder' :
            # get P(a | utt) by marginalizing over lexicons 
            action_dist = EmptyDistribution()
            for lexicon in self.beliefs.support() :
                action_dist.update({lexicon.language_to_dsl(observation) : self.beliefs.score(lexicon)})
            return choice(a = [*action_dist.support()], 
                          p = [action_dist.score(a) for a in action_dist.support()])

In [646]:
architect = FixedAgent('architect', d.loc[0].to_dict())
print('architect choice: ', architect.act('h'))

builder = FixedAgent('builder', d.loc[0].to_dict())
print('builder choice: ', builder.act('place a horizontal block.'))

architect choice:  place a horizontal block.
builder choice:  h


# Run simulation

Now we have our agents, we just have to run them forward!

In [647]:
def run_simulation() :
    output = pd.DataFrame({"utt": [], "response": [], "target_program": [], "target_length" : [], "acc": []})
    for i, trial in d.iterrows() :
        architect = FixedAgent('architect', trial)
        builder = FixedAgent('builder', trial)

        # architect selects which program representation to comunicate proportional to length
        possiblePrograms = list(trial['programs_with_length'].keys())
        possibleLengths = np.array(list(trial['programs_with_length'].values()))
        utilities = np.exp(-alpha * possibleLengths) / sum(np.exp(-alpha * possibleLengths))
        target_program = choice(a = possiblePrograms, p = utilities)

        # loop through steps of target program one at a time
        utts, responses, accs = [], [], []
        for step in target_program.split(' ') :
            utt = architect.act(step)
            response = builder.act(utt)
            utts.append(utt)
            responses.append(response)
            accs.append(1.0 * (response == step))

        output = pd.concat([output, pd.DataFrame({
            "utt": utts,
            "response": responses,
            "acc": accs,
            "target_program": target_program,
            "target_length" : trial['programs_with_length'][target_program],
        })])
    return output
display(run_simulation())

Unnamed: 0,utt,response,target_program,target_length,acc
0,place a horizontal block.,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,1.0
1,move to the left by 4,l_4,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,1.0
2,place a horizontal block.,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,1.0
3,move to the left by 1,l_1,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,1.0
4,place a vertical block.,v,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,1.0
...,...,...,...,...,...
12,place a vertical block.,v,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,14.0,1.0
13,place a vertical block.,v,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,14.0,1.0
0,place a bleep.,chunk_L,chunk_Pi r_9 chunk_L,3.0,0.0
1,move to the right by 9,r_9,chunk_Pi r_9 chunk_L,3.0,1.0


TODO: make accuracy plot...

Wait, why is the accuracy so bad? Well, our agents aren't actually *learning* -- they're continuing to use their initial uniform priors.

# Update beliefs

To have our agents learn, we need to extend the agent class to do Bayesian inference.

In [679]:
class LearningAgent(FixedAgent) :
    def __init__(self, role, curr_trial, previous_trial_df) :
        super().__init__(role, curr_trial)
        combined_primitives = set().union(*previous_trial_df['dsl']) if not previous_trial_df.empty else self.actions
        self.possible_lexicons = set([BlockLexicon(set().union(combined_primitives), list(mapping)) 
                                      for mapping in itertools.permutations(lexemes)])
        self.utterances = set(list(self.possible_lexicons)[0].values())
        self.update_beliefs(previous_trial_df)

    def update_beliefs(self, previous_trial_df) :
        # Initialize posterior 
        posterior = EmptyDistribution()
        posterior.to_logspace()

        # for each data point, calculate the marginal likelihood under lexicon distribution
        # P(l | obs) = 1/Z * P(l) * \prod_{o \in obs} P(o | l)
        # log P(l|obs) = -log Z + log P(l) + \sum_{o \in obs} log P(o | l)
        for lexicon in self.beliefs.support() :
            prior_term = np.log(self.beliefs.score(lexicon))
            likelihood_term = 0
            for i, step in previous_trial_df.iterrows() :
                if self.role == 'builder' :
                    likelihood_term += np.log(self.A1(step.target, lexicon).score(step.utterance))
                elif self.role == 'architect' :
                    likelihood_term += np.log(self.B0(step.utterance, lexicon).score(step.response))
            posterior.update({lexicon : prior_term + likelihood_term})
        posterior.renormalize()
        posterior.from_logspace()
        self.beliefs = posterior
        
    def B0(self, utt, lexicon) :
        builder_dist = EmptyDistribution()
        for action in self.actions :
            builder_dist.update({action : 1 if action == lexicon.language_to_dsl(utt) else 0.01})
        builder_dist.renormalize()
        return builder_dist
        
    def A1(self, target, lexicon) :
        architect_dist = EmptyDistribution()
        for utt in self.utterances :
            architect_dist.update({utt : 1 if utt == lexicon.dsl_to_language(target) else 0.01})
        architect_dist.renormalize()
        return architect_dist

In [681]:
def run_learning_simulation() :
    output = pd.DataFrame({"utterance": [], "response": [], "target": [], "full_program" : [], "target_length" : [], "dsl" : [], "acc": []})
    for i, trial in d.iterrows() :
        architect = LearningAgent('architect', trial, output)
        builder = LearningAgent('builder', trial, output)
        
        # architect selects which program representation to comunicate proportional to length
        possiblePrograms = list(trial['programs_with_length'].keys())
        possibleLengths = np.array(list(trial['programs_with_length'].values()))
        utilities = np.exp(-alpha * possibleLengths) / sum(np.exp(-alpha * possibleLengths))
        target_program = choice(a = possiblePrograms, p = utilities)

        # loop through steps of target program one at a time
        target_steps, utts, responses, accs = [], [], [], []
        for step in target_program.split(' ') :
            utt = architect.act(step)
            response = builder.act(utt)
            target_steps.append(step)
            utts.append(utt)
            responses.append(response)
            accs.append(response == step)

        print('trial', i)
        print(pd.DataFrame({'utts' : utts, 'responses' : responses, 'correct' : accs, 'target' : target_steps}))
        print('beliefs about chunk_C meaning', 
              json.dumps(architect.beliefs.marginalize(lambda d : d['chunk_C'] if 'chunk_C' in d else None), indent = 4))
        output = pd.concat([output, pd.DataFrame({
            "utterance": utts,
            "response": responses,
            "acc": accs,
            "target" : target_steps,
            "full_program": target_program,
            "dsl" : [trial['dsl']] * len(utts),
            "target_length" : trial['programs_with_length'][target_program],
        })])
    return output
display(run_learning_simulation())

trial 0
                         utts responses  correct target
0   place a horizontal block.         h     True      h
1       move to the left by 4       l_4     True    l_4
2   place a horizontal block.         h     True      h
3       move to the left by 1       l_1     True    l_1
4     place a vertical block.         v     True      v
5     place a vertical block.         v     True      v
6      move to the right by 9       r_9     True    r_9
7     place a vertical block.         v     True      v
8      move to the right by 6       r_6     True    r_6
9     place a vertical block.         v     True      v
10      move to the left by 5       l_5     True    l_5
11  place a horizontal block.         h     True      h
12     move to the right by 4       r_4     True    r_4
13  place a horizontal block.         h     True      h
beliefs about chunk_C meaning {
    "null": 1.0
}
trial 1
                         utts responses  correct target
0   place a horizontal block.         

  output = pd.concat([output, pd.DataFrame({


trial 4
                        utts responses  correct    target
0             place a floop.  chunk_Pi    False  chunk_8b
1     move to the right by 1       r_1     True       r_1
2  place a horizontal block.         h     True         h
3    move to the right by 12      r_12     True      r_12
4  place a horizontal block.         h     True         h
5      move to the left by 4       l_4     True       l_4
6              place a blah.  chunk_8b     True  chunk_8b
beliefs about chunk_C meaning {
    "null": 1.0000000000000007
}
trial 5
                     utts responses  correct    target
0          place a floop.   chunk_C    False  chunk_Pi
1  move to the right by 9       r_9     True       r_9
2           place a blah.   chunk_L     True   chunk_L
beliefs about chunk_C meaning {
    "place a bleep.": 0.20861920648472843,
    "place a blah.": 0.30255986880954405,
    "place a floop.": 0.007909254203537592,
    "place a bloop.": 0.22618210305564773,
    "place a blab.": 0.25472956

Unnamed: 0,utterance,response,target,full_program,target_length,dsl,acc
0,place a horizontal block.,h,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",1.0
1,move to the left by 4,l_4,l_4,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",1.0
2,place a horizontal block.,h,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",1.0
3,move to the left by 1,l_1,l_1,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",1.0
4,place a vertical block.,v,v,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",1.0
...,...,...,...,...,...,...,...
9,move to the left by 4,l_4,l_4,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",True
10,place a horizontal block.,h,h,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",True
11,move to the left by 1,l_1,l_1,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",True
12,place a vertical block.,v,v,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,14.0,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",True


# Jointly choose program and utterance

Wow, this is great! We can see our agents are updating their beliefs about the lexicon over time and able to get somewhat more accurate as they coordinate. But one of the most interesting things about our empirical data is that speakers seem to be strategically choosing which representation of the tower to convey -- our best current theory of why participants reduce the length of their utterances over time is that even when new library chunks come online, architects don't always try to refer to them right away. They aren't confident enough that their partner will understand, as the block-level descriptions are much safer. However, the block-level descriptions are also much *costlier* in terms of time and effect because they have to laboriously describe one action at a time. 

So far, we just used a placeholder for how the speaker picks which representation to communication: they just randomly pick from the list of candidates, slightly preferring shorter programs. However, there are other considerations that ought to go into this decision, namely the estimated likelihood that the listener will do the right thing.

As an exercise, add one line of code to the simulation to weight the target program according to its utility

In [None]:
def run_strategic_simulation() :
    output = pd.DataFrame({"utterance": [], "response": [], "target": [], "full_program" : [], "target_length" : [], "dsl" : [], "acc": []})
    for i, trial in d.iterrows() :
        architect = LearningAgent('architect', trial, output)
        builder = LearningAgent('builder', trial, output)
        
        # architect selects which program representation to comunicate proportional to length
        possiblePrograms = list(trial['programs_with_length'].keys())
        possibleLengths = np.array(list(trial['programs_with_length'].values()))
        
        ## 
        ## utilities = np.exp(-alpha * possibleLengths) / sum(np.exp(-alpha * possibleLengths))
        ## 
        
        target_program = choice(a = possiblePrograms, p = utilities)

        # loop through steps of target program one at a time
        target_steps, utts, responses, accs = [], [], [], []
        for step in target_program.split(' ') :
            utt = architect.act(step)
            response = builder.act(utt)
            target_steps.append(step)
            utts.append(utt)
            responses.append(response)
            accs.append(response == step)

        print('trial', i)
        print(pd.DataFrame({'utts' : utts, 'responses' : responses, 'correct' : accs, 'target' : target_steps}))
        print('beliefs about chunk_C meaning', 
              json.dumps(architect.beliefs.marginalize(lambda d : d['chunk_C'] if 'chunk_C' in d else None), indent = 4))
        output = pd.concat([output, pd.DataFrame({
            "utterance": utts,
            "response": responses,
            "acc": accs,
            "target" : target_steps,
            "full_program": target_program,
            "dsl" : [trial['dsl']] * len(utts),
            "target_length" : trial['programs_with_length'][target_program],
        })])
    return output
display(run_learning_simulation())