# Model

Our model is split into three distinct phases.

1. Infer libraries of abstractions using Dreamcoder program abstraction
2. Refactoring programs using libraries of abstractions
3. Bayesian model of convention formation

In [1]:
# setup

import os
import sys
import urllib, io
os.getcwd()

import numpy as np
import pandas as pd

from collections import Counter
import json
import re
import ast
import pickle

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns

from io import BytesIO
import base64

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

In [2]:
df_trial = pd.read_csv('../results/csv/df_trial.csv')
df_trial = df_trial[~df_trial.practice]

# 75% Accuracy on 75% of trials
df75 = pd.DataFrame(df_trial.groupby(['gameid', 'trialNum'])['trialScore'].sum()>75).groupby(['gameid']).sum()
df75['trials'] = df75['trialScore']

df75 = df75[df75['trials']>=9]
includedGames = list(df75.reset_index().gameid)

print("Total dyads achieving 75% Accuracy on 75% of trials:",len(df75))

df_trial = df_trial[df_trial.gameid.isin(includedGames)]

df_trial['tower_pair'] = df_trial.leftTarget + df_trial.rightTarget
df_trial.head()

Total dyads achieving 75% Accuracy on 75% of trials: 49


Unnamed: 0.1,Unnamed: 0,_id,iterationName,gameid,time,assignmentId,leftTarget,rightTarget,trialNum,turnNum,...,word_count,char_count,n_messages,time_final_block,total_block_duration,total_chat_duration,total_duration,diff,flagged,tower_pair
2,2,5ef784d6ce2f7f58fcd0a3a0,pilot0,1494-029e2297-bd3f-4cfe-be00-d06d36724e4e,1593280000000.0,3E47SOBEYUW78Q4UOK6BV9E6YCECIM,C,Pi,0.0,0,...,106.0,494.0,7.0,270221.0,95525.0,163658.0,259183.0,11038.0,False,CPi
3,3,5ef7859cce2f7f58fcd0a3d3,pilot0,1494-029e2297-bd3f-4cfe-be00-d06d36724e4e,1593280000000.0,3E47SOBEYUW78Q4UOK6BV9E6YCECIM,L,C,1.0,0,...,88.0,421.0,6.0,191955.0,63542.0,120868.0,184410.0,7545.0,False,LC
4,4,5ef7863ece2f7f58fcd0a405,pilot0,1494-029e2297-bd3f-4cfe-be00-d06d36724e4e,1593280000000.0,3E47SOBEYUW78Q4UOK6BV9E6YCECIM,L,Pi,2.0,0,...,58.0,257.0,3.0,157681.0,67865.0,87195.0,155060.0,2621.0,False,LPi
5,5,5ef786b6ce2f7f58fcd0a439,pilot0,1494-029e2297-bd3f-4cfe-be00-d06d36724e4e,1593280000000.0,3E47SOBEYUW78Q4UOK6BV9E6YCECIM,Pi,C,3.0,0,...,42.0,191.0,2.0,114105.0,59293.0,53685.0,112978.0,1127.0,False,PiC
6,6,5ef78732ce2f7f58fcd0a477,pilot0,1494-029e2297-bd3f-4cfe-be00-d06d36724e4e,1593280000000.0,3E47SOBEYUW78Q4UOK6BV9E6YCECIM,C,L,4.0,0,...,29.0,144.0,2.0,119006.0,68445.0,47986.0,116431.0,2575.0,False,CL


## 1. Infering libraries using Dreamcoder



In [19]:
# import model
sys.path.append("./lib_learning/")

from towerPrimitives import primitives
from makeTowerTasks import *
from grammar import *
from fragmentGrammar import *
from gen_seq import *
from utilities import *
import numpy as np
import pickle

In [42]:
ppts = df_trial.gameid.unique()
trials = df_trial.trialNum.unique()

trial_seqs = {} # config is just trial sequence

for i, ppt in enumerate(ppts) :
    trial_seqs[i+1] = []
    for j, trial in enumerate(trials) :
        trial_seqs[i+1].append(df_trial.loc[(df_trial.gameid == ppt) & (df_trial.trialNum == trial),str('tower_pair')].iloc[0])
        # trial_seqs[i][j] = ['tower_pair']

In [51]:
towers = dict(C = SupervisedTower("C", "(h (l 1) v v (r 1) h)"),
                L = SupervisedTower("L", "(h (l 4) h (l 1) v v)"),
                Pi = SupervisedTower("Pi", "(v (r 6) v (l 5) h (r 4) h)"),
                CL = SupervisedTower("CL", "(h (l 1) v v (r 1) h (r 12) h (l 4) h (l 1) v v)"),
                CPi = SupervisedTower("CPi", "(h (l 1) v v (r 1) h (r 6) v (r 6) v (l 5) h (r 4) h)"),
                LPi = SupervisedTower("LPi", "(h (l 4) h (l 1) v v (r 9) v (r 6) v (l 5) h (r 4) h)"),
                LC = SupervisedTower("LC", "(h (l 4) h (l 1) v v (r 12) h (l 1) v v (r 1) h)"),
                PiC = SupervisedTower("PiC", "(v (r 6) v (l 5) h (r 4) h (r 7) h (l 1) v v (r 1) h)"),
                PiL = SupervisedTower("PiL", "(v (r 6) v (l 5) h (r 4) h (r 9) h (l 4) h (l 1) v v)"))

In [67]:
primitives
g0 = Grammar.uniform(primitives, continuationType=ttower)
ws = [1.5, 3.2, 3.3, 9.6]
# ws = list(np.arange(0.0, 10, 0.1))
path = './lib_learning/dsls/'


In [68]:
for ppt in range(1, len(ppts)+1):
    dir_path = os.path.join(path, str(ppt))
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    with open(os.path.join(dir_path, 'configs.p'), 'wb') as fp:
        pickle.dump(trial_seqs[ppt], fp, protocol=pickle.HIGHEST_PROTOCOL)

In [72]:
jobs[0]

[((lambda (2x1 (left 1 (1x2 (1x2 (right 1 (2x1 (right 6 (1x2 (right 6 (1x2 (left 5 (2x1 (right 4 (2x1 $0))))))))))))))),
  None)]

In [84]:
FragmentGrammar.induceFromFrontiers(combo[0][0], combo[0][1], combo[0][2])

[]

In [87]:
#single ppt first
ppt = 1

jobs = []
for i in range(len(trial_seqs[ppt])+1):
    job = []
    for tower_scene in trial_seqs[ppt][:i+1]:
        job.append((towers[tower_scene].original, None))
    jobs.append(job)

for i, job in enumerate(jobs):
    print('this job is of length', i+1)
    combo = [(g0, [Frontier.dummy(p, tp=tp) for p, tp in job], w) for w in ws]
    CPUs = 1 # currently not working with >1 cpu. 
    eprint('this is len:', CPUs)
    # print(combo[0])
    results = parallelMap(CPUs, lambda param: FragmentGrammar.induceFromFrontiers(param[0], param[1], param[2]), combo)
    print('saving!')
    with open(path+'{}.p'.format(i+1), 'wb') as fp:
        pickle.dump(results, fp, protocol=pickle.HIGHEST_PROTOCOL)


this job is of length 1


this is len: 1


saving!
this job is of length 2


this is len: 1


saving!
this job is of length 3


this is len: 1


saving!
this job is of length 4


this is len: 1


# NEXT: try and get this running in parallel (try the script version)

In [53]:
towers[trial_seqs[1][0]]

Task(name=CPi, request=tower -> tower, examples=[]

## 2. Refactor programs using libraries of abstractions

In [17]:
# import model
sys.path.append("./lib_learning/")

from program import *
import utilities
import render
from parsePrograms import *

from towerPrimitives import primitives
from makeTowerTasks import *
from grammar import *
from fragmentGrammar import *
from gen_seq import *
from enumeration import *
import importlib

from refactorPrograms import * 
import refactorPrograms

In [16]:
importlib.reload(refactorPrograms)

<module 'refactorPrograms' from '/Users/will/compositional-abstractions-ms/model/./lib_learning/refactorPrograms.py'>

In [18]:
# load libraries learned in  previous section
# Read in the DSLs learned by dreamcoder

# todo: update with libraries learned above
# data_path = '../data/model/dsls/'
data_path = './lib_learning/results/revised/'

dsls = {}
trial_seqs = {}

for ppt in range(1,50):
    
    dsls[ppt] = {}
    
    # read participants' trial sequence
    with open(data_path+f"{ppt}/configs.p", "rb") as config_file:
            trial_seqs[ppt] = pickle.load(config_file)
    
    # read inferred DSLs
    for trial in range(1, 13):
        with open(data_path+f"{ppt}/{trial}.p", "rb") as input_file:
            dsls[ppt][trial] = pickle.load(input_file)

def check_values(value, valid_values, parameter_type):
    if value not in valid_values:
        raise ValueError(f'{parameter_type} must be one of the following values: {valid_values}.')
            
def read_library(ppt, trial, w = 3.2, base_dsl_only=True, sort=False):
    '''
    Returns dsl learned by dreamcoder
    '''
    check_values(ppt, range(1, 50), 'ppt')
    check_values(trial, range(1, 13), 'trial')
    check_values(w, ws, 'w')
    
    lib = [parse(str(fragment), base_dsl_only=base_dsl_only) for fragment in dsls[ppt][trial][w_index[w]]]
    
    if sort:
        lib = sorted(lib, key=lambda x: len(x.split()))
    
    return lib

In [19]:
# setup
ws = [1.5, 3.3, 9.6] # values of w we are considering
w_index = {1.5 : 0,
           3.3 : 2,
           9.6: 3} # positional index of w in loaded data
trials = range(1,13)
ppts = range(1,50)

w = 3.3 # stick with 3.3 from here on
w_position = w_index[w]

See `refactorPrograms.py` for implementation. We incrementally swap in the largest chunks available, checking for smaller chunks until no more swaps are possible.

In [20]:
# refactor the programs
refactor_programs(dsls,
                  trial_seqs,
                  w_position = w_position)

Programs saved in ./data/language_output/programs_ppt_[ppt].json


In [22]:
# inspect programs

ppt = 1

ppt_data = pd.read_json('./data/language_output/programs_ppt_{}.json'.format(ppt))
ppt_data

Unnamed: 0,ppt,trial_num,towers,dsl_lambda,chunks,dsl,min_program,programs_with_length
0,1,1,LPi,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",[],"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,{'h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h': 14}
1,1,2,LC,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",[],"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",h l_4 h l_1 v v r_12 h l_1 v v r_1 h,{'h l_4 h l_1 v v r_12 h l_1 v v r_1 h': 13}
2,1,3,CPi,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",[],"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",h l_1 v v r_1 h r_6 v r_6 v l_5 h r_4 h,{'h l_1 v v r_1 h r_6 v r_6 v l_5 h r_4 h': 14}
3,1,4,PiC,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",[chunk_8b],"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",v r_6 v l_5 h r_4 h r_7 chunk_8b h,"{'v r_6 v l_5 h r_4 h r_7 chunk_8b h': 10, 'v ..."
4,1,5,CL,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...","[chunk_8b, chunk_Pi]","[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",chunk_8b h r_12 h l_4 h l_1 v v,"{'chunk_8b h r_12 h l_4 h l_1 v v': 9, 'h l_1 ..."
5,1,6,PiL,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...","[chunk_8, chunk_Pi, chunk_L, chunk_C]","[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",chunk_Pi r_9 chunk_L,"{'chunk_Pi r_9 chunk_L': 3, 'v r_6 v l_5 h r_4..."
6,1,7,LC,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...","[chunk_8, chunk_Pi, chunk_L, chunk_C]","[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",chunk_L r_12 chunk_C,"{'chunk_L r_12 chunk_C': 3, 'h l_4 h l_1 v v r..."
7,1,8,CPi,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...","[chunk_8, chunk_Pi, chunk_L, chunk_C]","[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",chunk_C r_6 chunk_Pi,"{'chunk_C r_6 chunk_Pi': 3, 'chunk_C r_6 v r_6..."
8,1,9,LPi,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...","[chunk_8, chunk_Pi, chunk_L, chunk_C]","[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",chunk_L r_9 chunk_Pi,"{'chunk_L r_9 chunk_Pi': 3, 'chunk_L r_9 v r_6..."
9,1,10,CL,"[2x1, 1x2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...","[chunk_Pi, chunk_8, chunk_L, chunk_C]","[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",chunk_C r_12 chunk_L,"{'chunk_C r_12 chunk_L': 3, 'chunk_C r_12 h l_..."


We also compute a range of programs of different lengths, by replacing chunks their base DSL equivalents. This provides us with a range of programs of different lengths that the convention formation can select between. Longer programs are less ambiguous, but more costly to communicate.

In [25]:
# look at the programs + lengths inferred for the final trial for one trial sequence
ppt_data['programs_with_length'][11]

{'chunk_Pi r_9 chunk_L': 3,
 'v r_6 v l_5 h r_4 h r_9 chunk_L': 9,
 'chunk_Pi r_9 h l_4 h l_1 v v': 8,
 'v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v': 14}

## 3. Bayesian model of convention formation

Now we have inferred libraries of abstractions, and programs representing each scene that use more or less of these abstractions. Now we want to model how agents coordinate on words for describing a scene.

We provide two implementations of our convention formation model: in webppl and in python.

### Python implementation

In [38]:
# import classes for our model
sys.path.append("../model/convention_formation/python_implementation/")
from distribution import *
from lexicon import *

In [43]:
# data for a single "participant"/ trial sequence
empirical_data = pd.read_json('data/language_output/programs_ppt_1.json')

# Define a set of meaningless placeholder words available to be bound to meanings
lexemes = ['blah', 'blab', 'bloop', 'bleep', 'floop'] 

In [44]:
class FixedAgent() :
    def __init__(self, role, trial) :
        '''
        Args: 
           * role: string giving agent's role in the task ('architect' or 'builder')
           * trial: dictionary of meta-data about the current trial 
        '''

        # initialize beliefs to a uniform prior over possible lexicons, as above
        self.beliefs = LexiconPrior(trial['dsl'], lexemes)

        # set other useful properties
        self.role = role
        self.actions = trial['dsl']
        self.utterances = self.beliefs.sample().utterances
        
    def act(self, observation) :
        '''
        produce an action based on role and current beliefs
        '''
        if self.role == 'architect' :
            # Architect is going to build up a distribution over utterances to say
            utt_dist = self.beliefs.marginalize(lambda l : l.dsl_to_language(observation))
            return utt_dist.sample()

        if self.role == 'builder' :
            # get P(a | utt) by marginalizing over lexicons 
            action_dist = self.beliefs.marginalize(lambda l : l.language_to_dsl(observation))
            return action_dist.sample()

In [45]:
def run_simulation(trial_info) :
    output = SimulationOutput()
    for i, trial in trial_info.iterrows() :
        # Initialize agents using current trial metadata
        architect = FixedAgent('architect', trial)
        builder = FixedAgent('builder', trial)

        # if there are multiple program representations, randomly 
        # select which one to comunicate (we will return to this)
        target_program = choice(list(trial['programs_with_length'].keys()))

        # loop through steps of target program one at a time
        # produce an utterance from architect and response from builder
        for step in target_program.split(' ') :
            utt = architect.act(step)
            response = builder.act(utt)
            output.save(step, utt, response) 

        # flush output buffer and prepare for next trial
        output.flush(trial, target_program)
    return output.get_df()

In [52]:
# we can call the run_simulation function above to simulate the utterances and actions that would be generated 
# in a 12-trial interaction between the Architect and Builder. Note that if you run it multiple times, you'll get 
# different simulated outcomes each time. 
run_0 = run_simulation(empirical_data)
display(run_0)

Unnamed: 0,trial,utterance,response,intention,target_program,dsl,target_length,acc
0,1.0,place a horizontal block.,h,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
1,1.0,move to the left by 4,l_4,l_4,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
2,1.0,place a horizontal block.,h,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
3,1.0,move to the left by 1,l_1,l_1,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
4,1.0,place a vertical block.,v,v,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
...,...,...,...,...,...,...,...,...
9,12.0,move to the left by 4,l_4,l_4,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
10,12.0,place a horizontal block.,h,h,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
11,12.0,move to the left by 1,l_1,l_1,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
12,12.0,place a vertical block.,v,v,v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0


In [53]:
print('target program: \n', run_0.query('trial==1').loc[0,'target_program'])
run_0.query('trial==1')[['utterance','intention','response','acc']]

target program: 
 h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h


Unnamed: 0,utterance,intention,response,acc
0,place a horizontal block.,h,h,1.0
1,move to the left by 4,l_4,l_4,1.0
2,place a horizontal block.,h,h,1.0
3,move to the left by 1,l_1,l_1,1.0
4,place a vertical block.,v,v,1.0
5,place a vertical block.,v,v,1.0
6,move to the right by 9,r_9,r_9,1.0
7,place a vertical block.,v,v,1.0
8,move to the right by 6,r_6,r_6,1.0
9,place a vertical block.,v,v,1.0


In [55]:
# let's inspect the final trial
print('target program: \n', run_0.query('trial==12').loc[0,'target_program'])
run_0.query('trial==12')[['utterance','intention','response','acc']]

target program: 
 v r_6 v l_5 h r_4 h r_9 h l_4 h l_1 v v


Unnamed: 0,utterance,intention,response,acc
0,place a vertical block.,v,v,1.0
1,move to the right by 6,r_6,r_6,1.0
2,place a vertical block.,v,v,1.0
3,move to the left by 5,l_5,l_5,1.0
4,place a horizontal block.,h,h,1.0
5,move to the right by 4,r_4,r_4,1.0
6,place a horizontal block.,h,h,1.0
7,move to the right by 9,r_9,r_9,1.0
8,place a horizontal block.,h,h,1.0
9,move to the left by 4,l_4,l_4,1.0


#### Simulating learning

In [57]:
class LearningAgent(FixedAgent) :
    def __init__(self, role, curr_trial, previous_trials) :
        super().__init__(role, curr_trial)
        combined_primitives = (set().union(curr_trial['dsl'], *previous_trials['dsl']) 
                                if not previous_trials.empty else self.actions)

        # Initialize beliefs to uniform prior and then compute 
        self.prior = LexiconPrior(combined_primitives, lexemes)
        self.update_beliefs(previous_trials)

    def B0(self, utterance, lexicon) :
        '''
        simple builder agent that has equal probability
        of building anything that's literally consistent with the utterance
        '''
        builder_dist = EmptyDistribution()
        for action in self.actions :
            builder_dist.update({action : 1 if action == lexicon.language_to_dsl(utterance) else 0.01})
        builder_dist.renormalize()
        return builder_dist
        
    def A0(self, intention, lexicon) :
        '''
        simple architect agent that has equal probability
        of saying anything that's literally consistent with the intention
        '''
        architect_dist = EmptyDistribution()
        for utt in self.utterances :
            architect_dist.update({utt : 1 if utt == lexicon.dsl_to_language(intention) else 0.01})
        architect_dist.renormalize()
        return architect_dist

    def update_beliefs(self, previous_trial_df) :
        '''
        run bayes rule given observations in previous trials
        note that we run the calculation in log space because it's more numerically stable
        '''
        posterior = EmptyDistribution()
        posterior.to_logspace()

        # we're manually doing the calculation in Bayes Rule
        # P(l | obs) \propto P(l) * \prod_{o \in obs} P(o | l)
        # ==> log P(l|obs) \propto log P(l) + \sum_{o \in obs} log P(o | l)
        for lexicon in self.prior.support() :
            # calculate the likelihood of the previous data under each lexicon, 
            likelihood_term = 0
            for i, step in previous_trial_df.iterrows() :
                if self.role == 'architect' :
                    likelihood_term += np.log(self.B0(step.utterance, lexicon).score(step.response))
                elif self.role == 'builder' :
                    likelihood_term += np.log(self.A0(step.intention, lexicon).score(step.utterance))
            
            # weight by the prior probability of that lexicon
            prior_term = np.log(self.prior.score(lexicon))
            posterior.update({lexicon : prior_term + likelihood_term})

        # Renormalize (this is the \propto part of Bayes Rule, the denominator)
        posterior.renormalize()
        posterior.from_logspace()
        self.beliefs = posterior

In [58]:
def run_learning_simulation(trial_info, verbose = False) :
    output = SimulationOutput()
    for i, current_trial in trial_info.iterrows() :
        clear_output(wait=True)
        print(f'trial {i}/12')
        
        # construct agents with updated beliefs up to this point
        previous_trials = output.get_df()
        architect = LearningAgent('architect', current_trial, previous_trials)
        builder = LearningAgent('builder', current_trial, previous_trials) 

        # random program selected from the options
        target_program = choice(list(current_trial['programs_with_length'].keys()))

        # loop through steps of target program one at a time
        for step in target_program.split(' ') :
            utt = architect.act(step)
            response = builder.act(utt)
            output.save(step, utt, response) 
        
        output.flush(current_trial, target_program)
    return output.get_df()

In [59]:
learning_run_0 = run_learning_simulation(empirical_data, verbose = True)
learning_run_0

trial 11/12


Unnamed: 0,trial,utterance,response,intention,target_program,dsl,target_length,acc
0,1.0,place a horizontal block.,h,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
1,1.0,move to the left by 4,l_4,l_4,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
2,1.0,place a horizontal block.,h,h,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
3,1.0,move to the left by 1,l_1,l_1,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
4,1.0,place a vertical block.,v,v,h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",14.0,1.0
...,...,...,...,...,...,...,...,...
7,11.0,move to the right by 7,r_7,r_7,v r_6 v l_5 h r_4 h r_7 chunk_C,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",9.0,1.0
8,11.0,place a bleep.,chunk_8,chunk_C,v r_6 v l_5 h r_4 h r_7 chunk_C,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",9.0,0.0
0,12.0,place a blah.,chunk_C,chunk_Pi,chunk_Pi r_9 chunk_L,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",3.0,0.0
1,12.0,move to the right by 9,r_9,r_9,chunk_Pi r_9 chunk_L,"[h, v, l_0, l_1, l_2, l_3, l_4, l_5, l_6, l_7,...",3.0,1.0


#### Choosing programs

In [60]:
from scipy.special import softmax

class StrategicArchitect(LearningAgent) :
    def __init__(self, curr_trial, previous_trials) :
        super().__init__('architect', curr_trial, previous_trials) 
        self.alpha = 2
        self.beta = .3

    def expected_inf(self, utterance, intention) :
        '''
        computes the expected utility of transmitting the given intention with utterance
        accounting for uncertainty of whether utterance will work
        '''
            # calculate expected inf(u) = \sum_a*\in A* \sum_L P(L) * ln P_B(a* | u, L) 
        return sum([
            self.beliefs.score(l) * np.log(self.B0(utterance, l).score(intention))
            for l in self.beliefs.support()
        ])
        
    def speak(self, possible_programs) :
        '''
        produce an action based on role and current beliefs
        '''
        # Architect is going to build up a distribution over utterances to say
        # Architect selects which program representation to comunicate proportional to informativity and length
        p_utils = []
        for target_program in possible_programs: 
            step_utils = []
            for step in target_program.split(' ') :
                utt_utils = np.array([self.expected_inf(utt, step) for utt in self.utterances])
                step_utils.append(sum(utt_utils * softmax(self.alpha * utt_utils)))
            p_utils.append(   
                (1 - self.beta) * np.mean(step_utils) 
              - (    self.beta) * len(target_program.split(' '))
            )
            print(p_utils)
        # sample a program
        chosen_p = choice(a = possible_programs, p = softmax(self.alpha * np.array(p_utils)))
        # sample utterances for that program
        return chosen_p, [self.act(step) for step in chosen_p.split(' ')]

def run_strategic_simulation(empirical_data) :
    output = SimulationOutput()
    for i, current_trial in empirical_data.iterrows() :
        previous_trials = output.get_df()
        architect = StrategicArchitect(current_trial, previous_trials)
        builder = LearningAgent('builder', current_trial, previous_trials)

        # Jointly pick a program to communicate, and what to say
        possible_programs = list(current_trial['programs_with_length'].keys())
        chosen_program, utt_seq = architect.speak(possible_programs)
        print(f'trial: {i}, chosen program: {chosen_program}')

        # loop through steps of target program one at a time
        for intention, utt in zip(chosen_program.split(' '), utt_seq) :
            response = builder.act(utt)
            output.save(intention, utt, response)
            
        output.flush(current_trial, chosen_program)
    return output.get_df()

print(run_strategic_simulation(empirical_data))

[-4.375992165077049]
trial: 0, chosen program: h l_4 h l_1 v v r_9 v r_6 v l_5 h r_4 h
[-4.075992165077048]
trial: 1, chosen program: h l_4 h l_1 v v r_12 h l_1 v v r_1 h
[-4.375992165077049]
trial: 2, chosen program: h l_1 v v r_1 h r_6 v r_6 v l_5 h r_4 h
[-3.181802985509923]
[-3.181802985509923, -4.381802985509923]
trial: 3, chosen program: v r_6 v l_5 h r_4 h r_7 chunk_8b h
[-2.8912079586132675]
[-2.8912079586132675, -4.0875710160956835]
trial: 4, chosen program: h l_1 v v r_1 h r_12 h l_4 h l_1 v v
[-2.8975790919034914]
[-2.8975790919034914, -3.1994409247394207]
[-2.8975790919034914, -3.1994409247394207, -2.936894378918522]
[-2.8975790919034914, -3.1994409247394207, -2.936894378918522, -4.399813291306607]
trial: 5, chosen program: v r_6 v l_5 h r_4 h r_9 chunk_L
[-2.7823767671564292]
[-2.7823767671564292, -2.9152875630396595]
[-2.7823767671564292, -2.9152875630396595, -2.9152875630396595]
[-2.7823767671564292, -2.9152875630396595, -2.9152875630396595, -4.099805439012713]
trial: 6,

### webppl implementation

1. Move to the `webppl_implementation` directory.

2. Run `webppl_implementation/input/generate_grid.py` to generate the input files for the webppl model.

3. Run `run_model.sh` to run the webppl model on the generated input files, using the following script:

`parallel --bar --colsep ',' "sh ./run_model.sh {1} {2} {3} {4}" :::: webppl_input/grid_49ppts.csv
webppl coordinate_DSL_pragmatic_speaker.wppl --require webppl-json --require webppl-csv -- --numIterations=2 --chainNum=$1 --alpha=$2 --beta=$3 --participantNumber=$4`