# Emergent Communications Pipeline

In [125]:
import json
import csv
import codecs
import time
import argparse
import os
import datetime
import numpy as np
import torch
import torch.nn.functional as F
import nets
import sampling
import alive_sieve

from torch import autograd, optim, nn
from torch.autograd import Variable
from ecn import State
from nltk import bigrams

In [41]:
# Run configuration shared by the cells below.

# Batch size used when sampling both test and training batches.
batch_size = 128
# Seed for the dedicated RandomState that generates the test batches.
test_seed = 123

# Entropy-regularization weights handed to nets.AgentModel.
term_entropy_reg = proposal_entropy_reg = 0.05
utterance_entropy_reg = 0.001

# Path of the model checkpoint file.
model_file = 'model_saves/model.dat'

## Training

In [42]:
# Test data comes from its own seeded generator so it is reproducible.
test_r = np.random.RandomState(test_seed)
test_batches = sampling.generate_test_batches(
    batch_size=batch_size,
    num_batches=5,
    random_state=test_r,
)
# Hashes of the test batches; passed to the training sampler further down.
test_hashes = sampling.hash_batches(test_batches)

# Training draws use numpy's global (unseeded here) random module.
train_r = np.random

In [27]:
# Print the shape of every entry in the second test batch.
# Entries are either a single tensor or a sequence of tensors (e.g. the
# per-agent utilities), so fall back to per-element shapes when the value
# itself has no `.shape`.
for k, v in test_batches[1].items():
    try:
        shape_info = v.shape
    except AttributeError:
        # Only the "no .shape attribute" case is expected here; any other
        # error should surface instead of being silently swallowed.
        shape_info = [e.shape for e in v]
    print(k, shape_info)

pool torch.Size([128, 3])
utilities [torch.Size([128, 3]), torch.Size([128, 3])]
N torch.Size([128])


In [44]:
# Build the agent network with both the communication and proposal channels
# enabled, using the entropy-regularization weights configured above, and
# move it to the GPU in the same expression.
model = nets.AgentModel(
    enable_comms=True,
    enable_proposal=True,
    term_entropy_reg=term_entropy_reg,
    utterance_entropy_reg=utterance_entropy_reg,
    proposal_entropy_reg=proposal_entropy_reg,
).cuda()

# Adam with its default hyper-parameters over all model parameters.
optimizer = optim.Adam(model.parameters())

In [45]:
# Draw one training batch; the test hashes are handed to the sampler
# (presumably so it can avoid reproducing test examples -- confirm in
# sampling.generate_training_batch).
batch = sampling.generate_training_batch(
    batch_size=batch_size,
    test_hashes=test_hashes,
    random_state=train_r,
)

In [49]:
# Tensor constructors are taken from torch.cuda so new tensors live on the GPU.
type_constr = torch.cuda

# Re-derive the batch size from the sampled data rather than the config value.
batch_size = batch['N'].size(0)

# Wrap the batch in a State object and move it to the GPU.
s = State(**batch)
s.cuda()

In [51]:
sieve = alive_sieve.AliveSieve(batch_size=batch_size, enable_cuda=True)

# The next two tensors are never sieved: they keep the full batch size for
# the whole batch and are updated through sieve.out_idxes[...].
rewards = type_constr.FloatTensor(batch_size, 3).zero_()
num_steps = type_constr.LongTensor(batch_size).fill_(10)

# Running statistics, all starting at zero.
term_matches_argmax_count = utt_matches_argmax_count = 0
prop_matches_argmax_count = 0
utt_stochastic_draws = prop_stochastic_draws = 0
num_policy_runs = 0

# One scalar entropy-loss accumulator per agent (two agents).
entropy_loss_by_agent = [
    Variable(type_constr.FloatTensor(1).zero_()) for _ in range(2)
]

In [58]:
# Forward step
# Run the model once for agent 0 in training mode. The call returns ten
# values; the third one overwrites s.m_prev with the new message.
agent = 0
_prev_proposal = s.last_proposal
(
    nodes,
    term_a,
    s.m_prev,
    this_proposal,
    _entropy_loss,
    _term_matches_argmax_count,
    _utt_matches_argmax_count,
    _utt_stochastic_draws,
    _prop_matches_argmax_count,
    _prop_stochastic_draws,
) = model(
    pool=Variable(s.pool),
    utility=Variable(s.utilities[:, agent]),
    m_prev=Variable(s.m_prev),
    prev_proposal=Variable(_prev_proposal),
    testing=False,
)

  probs = F.softmax(logits)
  probs = F.softmax(logits)


In [160]:
# Reward computation
# Prepare everything the per-example reward loop needs:
#   rewards_batch        (batch_size, 3) float tensor, zero-initialised
#   reward_eligible_mask per-example flag, non-zero where a reward is due
#   proposal             (batch_size, 2, 3) item split, indexed by agent
#   max_utility          element-wise max of s.utilities over dim 1
utility = s.utilities[:, agent]
rewards_batch = type_constr.FloatTensor(batch_size, 3).fill_(0)

# Start from the termination decisions of the current agent.
reward_eligible_mask = term_a.view(batch_size).clone().byte()

# A proposal that asks for more of any item than the pool contains is
# invalid: the other side's share (pool - proposal) would be negative.
# Such examples must not be rewarded, so clear them from the mask.
exceeded_pool, _ = ((this_proposal - s.pool) > 0).max(1)
if exceeded_pool.any():
    reward_eligible_mask[exceeded_pool.nonzero().long().view(-1)] = 0

# The proposal is attributed to the *other* agent; the current agent is the
# one accepting it.
# NOTE(review): this_proposal was produced by `agent` in the forward step,
# yet it is stored under `1 - agent` here -- confirm this is intended.
proposer = 1 - agent
accepter = agent
proposal = torch.zeros(batch_size, 2, 3).long()
proposal[:, proposer] = this_proposal
proposal[:, accepter] = s.pool - this_proposal
max_utility, _ = s.utilities.max(1)

In [161]:
# Fill rewards_batch for every reward-eligible example. Each row holds
# [agent 0 reward, agent 1 reward, prosocial reward], each scaled by the
# maximum obtainable value (left at 0 when that maximum is 0).
reward_eligible_idxes = reward_eligible_mask.nonzero().long().view(-1)
for b in reward_eligible_idxes:
    # Penalize linguistic variety: count the distinct bigrams of the
    # utterance. The tokens are converted to plain Python ints first --
    # iterating a tensor yields 0-dim tensors, which hash by identity, so a
    # set of them would never merge repeated bigrams.
    # Assumes s.m_prev[b] is a 1-D token sequence -- TODO confirm.
    # The penalty does not depend on the agent, so compute it once.
    utt_bigrams = set(bigrams(s.m_prev[b].cpu().tolist()))
    variety_penalty = len(utt_bigrams) / 15.0

    raw_rewards = torch.FloatTensor(2).fill_(0)
    for i in range(2):
        # Utility of the items agent i receives under the proposal.
        raw_rewards[i] = s.utilities[b, i].cpu().dot(proposal[b, i].cpu())
        raw_rewards[i] -= variety_penalty

    scaled_rewards = torch.FloatTensor(3).fill_(0)

    # we always calculate the prosocial reward
    actual_prosocial = raw_rewards.sum()
    available_prosocial = max_utility[b].cpu().dot(s.pool[b].cpu())
    if available_prosocial != 0:
        scaled_rewards[2] = actual_prosocial / available_prosocial

    for i in range(2):
        # Best case for agent i: it receives the whole pool.
        max_agent = s.utilities[b, i].cpu().dot(s.pool[b].cpu())
        if max_agent != 0:
            scaled_rewards[i] = raw_rewards[i] / max_agent

    rewards_batch[b] = scaled_rewards

In [152]:
# Variety penalty for example `b` (left over from the reward loop): number of
# distinct bigrams in its utterance, divided by 15. Tokens are converted with
# .tolist() so equal bigrams compare equal; 0-dim tensors hash by identity and
# would all count as distinct.
# Batched variant over every utterance in s.m_prev:
#torch.tensor(list(map(lambda utt: len(set(bigrams(utt.tolist()))), s.m_prev))) / 15.0
len(set(bigrams(s.m_prev[b].cpu().tolist()))) / 15.0

0.3333333333333333

In [172]:
# Show, for the first example in the batch, the current agent's utilities
# next to the item split assigned to that agent.
s.utilities[0, agent], proposal[0, agent]

(tensor([0, 2, 4], device='cuda:0'), tensor([3, 2, 1]))