# End-To-End Pipeline

## Simulation

In [1]:
import pdb
import re
import random
import utils
import numpy as np
import torch
import torch.nn as nn
import domain
import pandas as pd

from torch import optim
from torch import autograd
from ipywidgets import interact
from agent import *
from utils import ContextGenerator
from dialog import Dialog, DialogLogger
from models.rnn_model import RnnModel
from models.latent_clustering_model import LatentClusteringPredictionModel, BaselineClusteringModel
from agent import RnnAgent, RnnRolloutAgent, RlAgent, HierarchicalAgent
from domain import get_domain
from nltk import ngrams
from tqdm.notebook import tqdm

In [2]:
def get_agent_type(model, smart=False):
    if isinstance(model, LatentClusteringPredictionModel):
        if smart:
            return LatentClusteringRolloutAgent
        else:
            return LatentClusteringAgent
    elif isinstance(model, RnnModel):
        if smart:
            return RnnRolloutAgent
        else:
            return RnnAgent
    elif isinstance(model, BaselineClusteringModel):
        if smart:
            return BaselineClusteringRolloutAgent
        else:
            return BaselineClusteringAgent
    else:
        assert False, 'unknown model type: %s' % (model)

In [3]:
class Arguments:
    alice_model_file = 'rnn_model.th'
    alice_forward_model_file = ''
    bob_model_file = 'rnn_model.th'
    bob_forward_model_file = ''
    context_file = 'data/negotiate/selfplay.txt'
    temperature = 0.5
    pred_temperature =1.0
    verbose = False
    seed = 1
    score_threshold = 6
    max_turns = 20
    log_file = ''
    smart_alice = False
    diverse_alice = False
    rollout_bsz = 3
    rollout_count_threshold = 3
    smart_bob = False
    selection_model_file = 'selection_model.th'
    rollout_model_file = ''
    diverse_bob = False
    cuda = True
    domain = 'object_division'
    visual = False
    eps = 0.0
    data = 'data/negotiate'
    unk_threshold = 20
    bsz = 16
    validate = False
    ref_text = ''
    rl_lr = 0.002
    rl_clip = 2.0
    lr = 0.1
    gamma = 0.99
    eps = 0.5
    clip = 0.1
    momentum = 0.1
    sep_sel = True
    unk_threshold = 20
    sv_train_freq = 1
    
args = Arguments()

In [4]:
utils.use_cuda(args.cuda)
utils.set_seed(args.seed)

In [5]:
alice_model = utils.load_model(args.alice_model_file)
alice_ty = get_agent_type(alice_model, args.smart_alice)
alice = alice_ty(alice_model, args, name='Alice', train=True, diverse=args.diverse_alice)
alice.vis = args.visual

bob_model = utils.load_model(args.bob_model_file)
bob_ty = get_agent_type(bob_model, args.smart_bob)
bob = bob_ty(bob_model, args, name='Bob', train=False, diverse=args.diverse_bob)
bob.vis = False



In [6]:
joe_model = utils.load_model(args.bob_model_file)
joe_ty = get_agent_type(bob_model, args.smart_bob)
joe = bob_ty(bob_model, args, name='Joe', train=False, diverse=args.diverse_bob)
joe.vis = False

In [12]:
dialog = Dialog([alice, bob], args)
logger = DialogLogger(verbose=args.verbose, log_file=args.log_file)
ctx_gen = ContextGenerator(args.context_file)

#dialog2 = Dialog([alice, joe], args)
domain = get_domain(args.domain)
corpus = alice_model.corpus_ty(domain, args.data, freq_cutoff=args.unk_threshold,
                               verbose=True, sep_sel=args.sep_sel)
engine = alice_model.engine_ty(alice_model, args)
alice.engine = engine

dataset data/negotiate/train.txt, total 687919, unks 8718, ratio 1.27%
dataset data/negotiate/val.txt, total 74653, unks 914, ratio 1.22%
dataset data/negotiate/test.txt, total 70262, unks 847, ratio 1.21%


In [13]:
validset, validset_stats = corpus.valid_dataset(args.bsz)
trainset, trainset_stats = corpus.train_dataset(args.bsz)

In [9]:
import wandb

wandb.init(project="goal-based-negotiating-agents")

W&B Run: https://app.wandb.ai/tropdeep/goal-based-negotiating-agents/runs/3o4l66wy

In [10]:
wandb.watch(alice_model)

ValueError: You can only call `wandb.watch` once per model.  Pass a new instance of the model if you need to call wandb.watch again in your code.

In [14]:
n = 0
rew_freq = 2
all_rewards = []
norm_reward = 0
args.sv_train_freq = 1
args.verbose = False
utt_reward = 0
for ctxs in tqdm(ctx_gen.iter(), total=len(ctx_gen.ctxs)):
    if args.sv_train_freq > 0 and n % args.sv_train_freq == 0:
        batch = random.choice(trainset)
        engine.model.train()
        out, loss = engine.train_batch(batch, reward=utt_reward)
        engine.model.eval()
        wandb.log({"loss": loss})
    if n % rew_freq == 0:
        logger.dump('=' * 80)
        conv, agree, rewards = dialog.run(ctxs, logger)
        #dialog2.run(ctxs, logger)
        logger.dump('=' * 80)
        logger.dump('')
        
        # compute context rewards
        reward, partner_reward = rewards
        diff = reward - partner_reward
        all_rewards.append(diff)
        r = (diff - np.mean(all_rewards)) / max(1e-4, np.std(all_rewards))
        g = r
        rewards = []
        for _ in alice.logprobs:
            rewards.append(g)
            g = g * args.gamma
        ctx_norm_reward = 0
        for lp, r in zip(alice.logprobs, rewards):
            ctx_norm_reward -= lp.item() * r
        #print('context reward:', ctx_norm_reward)
        
        # compute utterance rewards
        utt_reward = 0
        for utterance in conv:
            unigrams = pd.Series(ngrams(utterance, 1))
            if len(conv) < 2:
                utt_reward -= 0.5
                continue
            utt_reward += unigrams.count() - 8 if unigrams.count() < 8 else 0
            bigrams = pd.Series(ngrams(utterance, 2))
            utt_reward -= bigrams.value_counts().std()
            trigrams = pd.Series(ngrams(utterance, 3))
            utt_reward -= trigrams.value_counts().std()
        #print('utterance reward:', utt_reward)
        utt_reward = max(-2.0, utt_reward) * args.gamma
        
        # logs
        wandb.log({'utterance-reward': utt_reward,
                   'ctx-norm-reward': ctx_norm_reward})
            
               
        #input()
    n += 1

HBox(children=(IntProgress(value=0, max=4086), HTML(value='')))




In [None]:
args.verbose = True
ctxs = random.choice(ctx_gen.ctxs)
logger = DialogLogger(verbose=args.verbose, log_file=args.log_file)
dialog = Dialog([alice, bob], args)
logger.dump('=' * 80)
conv, agree, rewards = dialog.run(ctxs, logger)
#dialog2.run(ctxs, logger)
logger.dump('=' * 80)
logger.dump('')

Alice : book=(count:4 value:2) hat=(count:1 value:1) ball=(count:1 value:1)
Bob   : book=(count:4 value:1) hat=(count:1 value:6) ball=(count:1 value:0)
--------------------------------------------------------------------------------
Alice : i need the books and the hats <eos>
Bob   : i need the hat <eos>
Alice : i need the other hat to your your two my much give up <eos>
Bob   : i need the hat and the ball <eos>


In [24]:
engine.crit(out, tgt)

tensor(961.6806, grad_fn=<NllLossBackward>)

In [29]:
lsf = nn.LogSoftmax()
torch.log(-out @ engine.crit.w)

tensor([ 9.3905, 11.0875, 11.2919, 10.6140, 11.1170, 10.4049, 11.7149, 10.3151,
        11.3495, 10.9880, 11.8648, 10.4176, 11.2139, 11.0625, 10.5209, 11.3971,
        10.9805, 11.2328,     nan, 10.2620, 10.4540, 11.5460, 11.6435, 11.7716,
        11.5797, 11.4207, 10.8836, 11.1662, 11.2781, 11.7557, 10.5368, 10.4087,
        12.1111,     nan, 10.0322, 11.5399,     nan, 11.4545, 11.2849, 11.9719,
        11.7945, 10.8136,  9.9752, 11.5829, 12.0881, 10.8099, 12.0942, 10.4168,
        12.5171,  9.7051, 11.4713, 11.3777,  8.7401, 10.5852, 11.6754,     nan,
        12.6202, 11.6134, 10.3867, 11.9602, 11.8345, 11.2651, 12.1444, 11.4711,
        12.3260, 10.7483, 10.8352, 10.8016, 11.5828, 11.7422, 10.1615, 10.7003,
        11.3029, 12.1950, 11.6886, 12.3887, 12.1229, 11.6933, 12.0276, 12.3525,
        12.1193,     nan, 10.5496, 11.5089, 12.4797, 11.7544, 10.1133, 11.1836,
        10.6005, 11.6757, 12.2564, 11.9262, 11.9199, 12.3477, 12.2010, 11.8272,
        11.3114, 11.5742, 12.3197, 11.69

In [36]:
alice_model.word_dict

<data.Dictionary at 0x7f18f9fca5d0>

In [75]:
word_batch = pd.DataFrame()
for j, col in enumerate(inpt):
    col = corpus.word_dict.i2w(col)
    word_batch[j] = pd.Series(col)

word_batch

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,97,98,99,100,101,102,103,104,105,106
0,YOU:,i'd,like,both,balls,;,maybe,a,hat,.,...,-,i,can,accept,that,one,!,<eos>,THEM:,<selection>
1,THEM:,i'd,like,the,hat,",",the,balls,and,a,...,deal,?,<eos>,YOU:,no,deal,then,<eos>,THEM:,<selection>
2,YOU:,i'll,take,the,books,and,hat,if,you,want,...,i,take,the,books,and,1,ball,<eos>,YOU:,<selection>
3,YOU:,i,would,like,the,ball,",",the,hat,",",...,no,deal,.,<eos>,THEM:,no,deal,<eos>,YOU:,<selection>
4,YOU:,i'll,take,books,and,ball,",",you,can,have,...,ball,and,books,",",nothing,else,works,<eos>,THEM:,<selection>
5,THEM:,would,you,accept,two,balls,?,<eos>,YOU:,rather,...,<eos>,THEM:,deal,<eos>,YOU:,deal,!,<eos>,THEM:,<selection>
6,THEM:,you,take,all,balls,and,i,keep,the,rest,...,YOU:,no,deal,",",ok,keep,talking,<eos>,THEM:,<selection>
7,THEM:,how,about,2,hats,and,a,basketball,?,<eos>,...,deal,<eos>,THEM:,ok,",",sounds,good,<eos>,YOU:,<selection>
8,THEM:,i'd,like,both,balls,;,maybe,a,hat,.,...,-,i,can,accept,that,one,!,<eos>,YOU:,<selection>
9,THEM:,i,would,like,the,hat,and,the,balls,<eos>,...,THEM:,:,-,),<eos>,YOU:,<unk>,.,<eos>,THEM:


In [59]:
batch = random.choice(trainset)

In [61]:
engine.model.train()

ctx, inpt, tgt, sel_tgt = batch
ctx = Variable(ctx)
inpt = Variable(inpt)
tgt = Variable(tgt)
sel_tgt = Variable(sel_tgt)

out, sel_out = alice_model(inpt, ctx)

In [85]:
out.shape, tgt.shape, sel_out.shape, tgt.shape

(torch.Size([1712, 463]),
 torch.Size([1712]),
 torch.Size([96, 18]),
 torch.Size([1712]))

In [84]:
list(map(lambda b: b.shape, batch))

[torch.Size([6, 16]),
 torch.Size([107, 16]),
 torch.Size([1712]),
 torch.Size([96])]

In [100]:
for i, c in enumerate(conv):
    unigrams = pd.Series(ngrams(c, 1))
    bigrams = pd.Series(ngrams(c, 2))
    trigrams = pd.Series(ngrams(c, 3))
    print(f'{i}\n', ' '.join(c))
    print('number of unigrams:', len(unigrams), end=',\t')
    print('number of bigrams:', len(bigrams), end=',\t')
    print('number of trigrams:', len(trigrams))
    print('===================')

0
 i would like the ball and the books . <eos>
number of unigrams: 10,	number of bigrams: 9,	number of trigrams: 8
1
 then if i can have three balls that would like balls like a talk it`s split your make ? offer all of three books ? <eos>
number of unigrams: 26,	number of bigrams: 25,	number of trigrams: 24
2
 i need the ball and the books <eos>
number of unigrams: 8,	number of bigrams: 7,	number of trigrams: 6
3
 then no ( books ? <eos>
number of unigrams: 6,	number of bigrams: 5,	number of trigrams: 4
4
 i need the ball and the books <eos>
number of unigrams: 8,	number of bigrams: 7,	number of trigrams: 6
5
 then no no no deal no <eos>
number of unigrams: 7,	number of bigrams: 6,	number of trigrams: 5
6
 no deal <eos>
number of unigrams: 3,	number of bigrams: 2,	number of trigrams: 1
7
 both all of each my . <eos>
number of unigrams: 7,	number of bigrams: 6,	number of trigrams: 5
8
 no deal <eos>
number of unigrams: 3,	number of bigrams: 2,	number of trigrams: 1
9
 i no <eos>
number 

## Exploration