In [75]:
import numpy as np
import matplotlib.pyplot as plt

# Network
import torch
from torch import autograd
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

# Optimizer
import torch.optim as optim

In [76]:
# Game setup
num_types = 3    # Number of item types
max_item = 5     # Maximum number of each item in a pool
max_utility = 10 # Maximum utility value for agents
games = 128      # Number of games per episode

# Linguistic channel
num_vocab = 10   # Symbol vocabulary size for linguistic channel
len_message = 6  # Linguistic message length

# Appendix
lambda1 = 0.05  # Entropy regularizer for pi_term, pi_prop
lambda2 = 0.001 # Entropy regularizer for pi_utt
smoothing_const = 0.7 # Smoothing constant for the exponential moving average baseline

In [77]:
# Sample an item pool for a game
def create_item_pool(num_types, max_item):
    # Possible to have zero items?
    pool = np.random.randint(0, max_item+1, (games,num_types))
    return pool
        
# Sample agent utility
def create_agent_utility(num_types, max_utility):
    utility = np.zeros((games,num_types)) # Initialize zero vector
    
    for i in range (games):
        while np.sum(utility[i,:]) == 0:   # At least one item has non-zero utility
            utility[i,:] = np.random.randint(0, max_utility+1, num_types)
    return utility

# Calculate reward
def reward(share, utility):
    return np.dot(utility, share)

In [845]:
class combined_policy(nn.Module):
    def __init__(self, embedding_dim = 100, batch_size = 128, num_layers = 1, bias = True, batch_first = False, dropout = 0, bidirectional = False):
        super(combined_policy, self).__init__()
        # Save variables
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        
        # Numerical encoder
        self.encoder1 = nn.Embedding(max_utility, embedding_dim)
        # Linguistic encoder
        self.encoder2 = nn.Embedding(num_vocab, embedding_dim)
        
        # Item context LSTM
        self.lstm1 = nn.LSTM(embedding_dim, embedding_dim, num_layers, bias, batch_first, dropout, bidirectional)
        # Linguistic LSTM
        self.lstm2 = nn.LSTM(embedding_dim, embedding_dim, num_layers, bias, batch_first, dropout, bidirectional)
        # Proposal LSTM
        self.lstm3 = nn.LSTM(embedding_dim, embedding_dim, num_layers, bias, batch_first, dropout, bidirectional)
        
        # Feed-forward
        self.ff = nn.Linear(3*embedding_dim, embedding_dim)
        
        # Termination policy
        self.policy_term = nn.Linear(embedding_dim, 1)
        # Linguistic policy
        self.policy_ling = nn.LSTM(embedding_dim, embedding_dim, num_layers, bias, batch_first, dropout, bidirectional)
        self.ff_ling = nn.Linear(embedding_dim, num_vocab)
        # Proposal policies
        self.policy_prop = []
        for i in range(num_types):
            ff = nn.Linear(embedding_dim, max_item)
            self.policy_prop.append(ff)
        
    def forward(self, x, test):
        # Item context
        x1 = x[0]
        # Previous linguistic message
        x2 = x[1]
        # Previous proposal
        x3 = x[2]  
        
        print(x1.size())
        # Initial embedding
        x1 = self.encoder1(x1).transpose(0,1)
        x2 = self.encoder2(x2).transpose(0,1)
        x3 = self.encoder1(x3).transpose(0,1)
            
        # LSTM for item context
        h = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial hidden
        c = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial cell

        for i in range(x1.size()[0]):
            _, (h,c) = self.lstm1(x1[i].view(1,self.batch_size,self.embedding_dim),(h,c))
        x1 = h
        
        # LSTM for linguistic
        h = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial hidden
        c = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial cell

        for i in range(x2.size()[0]):
            _, (h,c) = self.lstm2(x2[i].view(1,self.batch_size,self.embedding_dim),(h,c))
        x2 = h
        
        # LSTM for proposal
        h = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial hidden
        c = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial cell

        for i in range(x3.size()[0]):
            _, (h,c) = self.lstm2(x3[i].view(1,self.batch_size,self.embedding_dim),(h,c))
        x3 = h

        # Concatenate side-by-side
        x = torch.cat([x1,x2,x3],2)

        # Feedforward
        h = self.ff(x)
        h = F.relu(h)

        # Termination
        p_term = F.sigmoid(self.policy_term(h)).view(self.batch_size,1)
        
        entropy_term = -(p_term * p_term.log2()) - (torch.ones(128,1)-p_term * (torch.ones(128,1)-p_term.log2()))
    
        if test:
            # Greedy
            term = torch.round(p_term).long()
        else:
            # Sample
            term = torch.bernoulli(p_term).long()
        
        # Linguistic construction
        h = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial hidden state
        c = torch.zeros(1,self.batch_size,self.embedding_dim) # Initial cell state
        letter = torch.zeros(self.batch_size,1).long() # Initial letter (dummy)
        entropy_letter = torch.zeros([self.batch_size,num_vocab])
        
        message = torch.zeros(self.batch_size,len_message) # Message
        for i in range(len_message):
            embedded_letter = self.encoder2(letter)

            _, (h,c) = self.policy_ling(embedded_letter.view(1,self.batch_size,self.embedding_dim),(h,c))
            logit = self.ff_ling(h)
            p_letter = F.softmax(logit,dim=2).view(self.batch_size,num_vocab)
            
            entropy_letter[:,i] = -1*(torch.sum(p_letter[i],0,keepdim=True) * torch.sum(p_letter[i],0,keepdim=True).log2())
            
            if test:
                # Greedy
                letter = p_letter.argmax(dim=1).view(self.batch_size,1)
            else:
                # Sample
                letter = torch.polynomial(p_letter,1)
            message[:,i] = letter.squeeze()
            
        entropy_letter = torch.sum(letter,1)     
   
        # Proposal
        p_prop = torch.zeros(num_types,self.batch_size,max_item)
        prop = torch.zeros([self.batch_size,num_types]).long()
        entropy_prop= torch.zeros([self.batch_size,num_types])
        
        for i in range(num_types):
            blah = F.sigmoid(self.policy_prop[i](h))
            p_prop[i] = F.sigmoid(self.policy_prop[i](h))
            
            entropy_prop[:,i] = -1*(torch.sum(p_prop[i],1) * torch.sum(p_prop[i],1).log2())
            if test:
                # Greedy
                prop[:,i] = p_prop[i].argmax(dim=1)
            else:
                # Sample
                prop[0][i] = torch.multinomial(p_prop,1)
            
        entropy_prop = torch.sum(entropy_prop,1)           

        return (term,message.long(),prop)
    
net = combined_policy()

In [846]:
x = torch.randint(0,max_item,[128,6]).long()
y = torch.randint(0,num_vocab,[128,6]).long()
z = torch.randint(0,max_item,[128,3]).long()

In [847]:
blah = net([x,y,z],True)

print(blah[0].type())

torch.Size([128, 6])
torch.LongTensor


In [849]:
# Game
games = 128

# Set up truncated poisson - N is the number of steps this game
lam = 7
max_N = 10
min_N = 4
s = np.random.poisson(lam,games)
s = np.minimum(s,max_N)
s = np.maximum(s,min_N)
N = np.random.choice(s,1)

#Initialization
pool = create_item_pool(num_types, max_item)
utility_1 = create_agent_utility(num_types, max_utility)
utility_2 = create_agent_utility(num_types, max_utility)
item_context_1 = np.hstack((pool,utility_1))
item_context_1 = torch.from_numpy(item_context_1).long()
item_context_2 = np.hstack((pool,utility_2))
item_context_2 = torch.from_numpy(item_context_2).long()

A1 = combined_policy()
A2 = combined_policy()

A1_e =  0.0
A1_m = torch.zeros([games,6]).long()
A1_p = torch.zeros([games,3]).long()
A2_e =  0.0
A2_m = torch.zeros([games,6]).long()
A2_p = torch.zeros([games,3]).long()

#Game
for i in range(N[0]):
    if i%2 == 0: 
        print(A1_e)
        if A1_e == 0:
            A1_e,A1_m,A1_p = A1([A2_m,A2_m,A2_p], True)
        else:
            break
    else:
        if A2_e == 0:
            A2_e,A2_m,A2_p = A2([A1_m,A1_m,A1_p], True)
        else:
            break
print(A1_e,A1_m,A1_p,A2_e,A2_m,A2_p)

0.0
torch.Size([128, 6])
torch.Size([128, 6])
tensor([[ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
  

RuntimeError: bool value of Tensor with more than one value is ambiguous