In [1]:
import torch
from torch.nn import functional as F
from torch.distributions import Categorical
import csv
import egg.core as core
from string import ascii_lowercase, punctuation, digits, ascii_uppercase
from egg.core.util import load_interactions
import collections
from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Union
import json
import _jsonnet
from egg.zoo.channel.archs import Receiver, Sender
from reconstructionloss import ReconstructionLoss
from scoring import Scorer
import re
import os
import numpy as np

In [None]:
# Deserialize the saved train / test datasets from disk.
trainset = torch.load("datafinal/redlarge_train_set.tar")
testset = torch.load("datafinal/redlarge_test_set.tar")

# Keep only the first tensor of each dataset; `training` is what the Scorer
# is fed further down in the notebook.
training, testing = trainset.tensors[0], testset.tensors[0]

In [None]:
class objectview:
    """Expose the keys of a dictionary as attributes of an object.

    After ``view = objectview({"lr": 0.1})`` the value is reachable as
    ``view.lr`` (dot access), mimicking the namespace returned by argparse.

    The dictionary is adopted as the instance ``__dict__`` rather than copied,
    so attribute writes on the view are visible in the original dictionary and
    vice versa.
    """

    def __init__(self, d):
        # Rebind the instance namespace to the caller's dict (no copy).
        self.__dict__ = d

# Evaluate the Jsonnet config to a JSON string, parse it, and wrap the
# resulting dict so its keys can be read as attributes (args.hidden_size, ...).
_config_json = _jsonnet.evaluate_file('interaction_config.jsonnet')
args = objectview(json.loads(_config_json))

In [None]:
# Architecture settings read from the config.
vocab_size, embedding_size, hidden_size = (
    args.signal_chars,
    args.embedding_size,
    args.hidden_size,
)
cell_type = args.rnn_cell
# One less than the configured signal length
# (presumably to leave room for the EOS symbol -- TODO confirm).
signal_len = args.signal_len - 1

# Optimisation settings read from the config.
lr, sender_entropy = args.learning_rate, args.sender_entropy

In [None]:
# Grammar dictionary; handed to the Scorer in the evaluation cells.
with open("dicts/redlarge_dict.json") as infile:
    grammar = json.load(infile)

# Characters used to render messages: 'E' marks EOS, followed by the first
# (vocab_size - 1) characters of lowercase letters + punctuation + digits.
initial_chars = ascii_lowercase + punctuation + digits
msg_chars = 'E' + initial_chars[:vocab_size - 1]

# `n_features` passed to both the Sender and the Receiver architectures.
N_FEATURES = 160

# NOTE(review): the RNN cell is hard-coded to 'gru' for both agents, while the
# config value read into `cell_type` is not used here -- confirm which one is
# intended before switching, since saved checkpoints must match the cell type.
sender_agent = Sender(n_features=N_FEATURES, n_hidden=hidden_size)
sender = core.RnnSenderReinforce(
    sender_agent,
    vocab_size,
    embedding_size,
    hidden_size,
    cell='gru',
    max_len=signal_len,
    num_layers=1,
)

receiver_agent = Receiver(n_features=N_FEATURES, n_hidden=hidden_size)
receiver = core.RnnReceiverDeterministic(
    receiver_agent,
    vocab_size,
    embedding_size,
    hidden_size,
    cell='gru',
    num_layers=1,
)

# NOTE(review): the meaning of the two positional arguments (5, 32) is defined
# in reconstructionloss.py and is not visible here -- TODO name them.
loss = ReconstructionLoss(5, 32)
game = core.SenderReceiverRnnReinforce(
    sender,
    receiver,
    loss,
    sender_entropy_coeff=sender_entropy,
    receiver_entropy_coeff=0.0,
    length_cost=0.15,
    # Disabled options kept for reference:
    #beta1 = 45,
    #beta2 = 1.25
)
optimizer = torch.optim.Adam(game.parameters(), lr=lr)

In [None]:
def load_ckp(checkpoint_fpath, model, optimizer):
    """Restore a game and its optimizer from a checkpoint file.

    The checkpoint is loaded onto the CPU and indexed positionally:
    element 1 is given to ``model.load_state_dict``, element 2 to
    ``optimizer.load_state_dict``, and element 0 is returned unchanged
    (the caller binds it to ``epoch``).

    Args:
        checkpoint_fpath: Path of the checkpoint file to read.
        model: Object exposing ``load_state_dict`` plus ``sender`` and
            ``receiver`` attributes; updated in place.
        optimizer: Optimizer exposing ``load_state_dict``; updated in place.

    Returns:
        A 5-tuple ``(model, model.sender, model.receiver, optimizer, saved[0])``.
    """
    saved = torch.load(checkpoint_fpath, map_location=torch.device('cpu'))
    model.load_state_dict(saved[1])
    # Grab the agents only after the weights have been restored.
    agents = (model.sender, model.receiver)
    optimizer.load_state_dict(saved[2])
    return (model, *agents, optimizer, saved[0])

In [None]:
# Folder containing the saved checkpoints; the evaluation loops below load
# every regular file found in it (except .DS_Store entries).
directory = 'interaction_final_checkpoints/properredlargefifteencost/'

Compute metrics for:

a. Communication Accuracy

b. Message Length

c. Signal Uniqueness (using Jaccard Similarity)

In [None]:
# Per-checkpoint results; one entry is appended for every checkpoint file.
accs = []   # (acc, acc_or) pairs returned by Scorer.dump()
lens = []   # return value of Scorer.msg_len(); indexed 0-4 by the summary cell
jaccs = []  # [(uni), (bi), (tri)] pairs of (and, nonred) Jaccard scores

# os.listdir returns entries in arbitrary order; sort so that runs are
# processed (and printed) in a reproducible order.
for filename in sorted(os.listdir(directory)):
    f = os.path.join(directory, filename)
    # Skip sub-directories and macOS Finder metadata files.
    if not os.path.isfile(f) or ".DS_Store" in f:
        continue
    print(f)

    # Restores the weights in place; `game` and `optimizer` are reused
    # across iterations.
    game, sender, receiver, optimizer, epoch = load_ckp(f, game, optimizer)

    # Checkpoint names carry an "rs<digits>" tag (presumably the random
    # seed -- TODO confirm). Do not crash on files that lack it.
    m = re.search(r'rs([0-9]+)', f)
    print(m.group(1) if m else "no 'rs<digits>' tag in filename")

    scorer_train = Scorer(sender, receiver, training, game, grammar, msg_chars)
    scorer_train.get_interactions(training, True)

    lens.append(scorer_train.msg_len())

    acc, acc_or = scorer_train.dump()
    accs.append((acc, acc_or))
    jaccs.append([
        (scorer_train.uni_and_jaccard, scorer_train.uni_nonred_jaccard),
        (scorer_train.bi_and_jaccard, scorer_train.bi_nonred_jaccard),
        (scorer_train.tri_and_jaccard, scorer_train.tri_nonred_jaccard),
    ])
                

Get Mean Full and Partial Accuracy

In [None]:
# `accs` holds one (full, partial) accuracy pair per checkpoint. Average over
# all checkpoints; the loop variables `acc` / `acc_or` only hold the values of
# the last checkpoint processed, so averaging them reports a single run.
full_accs = [pair[0] for pair in accs]
partial_accs = [pair[1] for pair in accs]
print(f"Mean full accuracy: {np.mean(full_accs)}")
print(f"Mean partial accuracy: {np.mean(partial_accs)}")

Get Mean Message Lengths:
1. All messages
2. Partially redundant
3. Fully redundant
4. All redundant
5. Non-redundant

In [None]:
# Each entry of `lens` is indexed 0-4; position k corresponds to the k-th
# label below. Report the mean over checkpoints for every position.
length_labels = (
    "All messages",
    "Partially redundant messages",
    "Fully redundant messages",
    "All redundant messages",
    "All non-redundant messages",
)
for idx, label in enumerate(length_labels):
    print(f"{label}: {np.mean([run_lens[idx] for run_lens in lens])}\n")

Get Mean Signal Uniqueness:
1. Unigram
2. Bigram
3. Trigram

In [None]:
def get_signal_uniqueness(jaccs):
    """Average the per-run gap between paired scores at three n-gram levels.

    Args:
        jaccs: One entry per run. Each entry is a sequence of three pairs
            (unigram, bigram, trigram); for every pair the gap is
            ``pair[1] - pair[0]``.

    Returns:
        A 3-tuple ``(unigram_mean, bigram_mean, trigram_mean)`` with the mean
        gap over all runs for each level.
    """
    def mean_gap(level):
        # Second element minus first element of the pair at this level.
        return np.mean([run[level][1] - run[level][0] for run in jaccs])

    return mean_gap(0), mean_gap(1), mean_gap(2)

In [None]:
# Bare expression so the notebook displays the returned 3-tuple: the mean
# (nonred - and) Jaccard gap over checkpoints for unigrams, bigrams, trigrams.
get_signal_uniqueness(jaccs)

Compute Predictive Ambiguity

In [None]:
def _column_means(matrix):
    """Return the mean of every column of a 2-D `matrix` as a list of scalars.

    `matrix` must support ``matrix[:, i]``, ``len(matrix[0])`` and
    ``.mean().item()`` on a column (e.g. a torch tensor or numpy array).
    """
    return [matrix[:, col].mean().item() for col in range(len(matrix[0]))]


# Scorer attributes to summarise, in the order the summary cell indexes them
# (0 = all, 1 = redundant noun, 2 = redundant verb, 3 = fully redundant,
#  4 = all redundant, 5 = non-redundant).
RECONENT_ATTRS = (
    "reconent",
    "semirednoun_reconent",
    "semiredverb_reconent",
    "red_reconent",
    "allred_reconent",
    "other_reconent",
)

# One entry per checkpoint: a list of six lists of column means.
all_recons = []

# os.listdir returns entries in arbitrary order; sort for reproducibility.
for filename in sorted(os.listdir(directory)):
    f = os.path.join(directory, filename)
    # Skip sub-directories and macOS Finder metadata files.
    if not os.path.isfile(f) or ".DS_Store" in f:
        continue
    print(f)

    game, sender, receiver, optimizer, epoch = load_ckp(f, game, optimizer)

    scorer_train = Scorer(sender, receiver, training, game, grammar, msg_chars)
    scorer_train.get_interactions(training, True)

    all_recons.append(
        [_column_means(getattr(scorer_train, attr)) for attr in RECONENT_ATTRS]
    )

Get Mean Predictive Ambiguity Values for each message type:
1. All Messages
2. Partially redundant (redundant noun)
3. Partially redundant (redundant verb)
4. Fully redundant
5. All redundant
6. Non-redundant

In [None]:
# Entry k of every run in `all_recons` belongs to the k-th label below.
ambiguity_labels = (
    "All messages",
    "Partially redundant messages (redundant noun)",
    "Partially redundant messages (redundant verb)",
    "Fully redundant messages",
    "All redundant messages",
    "All non-redundant messages",
)
# Number of positions is taken from the first run's "all messages" entry and
# reused for every category.
n_positions = len(all_recons[0][0])
for category, label in enumerate(ambiguity_labels):
    per_run = [run[category] for run in all_recons]
    # Mean over runs, computed separately for each position.
    position_means = [
        np.mean([values[pos] for values in per_run]) for pos in range(n_positions)
    ]
    print(f"{label}: {position_means}\n")