In [16]:
import pickle
from utils.analysis_from_interaction import *
from egg.core.language_analysis import Disent
from language_analysis_local import TopographicSimilarityHierarchical, encode_input_for_topsim_hierarchical

# calculate metrics from stored interactions

In [12]:
# Switch between the original simulations (False) and the control simulations (True).
control = True

# Every simulation was trained for the same number of epochs.
n_epochs = 300

if not control:
    # Original simulations: one results folder per data set, all with
    # unbalanced sampling and vocab-size factor 3.
    datasets = ('(3,4)', '(3,8)', '(3,16)', '(4,4)', '(4,8)', '(5,4)')
    n_attributes = (3, 3, 3, 4, 4, 5)
    n_values = (4, 8, 16, 4, 8, 4)
    paths = ['results/' + name + '_sample_scaling_10_balanced_False_vsf_3/' for name in datasets]
else:
    # Control simulations: always the (4,8) data set, varying the
    # balanced flag and the vocab-size factor (the trailing "vsf_<k>").
    paths = [
        'results/(4,8)_sample_scaling_10_balanced_True_vsf_1/',
        'results/(4,8)_sample_scaling_10_balanced_False_vsf_1/',
        'results/(4,8)_sample_scaling_10_balanced_True_vsf_2/',
        'results/(4,8)_sample_scaling_10_balanced_False_vsf_2/',
        'results/(4,8)_sample_scaling_10_balanced_True_vsf_3/',
        'results/(4,8)_sample_scaling_10_balanced_True_vsf_4/',
        'results/(4,8)_sample_scaling_10_balanced_False_vsf_4/',
    ]
    n_control_runs = 7
    datasets = ('(4,8)',) * n_control_runs
    n_attributes = (4,) * n_control_runs
    n_values = (8,) * n_control_runs



In [14]:
# Run once to rename every stored interaction directory to 'interactions'.
# Simulations were run in 2021 or 2022, so the original directory names contain '2021' or '2022'.
# import os
# 
# for d, dataset in enumerate(datasets): 
#     for folder in ['standard', 'zero_shot']:
#         for run in range(5):
#             directories = os.listdir(paths[d] + folder + '/' + str(run) + '/')
#             for direct in directories: 
#                 if '2021' in direct or '2022' in direct: 
#                     os.rename(paths[d] + folder + '/' + str(run) + '/' + direct, 
#                               paths[d] + folder + '/' + str(run) + '/interactions')
# 

### entropy scores: MI, effectiveness, efficiency

In [None]:
# Compute the entropy-based scores from the final training interaction of
# every run and store them as 'entropy_scores.pkl' inside the run folder.
for d in range(len(datasets)):

    # data-set properties are constant across the runs of one data set
    attributes = n_attributes[d]
    values = n_values[d]

    for run in range(5):

        path_to_run = paths[d] + 'standard/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/interactions_epoch' + str(n_epochs))
        interaction = torch.load(path_to_interaction)

        scores = information_scores(interaction, attributes, values, normalizer="arithmetic")

        # context manager: the file is flushed and closed even if pickling fails
        with open(path_to_run + 'entropy_scores.pkl', 'wb') as f:
            pickle.dump(scores, f)
    

###  message length

In [None]:
# We evaluated message length per hierarchy level after training, but
# you can also use the HierarchicalMessageLength callback and store the results.
#
# Results are written to 'message_length_hierarchical.pkl' inside each run folder.

for d in range(len(datasets)):

    attributes = n_attributes[d]

    for run in range(5):

        path_to_run = paths[d] + 'standard/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/interactions_epoch' + str(n_epochs))
        interaction = torch.load(path_to_interaction)

        scores = message_length_per_hierarchy_level(interaction, attributes)

        # context manager: the file is flushed and closed even if pickling fails
        with open(path_to_run + 'message_length_hierarchical.pkl', 'wb') as f:
            pickle.dump(scores, f)

###  symbol redundancy

In [None]:
# Compute symbol redundancy and the symbol / attribute-value mutual information
# for every run and store both in 'symbol_redundancy.pkl' inside the run folder.
for d in range(len(datasets)):

    attributes = n_attributes[d]
    values = n_values[d]
    # The vocab-size factor is the last '_'-separated component of the results
    # folder name ("..._vsf_<k>/"); parsing it this way does not depend on the
    # factor being a single digit.
    vs_factor = int(paths[d].rstrip('/').rsplit('_', 1)[-1])
    vocab_size = (n_values[d] + 1) * vs_factor + 1

    for run in range(5):

        path_to_run = paths[d] + 'standard/' + str(run) + '/'
        # NOTE: a previous version also loaded 'symbols_pernsame.npy' here, but the
        # array was never used and a missing file aborted the whole computation.
        path_to_interaction = (path_to_run + 'interactions/train/interactions_epoch' + str(n_epochs))
        interaction = torch.load(path_to_interaction)
        redundancy, MI = symbol_frequency(interaction, attributes, values, vocab_size)

        scores = {'symbol_redundancy': redundancy, 'MI_symbol-attribute_value': MI}

        # context manager: the file is flushed and closed even if pickling fails
        with open(path_to_run + 'symbol_redundancy.pkl', 'wb') as f:
            pickle.dump(scores, f)

###  compositionality scores: topsim, posdis, bosdis

In [None]:
# topsim
# Although topsim values are stored throughout training if callbacks are verbose, we re-evaluate
# the final topsim scores with more data points.
#
# For every run, topsim is computed on the train and validation interactions of the
# final epoch, once on the first `samples` inputs and once on the first `samples`
# inputs whose trailing len(dim) entries sum to zero ("max relevance").
# Results are written to 'topsim_final.pkl' inside each run folder.

samples = 5000  # maximum number of (input, message) pairs per topsim estimate

# evaluation mode -> sub-directory of 'interactions/' that holds its data
interaction_dirs = {'train': 'train', 'test': 'validation'}

for d, dataset in enumerate(datasets):

    dim = [n_values[d]]*n_attributes[d]

    for run in range(5):
        print("dataset", dataset, "run", run)

        topsim_final = {}
        path_to_run = paths[d] + 'standard/' + str(run) + '/'

        TOPSIM = TopographicSimilarityHierarchical(dim, is_gumbel=True)

        for mode, subdir in interaction_dirs.items():

            # use the configured final epoch instead of a hard-coded 300
            interaction = torch.load(
                path_to_run + 'interactions/' + subdir + '/interactions_epoch' + str(n_epochs))

            # argmax over the last axis of the stored messages
            messages = interaction.message.argmax(dim=-1)
            sender_input = interaction.sender_input

            # indices of the inputs whose last len(dim) entries sum to zero
            max_relevant = np.where(np.sum(sender_input[:,-len(dim):].numpy(), axis=1)==0)[0]
            messages_max_relevant = messages[max_relevant]
            sender_input_max_relevant = sender_input[max_relevant]

            messages = [msg.tolist() for msg in messages]
            messages_max_relevant = [msg.tolist() for msg in messages_max_relevant]

            encoded_input = encode_input_for_topsim_hierarchical(sender_input, dim)
            encoded_input_max_relevant = encode_input_for_topsim_hierarchical(sender_input_max_relevant, dim)
            topsim = TOPSIM.compute_topsim(encoded_input[0:samples], messages[0:samples])
            topsim_max_relevant = TOPSIM.compute_topsim(encoded_input_max_relevant[0:samples],
                                                        messages_max_relevant[0:samples])
            print('... topsim computed')

            topsim_final['topsim_' + mode] = topsim
            topsim_final['topsim_max_relevance_' + mode] = topsim_max_relevant

        # context manager: the file is flushed and closed even if pickling fails
        with open(path_to_run + "topsim_final.pkl", "wb") as f:
            pickle.dump(topsim_final, f)
        print(topsim_final)

In [None]:
# use Disent callback from egg
#
# For every run, posdis and bosdis are computed on the final training interaction,
# once for all inputs and once for the inputs whose relevance vector sums to zero
# ("max relevance"). Results are written to 'posdis_bosdis.pkl' inside each run folder.

for d in range(len(datasets)):

    dim = [n_values[d]] * n_attributes[d]
    # The vocab-size factor is the last '_'-separated component of the results
    # folder name ("..._vsf_<k>/"); parsing it this way does not depend on the
    # factor being a single digit.
    vs_factor = int(paths[d].rstrip('/').rsplit('_', 1)[-1])
    vocab_size = (n_values[d] + 1) * vs_factor + 1

    print("data set", dim)

    for run in range(5):

        path_to_run = paths[d] + 'standard/' + str(run) + '/'
        # use the configured final epoch instead of a hard-coded 300
        interaction = torch.load(path_to_run + 'interactions/train/interactions_epoch' + str(n_epochs))

        # argmax over the last axis of the stored messages
        messages = interaction.message.argmax(dim=-1)
        sender_input = interaction.sender_input
        # the last n_attributes columns are split off as relevance vectors
        objects_k_hot = sender_input[:,:-n_attributes[d]]
        relevance_vectors = sender_input[:,-n_attributes[d]:]

        # boolean row mask, computed once and shared by objects and messages
        is_max_relevance = torch.sum(relevance_vectors, dim=1) == 0
        objects_max_relevance = torch.tensor(k_hot_to_attributes(
            objects_k_hot[is_max_relevance], dim[0]))
        messages_max_relevance = messages[is_max_relevance]

        posdis_max_relevance = Disent.posdis(objects_max_relevance, messages_max_relevance)
        bosdis_max_relevance = Disent.bosdis(objects_max_relevance, messages_max_relevance, vocab_size)

        # all inputs: re-encode with the topsim encoder, then decode with
        # n_values + 1 values per attribute
        sender_input_encoded = torch.tensor(encode_input_for_topsim_hierarchical(sender_input, dim))
        objects = torch.tensor(k_hot_to_attributes(sender_input_encoded, n_values[d]+1))

        posdis = Disent.posdis(objects, messages)
        bosdis = Disent.bosdis(objects, messages, vocab_size)

        posdis_bosdis = {
            'posdis_max_relevance': posdis_max_relevance,
            'bosdis_max_relevance': bosdis_max_relevance,
            'posdis': posdis,
            'bosdis': bosdis,
        }

        # context manager: the file is flushed and closed even if pickling fails
        with open(path_to_run + "posdis_bosdis.pkl", "wb") as f:
            pickle.dump(posdis_bosdis, f)

### co-occurrences

In [6]:
# Compute the co-occurrence scores per hierarchy level for every run and
# store them as 'normalized_cooccurrence.pkl' inside the run folder.
for d in range(len(datasets)):

    attributes = n_attributes[d]
    values = n_values[d]
    # The vocab-size factor is the last '_'-separated component of the results
    # folder name ("..._vsf_<k>/"); parsing it this way does not depend on the
    # factor being a single digit.
    vs_factor = int(paths[d].rstrip('/').rsplit('_', 1)[-1])

    for run in range(5):

        path_to_run = paths[d] + 'standard/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/interactions_epoch' + str(n_epochs))
        interaction = torch.load(path_to_interaction)

        scores = cooccurrence_per_hierarchy_level(interaction, attributes, values, vs_factor)

        # context manager: the file is flushed and closed even if pickling fails
        with open(path_to_run + 'normalized_cooccurrence.pkl', 'wb') as f:
            pickle.dump(scores, f)
    