# three-way entropy analysis 

This file tests a three-way entropy analysis between the sender's input objects (objects), the messages, and the receiver's selected objects (selections). 

We show for the five different conditions that the conditional mutual information between objects and selections given messages, I(O,S|M), is approximately zero, and that the mutual information between objects and messages, I(O,M), is approximately equal to the mutual information between messages and selections, I(M,S). As explained in the paper, this means that an analysis of sender (objects-messages) and receiver (messages-selections) is symmetric, and we therefore only analyze the sender part. These analyses can be found in the 'language_analysis.ipynb' file.

The reported scores are not normalized. 

In [1]:
import tensorflow as tf
import numpy as np
from nn import agents
from utils.load_data import collect_examples_per_class
from utils.entropy_scores import ThreeWayEntropyScores
from utils.referential_data import make_referential_data
from utils.config import get_cnn_paths

### calculate the scores 

In [2]:
# Build the referential-game inputs from a reduced data set:
# 50 examples per class keep the computation time manageable.

train_data, _ = collect_examples_per_class()

# One-hot labels for 64 classes with 50 consecutive entries each
# (assumes train_data is grouped by class in the same order -- TODO confirm).
class_ids = np.repeat(np.arange(64), 50)
train_labels = tf.one_hot(class_ids, depth=64).numpy()

sender_input, receiver_input, ref_labels, td_labels, permutation = make_referential_data(
    [train_data, train_data, train_labels],
    return_permutation=True,
)

# Class index of every entry in td_labels, arranged with one row per sample.
td_label_array = np.array([np.argmax(one_hot, axis=1) for one_hot in td_labels]).T

# Reorder each row by its permutation; this is needed to identify the class
# labels of the receiver's selections.
ordered_labels = np.zeros_like(td_label_array)
for sample_idx, order in enumerate(permutation):
    ordered_labels[sample_idx, :] = td_label_array[sample_idx, order]

In [3]:
# Determine the entropy scores of interest for all five conditions
# (DEFAULT, COLOR, SCALE, SHAPE, ALL).

mode = 'language_emergence_basic'
conditions = ['default', 'color', 'scale', 'shape', 'all']
cnns = ['default0-0', 'color0-6', 'scale0-6', 'shape0-6', 'all0-8']
all_cnn_paths = get_cnn_paths()

scores = {}

# conditions and cnns are parallel lists: each condition has its own CNN.
for condition, cnn_key in zip(conditions, cnns):
    print('calculating condition: ' + str(condition))

    condition_scores = {'I_OM': [], 'I_MS': [], 'I_OS_given_M': []}
    scores[condition] = condition_scores

    cnn_path = all_cnn_paths[cnn_key]
    condition_prefix = 'results/' + mode + '/' + condition

    for run in range(10):
        path = condition_prefix + str(run) + '/vs4_ml3/'

        # Rebuild both agents for this run. Sender and receiver each get their
        # own copy of the CNN, truncated at the 'dense_1' layer.
        cnn_sender = tf.keras.models.load_model(cnn_path)
        cnn_receiver = tf.keras.models.load_model(cnn_path)
        vision_module_sender = tf.keras.Model(
            inputs=cnn_sender.input,
            outputs=cnn_sender.get_layer('dense_1').output,
        )
        vision_module_receiver = tf.keras.Model(
            inputs=cnn_receiver.input,
            outputs=cnn_receiver.get_layer('dense_1').output,
        )
        sender = agents.Sender(
            4, 3, 128, 128,
            activation='tanh',
            vision_module=vision_module_sender,
        )
        receiver = agents.Receiver(
            4, 3, 128, 128,
            activation='tanh',
            n_distractors=2,
            vision_module=vision_module_receiver,
            image_dim=64,
        )
        sender.load_weights(path + 'sender_weights_epoch149/')
        receiver.load_weights(path + 'receiver_weights_epoch149/')

        # Run the game once: the sender produces messages, the receiver picks
        # one candidate per sample.
        messages, _, _, _, _ = sender.forward(sender_input, training=False)
        selected_positions, _, _ = receiver.forward(messages, receiver_input, training=False)

        # Translate the receiver's choices into class labels via ordered_labels.
        sample_rows = np.arange(len(ordered_labels))
        selections = ordered_labels[sample_rows, selected_positions.numpy()]

        # Three-way entropy scores between messages, td_labels[0] and selections.
        entropy_scores = ThreeWayEntropyScores(messages.numpy(), td_labels[0], selections)
        all_scores = entropy_scores.calc_all_scores()
        for key in condition_scores:
            condition_scores[key].append(all_scores[key])

calculating condition: default
calculating condition: color
calculating condition: scale
calculating condition: shape
calculating condition: all


### test assumptions

In [6]:
# Per condition, report the mean of I(O,S|M) and the mean absolute difference
# between I(O,M) and I(M,S) over the collected scores, rounded to 3 decimals.
for condition in conditions:
    print(condition)
    condition_scores = scores[condition]
    mean_cond_mi = np.mean(condition_scores['I_OS_given_M'])
    print('I(O,S|M):          ', np.round(mean_cond_mi, 3))
    i_om = np.array(condition_scores['I_OM'])
    i_ms = np.array(condition_scores['I_MS'])
    mean_abs_gap = np.mean(np.abs(i_om - i_ms))
    print('abs(I(O,M)-I(M,S)):', np.round(mean_abs_gap, 3))

default
I(O,S|M):           0.003
abs(I(O,M)-I(M,S)): 0.003
color
I(O,S|M):           0.002
abs(I(O,M)-I(M,S)): 0.005
scale
I(O,S|M):           0.002
abs(I(O,M)-I(M,S)): 0.004
shape
I(O,S|M):           0.001
abs(I(O,M)-I(M,S)): 0.004
all
I(O,S|M):           0.0
abs(I(O,M)-I(M,S)): 0.0


$\rightarrow$ I(O,S|M) is approximately zero, and I(O,M) is approximately equal to I(M,S)

### all scores

For the sake of completeness, here are the mean values of I(O,M) and I(M,S) reported separately.

In [7]:
# Per condition, print the mean of I(O,M) and of I(M,S) over the collected
# scores, rounded to 3 decimals.
for condition in conditions:
    print(condition)
    for label, key in (('I(O,M):', 'I_OM'), ('I(M,S):', 'I_MS')):
        print(label, np.round(np.mean(scores[condition][key]), 3))

default
I(O,M): 2.938
I(M,S): 2.935
color
I(O,M): 2.657
I(M,S): 2.66
scale
I(O,M): 2.475
I(M,S): 2.476
shape
I(O,M): 2.711
I(M,S): 2.712
all
I(O,M): 3.125
I(M,S): 3.125
