In [1]:
import pandas as pd
import numpy as np
import os
from collections import Counter
from sklearn.metrics import normalized_mutual_info_score

In [2]:
def _encode_as_ints(values):
    """Replace each value by an int id, numbered by order of first appearance."""
    ids = {}
    # len(ids) is evaluated before the insertion, so a new value gets the next free id.
    return [ids.setdefault(value, len(ids)) for value in values]


def mutual_information(concepts, messages):
    """
    Measure how much messages m reveal about concepts c (assuming
    enumerability).

    Every distinct concept and every distinct message is replaced by an
    integer label, and the two label sequences are scored with scikit-learn's
    ``normalized_mutual_info_score``. The value returned is therefore the
    *normalized* mutual information, not the raw mutual information.

    Parameters
    ----------
    concepts, messages : iterable of hashable
        Paired observations: element i of ``concepts`` belongs with element i
        of ``messages``. Any iterable works, including one-shot iterators,
        because both inputs are copied into lists before use. Elements are
        used as dict keys, so they must be hashable.

    Returns
    -------
    The score returned by ``normalized_mutual_info_score`` for the two label
    sequences.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    # Materialize first: the inputs are traversed more than once below.
    concepts = list(concepts)
    messages = list(messages)

    if len(concepts) != len(messages):
        raise ValueError(
            "concepts and messages must have the same length "
            "(got {} and {})".format(len(concepts), len(messages))
        )

    cis = _encode_as_ints(concepts)
    mis = _encode_as_ints(messages)

    return normalized_mutual_info_score(cis, mis)

In [3]:
# Collect the log files in alphabetical order and display the list.
file_names = sorted(os.listdir('qualitative_analysis'))
file_names

['perfect-teacher_lang_max-message-len=12_train-percent=0.05_log_train.pkl',
 'perfect-teacher_lang_max-message-len=12_train-percent=0.05_log_val.pkl',
 'perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_train.pkl',
 'perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_val.pkl',
 'perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_train.pkl',
 'perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_val.pkl',
 'perfect-teacher_pedagogical-demos_k=2_train-percent=0.8_log_train.pkl',
 'perfect-teacher_pedagogical-demos_k=2_train-percent=0.8_log_val.pkl']

In [4]:
# For each log file: score the signals against the reward assignments,
# report how many distinct values each side has, and keep the signal set.
unique_signals = {}
for file_name in file_names:
    df = pd.read_pickle(os.path.join('qualitative_analysis', file_name))

    # str() yields hashable values that can serve as dict keys / set members.
    reward_assignments = list(map(str, df['reward_assignment'].tolist()))
    messages = list(map(str, df['signal'].tolist()))
    mi = mutual_information(reward_assignments, messages)

    # The split name is looked for in the last 10 characters of the file name.
    print('Train' if 'train' in file_name[-10:] else 'Val')
    print(file_name)
    print('MI', mi)
    print('unique reward assignments', len(set(reward_assignments)))
    message_set = set(messages)
    print('unique signals', len(message_set))
    print('-------')

    unique_signals[file_name] = message_set

Train
perfect-teacher_lang_max-message-len=12_train-percent=0.05_log_train.pkl
MI 0.0023970843317159796
unique reward assignments 28
unique signals 2
-------
Val
perfect-teacher_lang_max-message-len=12_train-percent=0.05_log_val.pkl
MI 0.029824936985940523
unique reward assignments 546
unique signals 2
-------
Train
perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_train.pkl
MI 0.2082777097664975
unique reward assignments 460
unique signals 21
-------
Val
perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_val.pkl
MI 0.08089293834141445
unique reward assignments 116
unique signals 18
-------
Train
perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_train.pkl
MI 0.05752849231753397
unique reward assignments 28
unique signals 40
-------
Val
perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_val.pkl
MI 0.45040774041400183
unique reward assignments 547
unique signals 69
-------
Train
perfect-teacher_pedagogical-demos_k=2_train-percent=0.8_log_train.

In [5]:
# Compare the distinct signals seen in the train log with those in the val log.
set1 = unique_signals['perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_train.pkl']
set2 = unique_signals['perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_val.pkl']
for signal_set in (set1, set2):
    print(len(signal_set))
# Number of val signals that never occur in train.
print(len(set2 - set1))

40
69
52


In [73]:
# Take the signal stored under label 0 and find every row carrying that same signal.
lang_diverse_df = pd.read_pickle('qualitative_analysis/perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_val.pkl')
signal_column = lang_diverse_df['signal']
arbitrary_message = signal_column[0]
indices = [
    row
    for row in range(len(lang_diverse_df))
    if np.array_equal(signal_column[row], arbitrary_message)
]

# Nested tuples are hashable, so identical reward assignments can be tallied.
reward_assignments_as_tuples = [
    (tuple(assignment[0]), tuple(assignment[1]))
    for assignment in lang_diverse_df['reward_assignment'][indices].tolist()
]

c = Counter(reward_assignments_as_tuples)
c.most_common(20)

[(((-2.0, 6.0, 2.0, -6.0), (3.0, 1.0, -1.0, -3.0)), 8),
 (((-2.0, 6.0, -6.0, 2.0), (3.0, -1.0, 1.0, -3.0)), 7),
 (((6.0, -6.0, 2.0, -2.0), (3.0, 1.0, -1.0, -3.0)), 6),
 (((-2.0, 2.0, -6.0, 6.0), (-3.0, 3.0, -1.0, 1.0)), 6),
 (((-6.0, 2.0, 6.0, -2.0), (-3.0, 3.0, 1.0, -1.0)), 6),
 (((-6.0, 2.0, 6.0, -2.0), (-1.0, 3.0, -3.0, 1.0)), 6),
 (((6.0, 2.0, -2.0, -6.0), (1.0, -3.0, 3.0, -1.0)), 6),
 (((6.0, -2.0, 2.0, -6.0), (-3.0, 1.0, 3.0, -1.0)), 5),
 (((-6.0, 6.0, 2.0, -2.0), (3.0, -3.0, 1.0, -1.0)), 5),
 (((2.0, -6.0, -2.0, 6.0), (-1.0, 3.0, -3.0, 1.0)), 5),
 (((-2.0, -6.0, 2.0, 6.0), (-1.0, 3.0, 1.0, -3.0)), 5),
 (((-6.0, 6.0, -2.0, 2.0), (-3.0, 3.0, 1.0, -1.0)), 5),
 (((-6.0, 2.0, 6.0, -2.0), (-3.0, -1.0, 1.0, 3.0)), 5),
 (((-2.0, 6.0, -6.0, 2.0), (-1.0, -3.0, 1.0, 3.0)), 5),
 (((-6.0, 2.0, -2.0, 6.0), (3.0, -1.0, -3.0, 1.0)), 5),
 (((-2.0, -6.0, 6.0, 2.0), (1.0, 3.0, -3.0, -1.0)), 4),
 (((2.0, 6.0, -6.0, -2.0), (1.0, 3.0, -3.0, -1.0)), 4),
 (((6.0, -2.0, -6.0, 2.0), (-1.0, 1.0, 3.0, -3.0

In [74]:
# Average reward attached to each colour and each shape over the rows in `indices`.
colors = ['red', 'blue', 'green', 'purple']
shapes = ['circle', 'square', 'triangle', 'pentagon']
# One list of observed rewards per feature name, colours first, then shapes.
value_counts = {feature: [] for feature in colors + shapes}

for reward_assignment in lang_diverse_df['reward_assignment'][indices].tolist():
    # Entry 0 holds the per-colour rewards, entry 1 the per-shape rewards.
    color_assignment = reward_assignment[0]
    shape_assignment = reward_assignment[1]
    for i, (color, shape) in enumerate(zip(colors, shapes)):
        value_counts[color].append(color_assignment[i])
        value_counts[shape].append(shape_assignment[i])

# Print every feature with its mean, then the means as one list (same order).
means = []
for name, rewards in value_counts.items():
    feature_mean = np.mean(rewards)
    print(name)
    print(feature_mean)
    means.append(feature_mean)
print(means)

red
0.1375
blue
0.2
green
0.025
purple
-0.3625
circle
-0.1125
square
0.60625
triangle
-0.23125
pentagon
-0.2625
[0.1375, 0.2, 0.025, -0.3625, -0.1125, 0.60625, -0.23125, -0.2625]


In [72]:
# Scan 100 probe signals (one every 32 rows) and report those for which
# `square` is NOT the shape with the highest mean reward among the rows that
# share the probe signal.

# Loop-invariant setup: the log file and the feature names never change
# between iterations, so load/build them once instead of 100 times.
# NOTE(review): read_pickle executes arbitrary code from the file; only use
# it on logs you produced yourself.
lang_diverse_df = pd.read_pickle('qualitative_analysis/perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_val.pkl')
signal_column = lang_diverse_df['signal']
reward_column = lang_diverse_df['reward_assignment']

colors = ['red', 'blue', 'green', 'purple']
shapes = ['circle', 'square', 'triangle', 'pentagon']

for batch_idx in range(100):
    # Probe with the signal stored under label 32 * batch_idx
    # (presumably the first row of each logged batch of 32 -- TODO confirm).
    arbitrary_message = signal_column[32 * batch_idx]
    indices = [
        row
        for row in range(len(lang_diverse_df))
        if np.array_equal(signal_column[row], arbitrary_message)
    ]
    matching_assignments = reward_column[indices].tolist()

    # Not read anywhere in this cell; kept so the notebook-level names
    # `reward_assignments_as_tuples` and `c` are still rebound as before.
    reward_assignments_as_tuples = [
        (tuple(assignment[0]), tuple(assignment[1]))
        for assignment in matching_assignments
    ]
    c = Counter(reward_assignments_as_tuples)

    # One list of observed rewards per feature name, colours first, then shapes.
    value_counts = {feature: [] for feature in colors + shapes}
    for reward_assignment in matching_assignments:
        # Entry 0 holds the per-colour rewards, entry 1 the per-shape rewards.
        color_assignment = reward_assignment[0]
        shape_assignment = reward_assignment[1]
        for i in range(4):
            value_counts[colors[i]].append(color_assignment[i])
            value_counts[shapes[i]].append(shape_assignment[i])

    # Same order as value_counts: red, blue, green, purple, circle, square,
    # triangle, pentagon.
    means = [np.mean(rewards) for rewards in value_counts.values()]
    mean_by_name = dict(zip(value_counts, means))

    # Look the shapes up by name instead of by position in `means`.
    square_mean = mean_by_name['square']
    if any(square_mean < mean_by_name[other] for other in ('pentagon', 'triangle', 'circle')):
        print(batch_idx)
        print(means)
                    

9
[1.25, 1.125, -0.75, -1.625, -0.0625, 0.0, 0.6875, -0.625]
13
[-0.125, -0.125, 0.5, -0.25, -0.1875, 0.5, -1.125, 0.8125]
35
[1.875, 0.5, -0.5, -1.875, 0.25, -0.125, 0.375, -0.5]


In [59]:
# NOTE: `value_counts` is not reset in this cell. The rewards below are
# appended to whatever lists it already holds, so the printed means depend on
# which cells ran before this one (and on how often this cell has been run).
for reward_assignment in lang_diverse_df['reward_assignment'][indices].tolist():
    # Entry 0 holds the per-colour rewards, entry 1 the per-shape rewards.
    color_assignment = reward_assignment[0]
    shape_assignment = reward_assignment[1]
    for i in range(4):
        value_counts[colors[i]].append(color_assignment[i])
        value_counts[shapes[i]].append(shape_assignment[i])

# Report the mean reward accumulated so far for every feature.
for name in value_counts:
    print(name)
    print(np.mean(value_counts[name]))

red
0.4650735294117647
blue
-0.2959558823529412
green
0.14613970588235295
purple
-0.31525735294117646
circle
-0.19623161764705882
square
0.48483455882352944
triangle
-0.1213235294117647
pentagon
-0.16727941176470587


In [30]:
# Flatten every reward assignment to 8 values and average position-wise
# over all rows of the frame.
reward_column = lang_diverse_df['reward_assignment']
all_flattened = [reward_column[row].reshape(8,) for row in range(len(lang_diverse_df))]

np.array(all_flattened).mean(axis=0)

array([ 0.4875  , -0.11375 ,  0.0275  , -0.40125 , -0.14375 ,  0.408125,
       -0.0675  , -0.196875])

In [33]:
# For every log file, print the position-wise mean of its reward assignments
# (each flattened to 8 values).
for file_name in file_names:
    df = pd.read_pickle(os.path.join('qualitative_analysis', file_name))
    reward_column = df['reward_assignment']
    all_flattened = [reward_column[row].reshape(8,) for row in range(len(df))]

    print(file_name)
    print(np.array(all_flattened).mean(axis=0))

perfect-teacher_lang_max-message-len=12_train-percent=0.05_log_train.pkl
[-1.81375   0.14625   1.87875  -0.21125   0.22     -0.406875  0.234375
 -0.0475  ]
perfect-teacher_lang_max-message-len=12_train-percent=0.05_log_val.pkl
[ 0.1575  -0.1175   0.0175  -0.0575  -0.01875  0.01875  0.00625 -0.00625]
perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_train.pkl
[-0.12125  -0.01875   0.04375   0.09625   0.0775   -0.038125 -0.01375
 -0.025625]
perfect-teacher_lang_max-message-len=12_train-percent=0.8_log_val.pkl
[ 0.4875   -0.11375   0.0275   -0.40125  -0.14375   0.408125 -0.0675
 -0.196875]
perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_train.pkl
[-0.32625  -1.10125   1.21375   0.21375  -0.061875 -0.440625 -0.15625
  0.65875 ]
perfect-teacher_pedagogical-demos_k=2_train-percent=0.05_log_val.pkl
[ 0.01125   0.10875  -0.14625   0.02625  -0.04875   0.05625  -0.044375
  0.036875]
perfect-teacher_pedagogical-demos_k=2_train-percent=0.8_log_train.pkl
[-0.1325   -0.235  