In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge
from collections import Counter

## Read in file

In [2]:
FILE_PATH_ROOT = '/home/dharakyu/signaling-bandits/outputs/' \
                '4x4-partial_chunk-2_chain-len-3_run-4_val'
NUM_SHAPES = 4
NUM_COLORS = 4

df = pd.read_pickle(FILE_PATH_ROOT + '.pkl')

## Get labels (utilities)

In [3]:
def convert_df_col_to_np_array(df, col_name):
    """
    Helper function to convert a column of a df to a np array
    """
    list_of_np_arrays = df[col_name].tolist()
    return np.concatenate(list_of_np_arrays, axis=0)

def extract_utilities_from_reward_matrix(stack):
    """
    Extract the utilities from the reward matrices
    """
    all_utilities = []
    for i in range(3200):
        row = stack[i, :]
        matrix = row.reshape(NUM_COLORS * NUM_SHAPES, -1)
        utilities = matrix[:, -2]
        all_utilities.append(utilities)
    return np.array(all_utilities)

In [4]:
reward_matrices = convert_df_col_to_np_array(df=df, col_name='reward_matrix') # (3200, NUM_COLORS*NUM_SHAPES*encoding_len)
utilities = extract_utilities_from_reward_matrix(reward_matrices) # (3200, NUM_COLORS*NUM_SHAPES)

## Get inputs (messages)

In [5]:
message_col_names = [col_name for col_name in df.columns.tolist() if 'message' in col_name]
print(message_col_names)

['message_0', 'message_1', 'message_2']


In [6]:
messages = [convert_df_col_to_np_array(df=df, col_name=col_name) for col_name in message_col_names]
messages = np.array(messages)
messages = np.swapaxes(messages, 0, 1)
messages.shape

(3200, 3, 320)

## Train the model

In [7]:
def get_MSE(messages, utilities):
    num_agents = messages.shape[1]
    X_train, X_test, y_train, y_test = train_test_split(messages, utilities)
    
    avg_mse_for_each_gen = []
    
    # for each generation of agents
    for gen in range(num_agents):
        #print('gen', gen)
        X_train_gen = X_train[:, gen, :]
        X_test_gen = X_test[:, gen, :]

        mses = []
        # for each object in the reward matrix
        for util_idx in range(y_train.shape[1]):
            clf = Ridge().fit(X_train_gen, y_train[:, util_idx])
            pred = clf.predict(X_test_gen)
            mse = clf.score(X_test_gen, y_test[:, util_idx])
            mses.append(mse)

        avg_mse_for_each_gen.append(np.mean(mses))
    return avg_mse_for_each_gen

In [8]:
avg_mse = get_MSE(messages, utilities)
avg_mse

[0.14474431121798054, 0.2022763465206406, 0.22453003194925117]

## Do it in a loop, for different experimental conditions

In [9]:
FILE_PATH_ROOT = '/home/dharakyu/signaling-bandits/outputs/' \
                '4x4-partial_chunk-2_chain-len-'
chain_lens = [2, 3, 4, 5]
run_nums = [1, 2, 3, 4, 5]

for chain_len in chain_lens:
    print('chain length', chain_len)
    mse_for_each_gen = []
    for run_num in run_nums:
        if chain_len == 2 and run_num >= 4: continue
        full_path = FILE_PATH_ROOT + '{chain_len}_run-{run_num}_val.pkl'.format(chain_len=chain_len, run_num=run_num)
        df = pd.read_pickle(full_path)
        reward_matrices = convert_df_col_to_np_array(df=df, col_name='reward_matrix') # (3200, NUM_COLORS*NUM_SHAPES*encoding_len)
        utilities = extract_utilities_from_reward_matrix(reward_matrices) # (3200, NUM_COLORS*NUM_SHAPES)
        message_col_names = [col_name for col_name in df.columns.tolist() if 'message' in col_name]
        messages = [convert_df_col_to_np_array(df=df, col_name=col_name) for col_name in message_col_names]
        messages = np.array(messages)
        messages = np.swapaxes(messages, 0, 1)
        avg_mse = get_MSE(messages, utilities)
        mse_for_each_gen.append(avg_mse)
    mse_for_each_gen = np.array(mse_for_each_gen)
    gen_means = np.mean(mse_for_each_gen, axis=0)
    for i in range(gen_means.shape[0]):
        print('MSE at gen', i, gen_means[i])
        

chain length 2
MSE at gen 0 0.15249019839231806
MSE at gen 1 0.16559920683749427
chain length 3
MSE at gen 0 0.1452243277945769
MSE at gen 1 0.18950288822335631
MSE at gen 2 0.2075798799606381
chain length 4
MSE at gen 0 0.16559906718085596
MSE at gen 1 0.22155681617148565
MSE at gen 2 0.24968189732755103
MSE at gen 3 0.25265211834531726
chain length 5
MSE at gen 0 0.14880802541477228
MSE at gen 1 0.1987770888243959
MSE at gen 2 0.22459151125821125
MSE at gen 3 0.2333077561668032
MSE at gen 4 0.23597813272698157


## Qualitative analysis of messages for a single reward matrix

The approach: within a single run, find a single reward matrix, and get all the messages associated with it. Show how similar those messages are to one another. Compare across generations - my hypothesis is that the messages should be increasingly similar as generation increases, since there is increasing overlap in what the agents see.

In [10]:
# load in file
file_path = '/home/dharakyu/signaling-bandits/outputs/' \
                '4x4-partial_chunk-2_chain-len-5_run-1_val.pkl'
NUM_SHAPES = 4
NUM_COLORS = 4

df = pd.read_pickle(file_path)

In [11]:
reward_matrices = convert_df_col_to_np_array(df=df, col_name='reward_matrix') # (3200, NUM_COLORS*NUM_SHAPES*encoding_len)
utilities = extract_utilities_from_reward_matrix(reward_matrices) # (3200, NUM_COLORS*NUM_SHAPES)
_, indices = np.unique(utilities, return_inverse=True, axis=0)
indices.shape
# pick the arrangement of utilities that occurs most frequently
c = Counter(list(indices))
most_freq_val, count = c.most_common(1)[0]

# and get the indices where it appears
indices_of_most_freq_val = np.where(indices == most_freq_val)[0]
indices_of_most_freq_val

array([   8,   87,  108, 1025, 1265, 1448, 1493, 1546, 1599, 2010, 2384,
       2542, 2804, 3133])

In [12]:
# get the messages corresponding to those indices
message_col_names = [col_name for col_name in df.columns.tolist() if 'message' in col_name]
print(message_col_names)
messages = [convert_df_col_to_np_array(df=df, col_name=col_name) for col_name in message_col_names]
messages = np.array(messages)
messages = np.swapaxes(messages, 0, 1)
print(messages.shape)

subset_of_messages = messages[indices_of_most_freq_val]
print(subset_of_messages.shape)

['message_0', 'message_1', 'message_2', 'message_3', 'message_4']
(3200, 5, 320)
(14, 5, 320)


In [13]:
# start by counting how many of the messages are the same
for i in range(5):
    print('generation', i)
    unique_messages, counts = np.unique(subset_of_messages[:, i, :], return_counts=True, axis=0)
    print(unique_messages.shape)
    print(counts)

generation 0
(7, 320)
[1 1 2 7 1 1 1]
generation 1
(2, 320)
[ 1 13]
generation 2
(1, 320)
[14]
generation 3
(1, 320)
[14]
generation 4
(1, 320)
[14]


In [14]:
for i in range(5):
    print(messages[:, i, :].shape)
    unique_messages_across_iteration = np.unique(messages[:, i, :], axis=0)
    print(len(unique_messages_across_iteration))

(3200, 320)
47
(3200, 320)
34
(3200, 320)
30
(3200, 320)
22
(3200, 320)
27
