In [108]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge

## Read in file

In [111]:
FILE_PATH_ROOT = '/home/dharakyu/signaling-bandits/outputs/' \
                '4x4partial_chunk-size-2_discrete-comm_max-len-4_vocab-size-80_chain-len-5_val'
NUM_SHAPES = 4
NUM_COLORS = 4

df = pd.read_pickle(FILE_PATH_ROOT + '.pkl')

## Get labels (utilities)

In [122]:
def convert_df_col_to_np_array(df, col_name):
    """
    Helper function to convert a column of a df to a np array
    """
    list_of_np_arrays = df[col_name].tolist()
    return np.concatenate(list_of_np_arrays, axis=0)

def map_reward_matrix_to_index(stack):
    """
    Extract the utilities from the reward matrices
    """
    all_utilities = []
    for i in range(3200):
        row = stack[i, :]
        matrix = row.reshape(NUM_COLORS * NUM_SHAPES, -1)
        utilities = matrix[:, -2]
        all_utilities.append(utilities)
    return np.array(all_utilities)

In [127]:
reward_matrices = convert_df_col_to_np_array(df=df, col_name='reward_matrix') # (3200, NUM_COLORS*NUM_SHAPES*encoding_len)
utilities = map_reward_matrix_to_index(reward_matrices) # (3200, NUM_COLORS*NUM_SHAPES)

## Get inputs (messages)

In [128]:
message_col_names = [col_name for col_name in df.columns.tolist() if 'message' in col_name]
print(message_col_names)

['message_0', 'message_1', 'message_2', 'message_3', 'message_4']


In [129]:
messages = [convert_df_col_to_np_array(df=df, col_name=col_name) for col_name in message_col_names]
messages = np.array(messages)
messages = np.swapaxes(messages, 0, 1)
messages.shape

(3200, 5, 320)

## Train the model

In [134]:
def get_MSE(messages, utilities):
    X_train, X_test, y_train, y_test = train_test_split(messages, utilities)
    
    # for each generation of agents
    for gen in range(num_agents):
        print('gen', gen)
        X_train_gen = X_train[:, gen, :]
        X_test_gen = X_test[:, gen, :]

        mses = []
        # for each object in the reward matrix
        for util_idx in range(y_train.shape[1]):
            clf = Ridge().fit(X_train_gen, y_train[:, util_idx])
            pred = clf.predict(X_test_gen)
            mse = clf.score(X_test_gen, y_test[:, util_idx])
            mses.append(mse)

        print(np.mean(mses))

In [135]:
get_MSE(messages, utilities)

gen 0
-0.05802458336890946
gen 1
-0.06091603833642041
gen 2
-0.07999456426378984
gen 3
-0.07181601755035517
gen 4
-0.06776301641761003


## Do it in a loop, for different experimental conditions