In [1]:
import itertools
import copy
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import pickle
import random
from sklearn.model_selection import train_test_split
import time
from joblib import Parallel, delayed
import pandas as pd
import sys
import os
import seaborn as sns 
from sklearn.metrics import r2_score
import math
import matplotlib as mpl

from cryptic_rnn import *

In [7]:
def run_exp(trainseqs_old, trainseqs_old_p, testseqs_old, cue_dict, num_inputs):
    """ assigns integers to symbols and trains RNN on sequence - evaluating on test set on each trial"""
    # assigning value to symbol and calculating trial outputs
    all_syms = list(cue_dict.keys())
    all_input_vals = list(np.arange(2,18))
    input_vals = random.sample(all_input_vals,num_inputs)
    input_vals.sort() # sort values (so A is smallest)
    # randomly select values for each input
    for i, s in enumerate(all_syms):
        cue_dict[s] = input_vals[i]
    
    # load train and test trials
    trainseqs = change_dict(trainseqs_old, cue_dict)
    train_inputs = convert_seq2inputs(trainseqs, num_classes=num_classes, seq_len=5)
    trainset = DataLoader(train_inputs, batch_size=batchsize, shuffle=True)

    trainseqs = change_dict(trainseqs_old_p, cue_dict)
    train_inputs = convert_seq2inputs(trainseqs, num_classes=num_classes, seq_len=5)
    trainset_p = DataLoader(train_inputs, batch_size=batchsize, shuffle=True)
    
    testseqs = change_dict(testseqs_old, cue_dict)
    test_inputs = convert_seq2inputs(testseqs, num_classes=num_classes, seq_len=5)
    testset = DataLoader(test_inputs, batch_size=batchsize, shuffle=True)

    # run primitive training RNN
    model = OneStepRNN(input_size, output_size, hidden_size, num_layers, xavier_gain)
    model_p = copy.deepcopy(model)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
    loss1, acc1 = run_acc(model,optimizer,criterion, trainset, [trainset, testset], epochs, hidden_size)
    
    optimizer = torch.optim.Adam(model_p.parameters(), lr=learningRate)
    loss2, acc2 = run_acc(model_p,optimizer,criterion, trainset_p, [trainset_p, testset], epochs, hidden_size)
       
    return {'cue_dict':cue_dict, 'loss':loss1, 'acc':acc1, 'mod': model, 'test': testset,
           'loss_p':loss2, 'acc_p':acc2, 'mod_p': model_p}

def collect_accs(trainseqs, trainseqs_p, testseqs, cue_dict, num_inputs):
    """ trains RNNs in parallel and collects r^2 and acc vals"""
    t1 = time.time()
    res  = Parallel(n_jobs = -1)(delayed(run_exp)(trainseqs, trainseqs_p, testseqs, cue_dict, num_inputs) for i in range(num_sims))
    t2 = time.time()
    print('run time: ', (t2-t1)/60)

    tests = [r['test'] for r in res]
    cue_dicts = [r['cue_dict'] for r in res]
    
    mods = [r['mod'] for r in res]
    acc_train_list = [r['acc'][:,0] for r in res]
    acc_test_list = [r['acc'][:,1] for r in res]
    acc_train = np.array(acc_train_list).T
    acc_test = np.array(acc_test_list).T

    mods_p = [r['mod_p'] for r in res]
    acc_train_list_p = [r['acc_p'][:,0] for r in res]
    acc_test_list_p = [r['acc_p'][:,1] for r in res]
    acc_train_p = np.array(acc_train_list_p).T
    acc_test_p = np.array(acc_test_list_p).T
       
    acc_df = pd.DataFrame({'acc_train': acc_train[-1,:], 'acc_train_p': acc_train_p[-1,:],\
                          'acc_test': acc_test[-1,:], 'acc_test_p': acc_test_p[-1,:]})
    
    # fully trained
    fully_trained = acc_df[(acc_df['acc_train'] == 1)&(acc_df['acc_train_p'] == 1)]
    fully_trained_idx = fully_trained.index
    ft_mods = [mods[i] for i in fully_trained_idx]
    print('fully trained models: ',len(ft_mods))
    
    if len(ft_mods) > 0:
        
        ft_tests = [tests[i] for i in fully_trained_idx]
        ft_mods_p = [mods_p[i] for i in fully_trained_idx]

        ft_tests_acc = np.array([acc_test_list[i] for i in fully_trained_idx]).T
        ft_train_acc = np.array([acc_train_list[i] for i in fully_trained_idx]).T
        final_acc = np.mean(ft_tests_acc[-1,:])
        final_acc_std = np.std(ft_tests_acc[-1,:])
              
        ft_tests_acc_p = np.array([acc_test_list_p[i] for i in fully_trained_idx]).T
        ft_train_acc_p = np.array([acc_train_list_p[i] for i in fully_trained_idx]).T
        final_acc_p = np.mean(ft_tests_acc_p[-1,:])
        final_acc_std_p = np.std(ft_tests_acc_p[-1,:])
        
        r2, dfs, alldfs = predcorr(ft_mods, ft_tests, hidden_size, plot_corr = False)
        r2_p, dfs_p, alldfs = predcorr(ft_mods_p, ft_tests, hidden_size, plot_corr =False)
        
        print('no prims R^2: ', r2, '; acc = ', final_acc)
        print('with prims R^2: ', r2_p, '; acc = ', final_acc_p)

    else:
        r2, dfs, final_acc = 0, 0, 0
        r2_p, dfs_p, final_acc_p = 0, 0, 0
        
    return {'res':res, 'mods':mods, 'mods_p':mods_p, 'tests': tests, 'cue_dicts': cue_dicts, 'acc_df':acc_df,\
           'dfs':dfs, 'dfs_p':dfs_p, 'r2':r2, 'r2_p':r2_p, 'final_acc':final_acc, 'final_acc_p': final_acc_p,\
           'final_acc_std':final_acc, 'final_acc_std_p': final_acc_p}


In [8]:
# data
num_classes = 22
batchsize=1

# RNN specs
input_size = num_classes
output_size = 1
num_layers = 1
hidden_size = 20
learningRate = 0.005

## running params
epochs = 12
num_sims = 8

# 
xavier_gain = 0.001

In [9]:
ops = '+'

num_inputs = 4
total_syms = ['A','B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P']
all_syms = total_syms[:num_inputs]
all_input_vals = list(np.arange(2,18))
input_vals = random.sample(all_input_vals,num_inputs)
# randomly select values for each input
cue_dict = {}
for i, s in enumerate(all_syms):
    cue_dict[s] = input_vals[i]
    
print(cue_dict)
primitives = generate_primitives(all_syms, cue_dict)
print(primitives)
pos_primitives = generate_pos_primitives(all_syms, cue_dict)
print(pos_primitives)

{'A': 16, 'B': 3, 'C': 4, 'D': 6}
[['A', '=', 16], ['B', '=', 3], ['C', '=', 4], ['D', '=', 6]]
[[('+', 'A'), '=', 16], [('+', 'B'), '=', 3], [('+', 'C'), '=', 4], [('+', 'D'), '=', 6]]


In [10]:
trainseqs = generate_pos_other(ops, all_syms[:-1], cue_dict)
trainseqs_p = generate_pos_other(ops, all_syms[:-1], cue_dict) + pos_primitives
testseqs = generate_pos_trials(ops, all_syms, all_syms, cue_dict)
print('trainseqs ', trainseqs)
print('testseqs ', testseqs )
res1 = collect_accs(trainseqs, trainseqs_p, testseqs, cue_dict, num_inputs)

fig, ax = plt.subplots(2,1)
heatmap_acc_sign(num_inputs, res1['dfs1'], ax[0])
heatmap_acc_sign(num_inputs, res1['dfs2'], ax[1])

trainseqs  [[('+', 'A'), ('+', 'B'), '=', 19], [('+', 'B'), ('+', 'C'), '=', 7], [('+', 'C'), ('+', 'A'), '=', 20]]
testseqs  [[('+', 'A'), ('+', 'A'), '=', 32], [('+', 'A'), ('+', 'B'), '=', 19], [('+', 'A'), ('+', 'C'), '=', 20], [('+', 'A'), ('+', 'D'), '=', 22], [('+', 'B'), ('+', 'A'), '=', 19], [('+', 'B'), ('+', 'B'), '=', 6], [('+', 'B'), ('+', 'C'), '=', 7], [('+', 'B'), ('+', 'D'), '=', 9], [('+', 'C'), ('+', 'A'), '=', 20], [('+', 'C'), ('+', 'B'), '=', 7], [('+', 'C'), ('+', 'C'), '=', 8], [('+', 'C'), ('+', 'D'), '=', 10], [('+', 'D'), ('+', 'A'), '=', 22], [('+', 'D'), ('+', 'B'), '=', 9], [('+', 'D'), ('+', 'C'), '=', 10], [('+', 'D'), ('+', 'D'), '=', 12]]
run time:  0.01611565351486206
fully trained models:  0


UnboundLocalError: local variable 'ft_train_acc' referenced before assignment