# Save Hidden Layer Activations

This script saves the hidden layer activations of the trained models stored in the `models/` folder.
It should be run from the same Docker container in which the models were trained.

In [None]:
import tensorflow as tf
import yaml, os, h5py
import numpy as np
import pandas as pd
from nn_models import ConvModel, AffineModel, RecurrentModel
from nn_rmodels import ConvRModel, RecurrentRModel
from nn_train_utils import Dataset
import matplotlib
import pickle#, time
from tensorflow.contrib.rnn import *
from main import RunInfo

Specify model, run information, and file paths.

In [2]:
# Description of the model family analysed in this notebook, together with the
# plotting settings (colours / colormaps) associated with it.
modelinfo = {
    # --- identity ---
    'type': 'S',
    'typename': 'spatial_temporal',
    'base': 'spatial_temporal_4_8-16-16-32_32-32-64-64_7293',
    'base_regression': 'spatial_temporal_r_4_8-16-16-32_32-32-64-64_7293',
    # --- architecture ---
    'nlayers': 8,
    'max_nlayers': 8,
    # Can be adjusted manually to the maximum of the preferred-direction histogram.
    'max_act': 14,
    # --- control / plotting ---
    'control': False,
    'cmap': matplotlib.colors.ListedColormap(['midnightblue']),
    'color': 'midnightblue',
    'regression_color': 'darkturquoise',
    'control_cmap': 'Greys_r',
    'regression_cmap': matplotlib.colors.ListedColormap(['darkturquoise']),
    # --- strides ---
    's_stride': 2,
    't_stride': 3,
    # --- task selection and placeholders ---
    # 'regression_task' switches the later cells between the classification
    # and the regression model classes.
    'regression_task': False,
    'model_path': None,
    'exp_id': None,
}

# Run-level settings, wrapped in the project's RunInfo container (imported from main).
runinfo = RunInfo({
    'expid': 402,  # internal experiment id
    # Fraction in (0, 1] or 'auto'.
    # NOTE(review): the original comment on this line also said "if you want to run a
    # new analysis but keep the old results that it would otherwise overwrite,
    # increment by 1", which reads as if it belongs to 'expid' - confirm.
    # NOTE(review): a later cell multiplies this value by an integer, so 'auto' is
    # presumably resolved to a number by RunInfo - confirm.
    'datafraction': 'auto',
    'randomseed': 2000,
    'randomseed_traintest': 42,
    'dirr2threshold': 0.2,
    'verbose': 2,  # 0 (least), 1, 2 (most)
    'model_experiment_id': 22,  # used in model training, int or 'auto'
    # Change this folder to redirect to where the data is saved locally.
    'basefolder': '/media/data/DeepDraw/revisions/analysis-data/',
    'batchsize': 100,  # for layer representation generation
    # Only variable that is 'trial'-dependent, i.e. it should be changed when
    # rerunning stuff in the same folder; not semantically important for run info.
    'default_run': True,
    'dpi': 500,
})

In [3]:
# Names of the HDF5 datasets that hold the kinematic variables; a later cell
# stacks them into one array.
kinnames = ['endeffector_coords', 'joint_coords', 'muscle_coords', 'speed']

basefolder = runinfo['basefolder']

# `modelname` selects the model directory below and later supplies the suffix that
# is appended to model.model_path. The modelinfo dict defined in this notebook has
# no 'name' entry, so fail with an actionable message instead of a bare
# KeyError: 'name'.
if 'name' not in modelinfo:
    raise KeyError(
        "modelinfo['name'] is not set; assign the name of the model folder found "
        "under models/experiment_<model_experiment_id>/ before running this cell"
    )
modelname = modelinfo['name']

# Directory of the trained model and of its config.yaml.
model_path = f"{basefolder}models/experiment_{runinfo.model_experiment_id}/{modelname}/"
path_to_config_file = model_path + "config.yaml"

# Location of the test dataset (alternative location used previously:
# f'{basefolder}../deep_proprioception/dataset/pcr_dataset_test.hdf5').
PATH_TO_DATA = f'{basefolder}../pcr_data/'
path_to_data = PATH_TO_DATA + 'pcr_dataset_test.hdf5'
MODELS_DIR = basefolder

Saving the kinematic variables, input data, labels, and hidden layer activations.

In [None]:
modelbase = modelinfo['base']
datafraction = runinfo['datafraction']

# Seed NumPy's global RNG so the random permutation drawn later is reproducible.
np.random.seed(runinfo['randomseed'])

print(modelname)

# Stack all kinematic variables listed in `kinnames` along axis 1 of `kinarr`,
# and record a (variable name, column index) tuple for every stacked column.
# NOTE: path_to_data is always a string in this notebook; if it were None,
# `idxtups` and `kinarr` would be undefined for the code that follows.
if path_to_data is not None:
    with h5py.File(path_to_data, 'r') as datafile:
        idxtups = []
        # Dataset.shape is read from the HDF5 metadata; no need to load the array.
        shape = datafile[kinnames[0]].shape
        kinarr = np.zeros((shape[0], 0, shape[2]))
        for name in kinnames:
            # Read each dataset from disk exactly once.
            kin = datafile[name][()]
            if kin.ndim < 2:
                # No second axis: treat as a single column and repeat each
                # value shape[2] times so it lines up with the 3-D arrays.
                ncols = 1
                kin = kin.reshape(-1, 1)
                kin = np.repeat(kin, shape[2], axis=1)
                kin = kin.reshape(kin.shape[0], 1, kin.shape[1])
            else:
                ncols = kin.shape[1]
            idxtups += list(zip([name] * ncols, range(ncols)))
            kinarr = np.concatenate((kinarr, kin), axis=1)

idx = pd.MultiIndex.from_tuples(idxtups)

# SPINDLE FIRING TEST DATA
test_data_path = os.path.join(PATH_TO_DATA, 'pcr_dataset_test.hdf5')
dataset = Dataset(test_data_path, dataset_type='test', key='spindle_info')

# Extract needed data
data, labels = dataset.test_data, dataset.test_labels

# Optionally keep only a random fraction of the test samples. The same indices
# are applied to inputs, labels and kinematics so that they stay aligned.
# NOTE(review): runinfo is created with datafraction='auto'; the product below
# needs a number, so RunInfo presumably resolves 'auto' - confirm.
if datafraction is not None:
    random_idx = np.random.permutation(data.shape[0])
    subset_num = int(datafraction * random_idx.size)
    chosen = random_idx[:subset_num]
    data, labels, kinarr = data[chosen], labels[chosen], kinarr[chosen]

nsamples, ninputs, ntime, _ = data.shape
# The batch size can be tuned to the GPU capacity available for the forward pass.
batch_size = runinfo.batchsize
# Integer division: samples beyond the last full batch are not processed.
num_steps = nsamples // batch_size

# CREATE PANDAS PANEL
# NOTE(review): pd.Panel was removed in pandas 0.25, so this line relies on the
# older pandas version of the training container.
print('kinarr shape', kinarr.shape)
kinvars = pd.Panel(kinarr.swapaxes(0, 1), items=idx)

# INITIALIZE MODEL
# Start from a clean default graph before any model objects are created.
tf.reset_default_graph()

# Read the training-time configuration stored next to the model.
# NOTE(review): yaml.load is called without an explicit Loader. PyYAML 5.1+ warns
# about this and PyYAML 6 requires the argument; yaml.safe_load would be the safer
# call if config.yaml contains only plain YAML types - confirm before switching.
with open(path_to_config_file, 'r') as myfile:
    model_config = yaml.load(myfile)
train_mean = model_config['train_mean']


# Two switches select the network class:
#   * modelinfo['type'] in ('S', 'ST') -> convolutional model, otherwise recurrent
#   * modelinfo['regression_task']     -> regression variant (built with noutspace=6),
#                                         otherwise classification variant (takes nclasses)
is_regression = modelinfo['regression_task']
is_convolutional = modelinfo['type'] in ['S', 'ST']

if is_convolutional:
    # Constructor arguments shared by ConvModel and ConvRModel.
    conv_args = (
        model_config['arch_type'],
        int(model_config['nlayers']),
        model_config['n_skernels'],
        model_config['n_tkernels'],
        int(model_config['s_kernelsize']),
        int(model_config['t_kernelsize']),
        int(model_config['s_stride']),
        int(model_config['t_stride']),
    )
    if is_regression:
        model = ConvRModel(model_config['experiment_id'], *conv_args, noutspace=6)
    else:
        model = ConvModel(model_config['experiment_id'], model_config['nclasses'], *conv_args)
else:
    print('building rec model')
    # Constructor arguments shared by RecurrentModel and RecurrentRModel.
    rec_args = (
        model_config['rec_blocktype'],
        int(model_config['n_recunits']),
        int(model_config['npplayers']),
        [int(nfilters) for nfilters in model_config['nppfilters']],
        int(model_config['s_kernelsize']),
        int(model_config['s_stride']),
    )
    if is_regression:
        model = RecurrentRModel(model_config['experiment_id'], *rec_args, noutspace=6)
    else:
        model = RecurrentModel(model_config['experiment_id'], model_config['nclasses'], *rec_args)


# Point the model at the checkpoint directory on this machine.
print("Old model path: ", model.model_path)
if modelinfo['regression_task']:
    # Regression models: use the directory assembled from basefolder / experiment / name.
    model.model_path = model_path
else:
    # Classification models: prefix the stored path with basefolder and append
    # the tail of the model name (last 3 characters for control models, last 2
    # otherwise - presumably the instance suffix such as "_1"; confirm).
    suffix_len = 3 if modelinfo['control'] else 2
    model.model_path = basefolder + model.model_path + modelname[-suffix_len:]
print("New model path: ", model.model_path)

print('Final model.model_path', model.model_path)

# SAVE FOLLOW THROUGH
# Outputs are written to the current working directory (an earlier version used
# runinfo.datafolder(modelinfo) instead). The os module has no get_cwd();
# the function is os.getcwd().
datafolder = os.getcwd()
os.makedirs(datafolder, exist_ok=True)

kinvars.to_hdf(datafolder + "/kinvars.hdf5", key="data")
print("Kinvars saved")

# Use context managers so the pickle files are flushed and closed right away.
with open(datafolder + "/data.pkl", "wb") as data_file:
    pickle.dump(data, data_file, protocol=4)
print("MF saved")

with open(datafolder + "/labels.pkl", "wb") as labels_file:
    pickle.dump(labels, labels_file, protocol=4)
print("Labels saved")

# Run the test data through the restored model and write the activations of each
# entry of `net` (except the last one) to its own file, l<j>.hdf5, with one HDF5
# dataset per batch (named by the batch index).
mygraph = tf.Graph()
with mygraph.as_default():
    # Declare placeholders for input data and labels
    X = tf.placeholder(tf.float32, shape=[batch_size, ninputs, ntime, 2], name="X")
    y = tf.placeholder(tf.int32, shape=[batch_size], name="y")

    # Classification models return an extra `probabilities` value from predict().
    if not modelinfo['regression_task']:
        scores, probabilities, net = model.predict(X, is_training=False)
    else:
        scores, net = model.predict(X, is_training=False)

    restorer = tf.train.Saver()
    myconfig = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)

    # Loop invariants: build the list of layer tensors and the checkpoint path once.
    layer_tensors = list(net.values())
    ckpt_filepath = os.path.join(model.model_path, 'model.ckpt')

    # The last entry of `net` is skipped, as in the original loop bound.
    for j in range(len(layer_tensors) - 1):
        with tf.Session(config=myconfig) as sess:
            print('checkpoint filepath', ckpt_filepath)
            restorer.restore(sess, ckpt_filepath)

            # The context manager closes (and thereby flushes) the HDF5 file once
            # all batches of this layer are written; previously the handle was
            # never closed.
            with h5py.File(datafolder + f"/l{j}.hdf5", 'w') as layer:
                for i in range(num_steps):
                    if runinfo.verbose >= 1:
                        print('batch %d / %d' % (i, num_steps))
                    batch = slice(batch_size * i, batch_size * (i + 1))
                    layer_batch = sess.run(
                        layer_tensors[j],
                        feed_dict={X: data[batch], y: labels[batch]},
                    )
                    layer.create_dataset(str(i), data=layer_batch)