# Store embeddings for t-SNE (TensorBoard)

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer as xav_init

from input_pipeline import eval_data
from model_fns import vgg16,inceptionv4,resnet50

# Run choices: the model constructor to build, the checkpoint folder to read
# from, the name under which embeddings are stored, and the variable scope
# the model is built (and later restored) under.
cur_model   = resnet50
cur_checkpt = 'threesplit_resnet_hingeloss_alpha1'
cur_name    = 'threesplit_resnet_hingeloss'
cur_scope   = 'threesplit'

# Only use GPU RAM that is required (grow the allocation on demand)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# Device selection. The empty CUDA_VISIBLE_DEVICES value is expected to hide
# all GPUs from CUDA (CPU-only run); put a device index here to use a GPU.
# NOTE(review): these are set after `import tensorflow`; presumably that is
# early enough because no session has been created yet -- confirm.
os.environ["CUDA_DEVICE_ORDER"]    = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = ''

# Report the selection from Python instead of shelling out, so the cell does
# not depend on IPython `!` syntax or on shell `$VAR` expansion.
print("GPU Device in use: '{}'".format(os.environ["CUDA_VISIBLE_DEVICES"]))

# Folders for storage/retrieval. Every path keeps its trailing slash because
# the embedding cell further down builds paths by plain string concatenation.
main_directory  = '../'
data_directory        = main_directory + 'data/'
embedding_directory   = main_directory + 'embeds/'
checkpoints_directory = main_directory + 'checkpts/'


def _read_class_names(path):
    """Return the class names stored one per line in the file at `path`.

    Only the trailing newline is removed from each line, so the last name is
    kept intact even when the file does not end with a newline (slicing off
    the final character unconditionally would truncate it).
    """
    with open(path, 'r') as cln_file:
        return [line.rstrip('\n') for line in cln_file]


class_names = _read_class_names(data_directory + 'classnames.csv')  # 345 classes
class_names_small = _read_class_names(data_directory + 'classnames_small.csv')  # 5 fruits, 5 vehicles

# Lookup tables from class name to its (0-based) line position in the file
class_dict = {label: i for i, label in enumerate(class_names)}
class_dict_small = {label: i for i, label in enumerate(class_names_small)}

GPU Device in use: ''


In [2]:
# Build the evaluation input pipeline. `eval_data` returns four values; only
# the last two are used here (fed to the model and fetched alongside its
# output in the embedding cell).
# NOTE(review): presumably `small=True` selects the reduced class set and
# `int_labels=False` yields string labels -- confirm in input_pipeline.
_, _, images, labels = eval_data(
    int_labels=False,
    small=True,
    batch_size=30,
)

In [3]:
# Instantiate the selected architecture on the evaluation images, under the
# variable scope that the checkpoint restore later filters on. The returned
# tensor is what the embedding cell fetches and stores as the embedding.
logits = cur_model(
    images,
    name=cur_scope,
    mode=None,
)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [None]:
# from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
# print_tensors_in_checkpoint_file('../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-5000', 
#                                  tensor_name='', all_tensors=False)

In [5]:
# # Limit CPU
# session_conf = tf.ConfigProto(
#       intra_op_parallelism_threads=3,
#       inter_op_parallelism_threads=3)
# sess = tf.Session(config=session_conf)

In [6]:
from tensorflow.contrib.tensorboard.plugins import projector

# Number of evaluation batches fetched per checkpoint, and the width of the
# model output that is stored as the embedding (seeds the result arrays, so a
# model emitting a different width fails loudly at concatenation time).
n_eval_batches = 50
embedding_dim  = 64

# Restore only the variables that live under the model's scope
saver = tf.train.Saver(
    var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=cur_scope)
)

checkpoint_dir = checkpoints_directory + cur_checkpt + '/'
embedding_dir  = embedding_directory   + cur_name + '/'

if not os.path.exists(checkpoint_dir):
    raise Exception("Checkpoint directory not found\nModel name: {}\nDirectory: {}".format(cur_name, checkpoint_dir))
if not os.path.exists(embedding_dir):
    os.makedirs(embedding_dir)

ckpt_saves = [5000,10000,15000,20000,25000,30000,35000,40000,45000,50000]
# ckpt_saves = [2000,4000,6000]
embeds_out = {}  # checkpoint step -> (n_samples, embedding_dim) array
labels_out = {}  # checkpoint step -> (n_samples,) array of labels

# Calculating embeddings
# Uses the `config` built in the setup cell; `session_conf` exists only in a
# commented-out cell, so relying on it breaks a fresh Restart & Run All.
with tf.Session(config=config) as sess:
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    sess.run(tf.tables_initializer())

    for ckpt_id in ckpt_saves:
        saver.restore(sess, checkpoint_dir + 'checkpoint-' + str(ckpt_id))

        # Collect batches in lists and join them once, instead of re-copying
        # the growing array on every step. The empty seed arrays keep the
        # resulting dtype and the column-count check the same as appending
        # onto a pre-made empty array.
        embed_batches = [np.empty(shape=(0, embedding_dim))]
        label_batches = [np.empty(shape=(0), dtype=str)]
        for _ in range(n_eval_batches):
            embed_out, label_out = sess.run([logits, labels])
            embed_batches.append(embed_out)
            label_batches.append(label_out)
        embeds_out[ckpt_id] = np.concatenate(embed_batches, axis=0)
        labels_out[ckpt_id] = np.concatenate(label_batches, axis=0)

# Storing embeddings: one TF variable and one label (metadata) file per checkpoint
embedding_vars = {ckpt_id : tf.Variable(embeds_out[ckpt_id], name=cur_name + '-embedding-checkpt' + str(ckpt_id)) \
                  for ckpt_id in ckpt_saves}
label_files    = {ckpt_id : os.path.abspath(embedding_dir + 'labels_ckpt-' + str(ckpt_id) + '.csv') \
                  for ckpt_id in ckpt_saves}
for ckpt_id in ckpt_saves:
    # NOTE(review): if the fetched labels are `bytes` objects (Python 3),
    # converting to str writes them as "b'...'" -- confirm the label files
    # look as intended.
    np.savetxt(label_files[ckpt_id], labels_out[ckpt_id].astype(str), fmt="%s") # Write labels to file

with tf.Session(config=config) as sess:
    writer = tf.summary.FileWriter(embedding_dir, sess.graph)
    try:
        for embedding_var in embedding_vars.values():
            sess.run(embedding_var.initializer)
        # Add embedding visualizer: tie every embedding tensor to its label file
        config_projector = projector.ProjectorConfig()

        for ckpt_id, label_file in label_files.items():
            embedding = config_projector.embeddings.add()
            embedding.tensor_name = embedding_vars[ckpt_id].name
            embedding.metadata_path = label_file

        projector.visualize_embeddings(writer, config_projector)
        # Save the model (only the embedding variables, not the network weights)
        saver_embed = tf.train.Saver(list(embedding_vars.values()))
        saver_embed.save(sess, embedding_dir + 'multiple_checkpoints')
    finally:
        # Flush and release the event file even if saving fails
        writer.close()


INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-5000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-10000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-15000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-20000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-25000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-30000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-35000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-40000
INFO:tensorflow:Restoring parameters from ../checkpts/threesplit_resnet_hingeloss_alpha1/checkpoint-45000
INFO:tensorflow:Restoring parameters from ../ch