In [None]:
import sys
sys.path.append('/home/ego/Github/david/')

import os
from david.pipeline import TextPipeline

import tensorflow as tf
from tensorboard.plugins import projector
from tensorflow.contrib import tensorboard

In [None]:
def check_dir(save_path):
    '''Make sure the directory `save_path` exists, creating parents as needed.

    Args:
        save_path: directory path to create. An empty string refers to the
            current working directory and is treated as already existing.

    Raises:
        FileExistsError: if `save_path` exists but is not a directory.
    '''
    # os.makedirs('') raises FileNotFoundError, yet '' joined with a file name
    # simply targets the current directory, which is always present.
    if save_path == '':
        return
    # exist_ok=True replaces the racy "check, then create" sequence: another
    # process creating the directory in between no longer causes a failure.
    os.makedirs(save_path, exist_ok=True)

def write2file(index2word, meta_name: str, save_path: str, join_by='\n'):
    '''Write the vocabulary labels to `save_path/meta_name`.

    The labels are joined with `join_by` and written as a single string; no
    separator is appended after the last label.

    Args:
        index2word: iterable of string labels, in the order they are written.
        meta_name: file name of the metadata file.
        save_path: directory receiving the file (created via `check_dir`).
        join_by: separator placed between consecutive labels.
    '''
    check_dir(save_path)
    # Explicit UTF-8 so labels with non-ASCII characters do not depend on the
    # platform's default encoding.
    with open(os.path.join(save_path, meta_name), 'w', encoding='utf-8') as f:
        # write(), not writelines(): the payload is one string, and
        # writelines() would iterate over it character by character.
        f.write(join_by.join(index2word))


def save_embedding_config(tensor_name,
                          tf_writer,
                          meta_name: str,
                          save_path: str):
    '''TensorBoard embeddings visualizer configuration session.

    Creates a projector config with a single embedding entry pointing at
    `tensor_name` and at the metadata file `save_path/meta_name`, then hands
    it to `visualize_embeddings` together with `tf_writer`.

    Path:
    `tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto`

    Args:
        tensor_name: name of the tensor holding the embedding matrix.
        tf_writer: summary writer passed through to `visualize_embeddings`.
        meta_name: file name of the metadata (labels) file.
        save_path: directory that contains the metadata file.
    '''
    config = tensorboard.plugins.projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = tensor_name
    # Store an absolute path. A relative metadata_path is resolved by
    # TensorBoard against the directory holding the projector config rather
    # than the current working directory, so `save_path/meta_name` would be
    # looked up *inside* the log directory a second time and not be found.
    embedding.metadata_path = os.path.abspath(
        os.path.join(save_path, meta_name))
    tensorboard.plugins.projector.visualize_embeddings(tf_writer, config)


def evaluate_tensor_fetches(tensor,
                            placeholder,
                            vectors,
                            tf_saver,
                            model_name: str,
                            save_path: str):
    '''Run `tensor` in a fresh session and checkpoint the result.

    `vectors` is fed through `placeholder` while `tensor` is evaluated; the
    session is then saved with `tf_saver` under `save_path/model_name`.

    Returns:
        The value returned by `tf_saver.save`.
    '''
    with tf.Session() as session:
        feeds = {placeholder: vectors}
        session.run(tensor, feed_dict=feeds)
        checkpoint_prefix = os.path.join(save_path, model_name)
        saved_to = tf_saver.save(session, checkpoint_prefix)
    return saved_to

def create_embeddings(gensim_model,
                      tf_value=0.0,
                      tf_trainable=True,
                      tf_varname='W',
                      model_name='tf-model.cpkt',
                      meta_name='metadata.tsv',
                      save_path='models/',
                      join_by='\n'
                      ):
    '''TensorBoard embedding visualizer for a gensim model.

    Writes the vocabulary to `save_path/meta_name`, registers a TF variable
    shaped like the vector matrix in the projector config, then assigns the
    word vectors to that variable and saves a checkpoint.

    Args:
        gensim_model: object exposing `wv.vectors` (indexed as a 2-D matrix
            through `.shape`) and the vocabulary as `wv.index_to_key` or,
            failing that, `wv.index2word`.
        tf_value: constant used to initialise the variable before the vectors
            are assigned to it.
        tf_trainable: forwarded as the variable's `trainable` flag.
        tf_varname: name given to the variable.
        model_name: file name of the checkpoint.
        meta_name: file name of the metadata (labels) file.
        save_path: directory receiving the metadata, summaries and checkpoint.
        join_by: separator written between vocabulary entries.

    Returns:
        The value returned by `evaluate_tensor_fetches`.
    '''
    word_vectors = gensim_model.wv
    vectors = word_vectors.vectors
    # gensim 4 renamed `index2word` to `index_to_key`; accept either so the
    # function works with both generations of the library.
    index2word = getattr(word_vectors, 'index_to_key', None)
    if index2word is None:
        index2word = word_vectors.index2word
    vocab_size = vectors.shape[0]
    embedding_dim = vectors.shape[1]

    write2file(index2word, meta_name=meta_name,
               save_path=save_path, join_by=join_by)

    tf.reset_default_graph()
    W = tf.Variable(tf.constant(tf_value, shape=[vocab_size, embedding_dim]),
                    trainable=tf_trainable, name=tf_varname)

    writer = tf.summary.FileWriter(save_path, graph=tf.get_default_graph())
    try:
        save_embedding_config(tensor_name=W.name, tf_writer=writer,
                              meta_name=meta_name, save_path=save_path)
    finally:
        # The writer is not needed past this point; closing it releases the
        # event file instead of leaving it open for the kernel's lifetime.
        writer.close()

    placeholder = tf.placeholder(tf.float32, [vocab_size, embedding_dim])
    return evaluate_tensor_fetches(tensor=W.assign(placeholder),
                                   placeholder=placeholder,
                                   vectors=vectors,
                                   tf_saver=tf.train.Saver(),
                                   model_name=model_name,
                                   save_path=save_path)


def save_w2v_model(gensim_model: object, save_path: str, model_name='gensim-model.cpkt'):
    '''Persist `gensim_model` as `save_path/model_name`.

    The target directory is prepared with `check_dir` first, then the model's
    own `save` method is called with the joined path.
    '''
    check_dir(save_path)
    destination = os.path.join(save_path, model_name)
    gensim_model.save(destination)

In [None]:
# Directory passed as `save_path` to both calls below, so the gensim model and
# the TensorBoard files end up side by side.
MODEL_DIR = 'models/yt_model'

# NOTE(review): `model` is not defined in any cell visible here; it must come
# from an earlier cell or leftover kernel state. Confirm that the notebook
# still works after "Restart Kernel -> Run All".
save_w2v_model(model, save_path=MODEL_DIR)
# Last expression of the cell: create_embeddings returns the result of the TF
# saver's save() call.
create_embeddings(gensim_model=model, save_path=MODEL_DIR)