In [21]:
import shutil
from pathlib import Path

from IPython.display import HTML as html_print
from IPython.display import display


import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

from lstm import WordLSTM, LSTM, LSTMCell
from data import WordTokenizer


__author__ = '__Girish_Hegde__'

In [35]:
# --- Configuration: checkpoint, compute device, corpus and TensorBoard log paths ---
CKPT = Path('./data/runs/best.pt')
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
TEXT = './data/datasets/corpus.txt'
LOGDIR = './data/viz/embedding'

# --- Model: rebuild WordLSTM from the hyperparameters stored in the checkpoint ---
# NOTE(review): torch.load unpickles the file, so CKPT should only point at a trusted checkpoint.
ckpt = torch.load(CKPT, map_location=DEVICE)
net = WordLSTM(
    ckpt['VOCAB_SIZE'],
    ckpt['EMBEDDING_DIM'],
    ckpt['HIDDEN_SIZE'],
    ckpt['NUM_LAYERS'],
)
net.load_state_dict(ckpt['state_dict'])
net = net.to(DEVICE)
net.eval()
# The network's `emb` attribute is what gets applied to the token ids later on.
embedder = net.emb

# --- Vocabulary: the checkpoint stores index -> token; build the inverse lookup ---
int2token = ckpt['int2token']
token2int = {token: index for index, token in int2token.items()}
firing = []  # starts empty; not used by any cell visible here

# --- Corpus: read, tokenize (lowercased), and keep only tokens present in the vocabulary ---
text = WordTokenizer.read(TEXT, encoding='utf-8')
_, _, tokens = WordTokenizer.tokenize(text, lowercase=True)
tokens = [word for word in tokens if word in token2int]

In [36]:
# Map every retained token to its vocabulary index and place the ids on DEVICE.
enc = torch.tensor(
    [token2int[word] for word in tokens],
    dtype=torch.long,
    device=DEVICE,
)
# Look up one embedding row per token id; detach so the result carries no autograd history.
emb = embedder(enc).detach()

In [39]:
# Remove any existing log directory, then open a fresh SummaryWriter on the same path.
logdir_path = Path(LOGDIR)
if logdir_path.exists():
    shutil.rmtree(logdir_path)
writer = SummaryWriter(log_dir=LOGDIR)


In [40]:
# Export the embedding matrix for the TensorBoard projector, labelling each
# row with its token (tokens and emb rows come from the same filtered list,
# so they line up one-to-one).
try:
    writer.add_embedding(emb, metadata=tokens)
finally:
    # Always close the writer, even if the export fails, so the event-file
    # handle under LOGDIR is released before the directory is reset again.
    writer.close()