In [39]:
import torch as t
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

check_dir = '092319_05082019_checkpoints'

In [40]:
# Load saved data
model = t.load(f'{check_dir}/checkpoint_1.pth', map_location='cpu')
dataset = t.load(f'{check_dir}/dataset.pth')

In [41]:
def get_proportions(doc_weights):
    """
    Softmax document weights to get proportions
    """
    return F.softmax(doc_weights, dim=1)

def get_doc_vectors(doc_weights, topic_embeds):
    """
    Multiply by proportions by topic embeddings to get document vectors
    """
    proportions = get_proportions(doc_weights)
    doc_vecs = t.matmul(topic_embeds, t.t(proportions))

    return t.t(doc_vecs)

In [42]:
topic_embeds = model["model_state_dict"]["topic_embeds"]
word_embeds = model["model_state_dict"]["word_embeds.weight"]
doc_weights = model["model_state_dict"]["doc_weights.weight"]
vocab = list(dataset['term_freq_dict'].keys())
doc_embeds = get_doc_vectors(doc_weights, topic_embeds)

print(doc_embeds.size())

torch.Size([11314, 128])


In [43]:
def wordvec2idx(word_vec):
    return np.where(word_embeds.numpy() == word_vec.numpy())[0][0]

def vec2word(word_vec):
    idx = wordvec2idx(word_vec)
    return vocab[idx]

In [44]:
def get_n_closest_word_vecs(topic_vec, n=20):
    dist = F.cosine_similarity(word_embeds, topic_vec.unsqueeze(dim=1).transpose(0, 1))
    index_sorted = dist.argsort()
    return index_sorted[:n]

In [45]:

for i, topic in enumerate(topic_embeds.transpose(0, 1)):
    # Get 10 closest word_embeds
    # Print word_embeds words
    top_10 = get_n_closest_word_vecs(topic)
    
    print(f'\nTOPIC: {i}')
    for word_vec in top_10:
        print(vocab[word_vec])


TOPIC: 0
valleyhousedrive
egging
modlwuodcagmbcryegjbucxhurrc
hmo
supersets
oklahoma
myriads
impicitly
predecessor
simulators
fking
mdheslywblkarwurtlelk
overbearingly
qvrcfcpnewscstuberlinde
brush
grips
programmer
weakest
knbc
sird

TOPIC: 1
subw
someother
beneficial
sayonara
ahrma
tedgrusec
dsm
luns
erichtaylor
tsj
standardized
extramarital
grounders
bebtt
lmihnkcffvw
cpsoivflja
dictum
richmondva
betweenday
qfjtyln

TOPIC: 2
skidpad
clapped
mustapha
alweis
larrysimmons
lelsxb
emigration
jkeithalston
mcnhmjzfznlrjyjwec
mercyja
okeqg
roma
qaplus
quoth
apitoolkitsdk
liquid
nauseous
dickking
williamwhughes
guity

TOPIC: 3
precise
ipsc
petrivaris
wamsley
immolation
snooping
mtwwizkneyfttmtmtgqfplpdeqqq
objectcenter
ashortday
noo
cosmicsector
naked
talll
paces
beforewarned
latimes
kvkvjrmclhzricrbjiy
hogan
madonna
jrobvdjdeuf

TOPIC: 4
fkzhquhebuandrewcmuedu
billsatterlee
faddish
reliant
luisalicea
mtgqdeqtmtcttfffttmtmtdwtm
yromdnx
eskulap
cia
facsimile
cyprus
cocktails
centrally
karen
p

## 