In [1]:
from gensim.models import AuthorTopicModel

# Load the author-topic model previously saved to 'model.atmodel'.
model = AuthorTopicModel.load('model.atmodel')



In [3]:
# Display up to 10 topics as (topic_id, 'weight*"word" + ...') pairs.
model.show_topics(num_topics=10)

[(18,
  '0.017*"el" + 0.014*"a" + 0.014*"mazo" + 0.013*"paso" + 0.012*"edomex" + 0.011*"ieem" + 0.010*"gobernador" + 0.009*"elección" + 0.009*"pan" + 0.007*"Del Mazo"'),
 (12,
  '0.026*"columna" + 0.010*"enlared" + 0.007*"vía" + 0.005*"venezuela" + 0.005*"fernando" + 0.004*"apoyar" + 0.003*"poesía" + 0.003*"TU VOZ" + 0.003*"notieress" + 0.003*"editor"'),
 (9,
  '0.007*"mesaparatodos" + 0.006*"we" + 0.005*"go" + 0.004*"vocesendirecto" + 0.004*"lipton" + 0.004*"fmi" + 0.004*"David Lipton" + 0.004*"obamacare" + 0.004*"espalda" + 0.004*"still"'),
 (15,
  '0.033*"laprimeramvs" + 0.009*"alaire_laprimeramvs" + 0.008*"yosoitu" + 0.006*"Pascual Castro" + 0.004*"antony" + 0.004*"carbonell" + 0.004*"Cristina Carbonell" + 0.003*"opiniónlsr" + 0.002*"freno" + 0.002*"femenina"'),
 (10,
  '0.015*"enfoque_noticias" + 0.006*"or" + 0.005*"excélsiorinforma" + 0.005*"its" + 0.004*"excélsiorinforma_excélsiortv" + 0.004*"sandoval" + 0.003*"Baja California Sur" + 0.003*"some" + 0.003*"down" + 0.003*"them"'),

In [4]:
# Map every author name to its topic distribution: a list of
# (topic_id, probability) pairs as returned by get_author_topics.
aut_top = {
    author: model.get_author_topics(author)
    for author in model.id2author.values()
}

In [5]:
# Display the full author -> topic-distribution mapping.
aut_top

{'ADNPolitico': [(16, 0.99929650810007709)],
 'ActualidadRT': [(16, 0.99971426158086563)],
 'Adela_Micha': [(2, 0.99550011550942041)],
 'Amsalazar': [(2, 0.99311653655557697)],
 'AnaPOrdorica': [(18, 0.99702327433593396)],
 'AristeguiOnline': [(7, 0.99960485729453841)],
 'AztecaNoticias': [(14, 0.99873227708280488)],
 'Canal22': [(2, 0.99910215267298641)],
 'CanalOnceTV': [(6, 0.9981340187304123)],
 'CarlosLoret': [(17, 0.99050695173763437)],
 'CiroGomezL': [(5, 0.99896284724184703)],
 'DeniseDresserG': [(5, 0.99878556471165814)],
 'DeniseMaerker': [(2, 0.99920302349447487)],
 'DiegoEOsorno': [(0, 0.14999666910894646),
  (8, 0.14999705419968132),
  (13, 0.14999719492515337),
  (15, 0.14999708595593339),
  (16, 0.29285363591085511)],
 'E_Q_': [(1, 0.091824800570482673),
  (2, 0.027273594297190385),
  (6, 0.21067892517508072),
  (7, 0.3797903686329398),
  (14, 0.034884057604243242),
  (16, 0.1832342347084242),
  (19, 0.055287684101352895)],
 'EfektoNoticias': [(1, 0.18564323882958397),
 

In [6]:
%%time
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=0)
smallest_author = 0  # Ignore authors with documents less than this.
authors = [model.author2id[a] for a in model.author2id.keys() if len(model.author2doc[a]) >= smallest_author]
_ = tsne.fit_transform(model.state.gamma[authors, :])  # Result stored in tsne.embedding_

Wall time: 1.19 s


In [None]:

from bokeh.io import output_file
# Direct subsequent Bokeh output to the HTML file 'grafica.html'.
output_file('grafica.html')

In [None]:
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

# 2-D t-SNE coordinates, one point per entry of `authors`.
x = tsne.embedding_[:, 0]
y = tsne.embedding_[:, 1]
author_names = [model.id2author[author_id] for author_id in authors]

# Each circle's radius is proportional to the author's number of documents.
scale = 0.01
author_sizes = [len(model.author2doc[name]) for name in author_names]
radii = [n_docs * scale for n_docs in author_sizes]

source = ColumnDataSource(
    data={
        'x': x,
        'y': y,
        'author_names': author_names,
        'author_sizes': author_sizes,
        'radii': radii,
    }
)

# Show the author's name and document count on mouse-over.
hover = HoverTool(
    tooltips=[
        ("author", "@author_names"),
        ("size", "@author_sizes"),
    ]
)

p = figure(tools=[hover, 'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select'])
p.scatter('x', 'y', radius='radii', source=source, fill_alpha=0.6, line_color=None)
show(p)

In [None]:
from gensim.similarities import MatrixSimilarity


# Build a MatrixSimilarity index from what the model returns when indexed with
# the list of all author names (presumably one topic vector per author — confirm).
index = MatrixSimilarity(model[list(model.id2author.values())])



In [None]:

from gensim import matutils
import pandas as pd


# Topic distribution of every author, in the iteration order of model.id2author.
# get_table uses the position in this list as the key into model.id2author, so it
# relies on that iteration order matching the author ids — TODO confirm.
author_vecs = [model.get_author_topics(author) for author in model.id2author.values()]

def similarity(vec1, vec2):
    """Return a similarity score for two sparse topic vectors.

    Both vectors are expanded to dense arrays of length ``model.num_topics``,
    their Hellinger distance ``d`` is computed, and ``1 / (1 + d)`` is returned,
    so a distance of zero yields a score of 1.0.
    """
    dense1 = matutils.sparse2full(vec1, model.num_topics)
    dense2 = matutils.sparse2full(vec2, model.num_topics)
    return 1.0 / (1.0 + matutils.hellinger(dense1, dense2))

def get_sims(vec):
    """Return the similarity of ``vec`` to each entry of ``author_vecs``, in order."""
    return [similarity(vec, other) for other in author_vecs]

def get_table(name, top_n=10, smallest_author=1):
    """Rank authors by topic similarity to the author ``name``.

    Args:
        name: Author whose topic distribution is used as the query.
        top_n: Maximum number of rows to return.
        smallest_author: Authors with fewer documents than this are excluded.

    Returns:
        A pandas DataFrame with columns 'Author', 'Score' and 'Size' (number of
        documents), sorted by descending score and cut to ``top_n`` rows. The
        query author is not filtered out of the result.
    """
    scores = get_sims(model.get_author_topics(name))

    # The position of a score in the list is used as the author id.
    rows = []
    for author_id, score in enumerate(scores):
        author_name = model.id2author[author_id]
        n_docs = len(model.author2doc[author_name])
        if n_docs >= smallest_author:
            rows.append((author_name, score, n_docs))

    ranked = pd.DataFrame(rows, columns=['Author', 'Score', 'Size'])
    return ranked.sort_values('Score', ascending=False)[:top_n]

In [None]:
# Similarity ranking for 'Pajaropolitico'; top_n=136 is presumably large enough
# to list every author — TODO confirm against the number of authors in the model.
get_table('Pajaropolitico',top_n=136)

In [None]:
import pickle
# Open the file in a `with` block so the handle is closed as soon as loading
# finishes (the bare open() call left it open).
# NOTE: unpickling can execute arbitrary code — only load a dictionary.p you trust.
with open("dictionary.p", "rb") as dictionary_file:
    dictionary = pickle.load(dictionary_file)

In [None]:
import pyLDAvis.gensim
# Enable pyLDAvis output inside the notebook.
pyLDAvis.enable_notebook()

In [None]:
# Prepare the pyLDAvis visualisation from the model, its corpus attribute and the dictionary.
# NOTE(review): `model` is an AuthorTopicModel; confirm that pyLDAvis.gensim.prepare
# accepts this model type before relying on this cell.
pyLDAvis.gensim.prepare(model, model.corpus,dictionary)