In [1]:
from gensim.models import AuthorTopicModel

# Load a saved AuthorTopicModel from 'model.atmodel' (path is relative to the
# notebook's working directory). Every later cell reads from this `model`.
model = AuthorTopicModel.load('model.atmodel')



In [12]:
# List the model's topics (requesting up to 50). Each entry is a
# (topic id, 'weight*"word" + ...') pair, as seen in the output below.
model.show_topics(num_topics=50)

[(0,
  '0.052*"despiertaconloret" + 0.038*"justicia" + 0.035*"sistema" + 0.028*"sistema_justicia" + 0.024*"penal" + 0.013*"nl" + 0.011*"tabasco" + 0.011*"DespiertaConLoret…" + 0.008*"contrato" + 0.007*"cancela"'),
 (1,
  '0.072*"y" + 0.058*"tienes" + 0.045*"o" + 0.027*"opinión" + 0.021*"vía" + 0.019*"a" + 0.018*"escribe" + 0.006*"puedes" + 0.005*"pierdas" + 0.005*"mujeres"'),
 (2,
  '0.012*"usuaria" + 0.005*"mex" + 0.005*"procedimientos" + 0.004*"estante" + 0.004*"obregón" + 0.004*"cañeros" + 0.003*"Michael Phelps" + 0.003*"phelps" + 0.003*"alertaamarilla" + 0.003*"Secretario de Gobernación de"'),
 (3,
  '0.018*"felicidades" + 0.016*"crisis_venezuela" + 0.010*"políticos" + 0.009*"slp" + 0.009*"hospital" + 0.009*"conoce" + 0.008*"presos" + 0.007*"cristian" + 0.007*"a" + 0.006*"años"'),
 (4,
  '0.007*"that" + 0.005*"black" + 0.004*"comparto" + 0.004*"want" + 0.004*"obligada" + 0.004*"your" + 0.003*"periscope" + 0.003*"canceled" + 0.003*"veamos" + 0.003*"hand"'),
 (5,
  '0.028*"eslahorade

In [28]:
# Map every author name known to the model to that author's topic
# distribution, as returned by model.get_author_topics.
aut_top = {
    author: model.get_author_topics(author)
    for author in model.id2author.values()
}

In [31]:
# Display the author -> [(topic id, weight), ...] mapping.
aut_top

{'ADNPolitico': [(36, 0.99962283812681996)],
 'ActualidadRT': [(27, 0.99986539535824071)],
 'Adela_Micha': [(12, 0.5099999999999969), (42, 0.010000000000002972)],
 'Amsalazar': [(22, 0.99609516976902457)],
 'AnaPOrdorica': [(6, 0.092489474817048722),
  (27, 0.072393630080785351),
  (34, 0.83421978233782712)],
 'AristeguiOnline': [(0, 0.087995473347999897),
  (6, 0.060985333952853021),
  (11, 0.27384731158770742),
  (18, 0.053999349674035801),
  (27, 0.16585999063923196),
  (34, 0.11970720923134785),
  (36, 0.17747656590622746),
  (44, 0.059947438377104451)],
 'Azteca': [(1, 0.05602259345501643),
  (6, 0.01059351487030669),
  (11, 0.031903206961147602),
  (18, 0.025919474742182359),
  (27, 0.18309339068189173),
  (31, 0.33831994530066317),
  (34, 0.11411589733733708),
  (36, 0.23876126663049779)],
 'AztecaNoticias': [(1, 0.022949177498866602),
  (6, 0.23794111825449862),
  (11, 0.067497729649529617),
  (23, 0.15886416654341443),
  (27, 0.20841287456848651),
  (34, 0.021885421146041646),

In [None]:
%%time
from sklearn.manifold import TSNE
# Project each selected author's row of model.state.gamma down to 2-D.
tsne = TSNE(n_components=2, random_state=0)

# Minimum number of documents an author needs in order to be included.
smallest_author = 0

# Integer ids of the authors that pass the document-count threshold.
authors = [
    author_id
    for author_name, author_id in model.author2id.items()
    if len(model.author2doc[author_name]) >= smallest_author
]

# The return value is discarded; the coordinates are read from tsne.embedding_.
_ = tsne.fit_transform(model.state.gamma[authors, :])

In [None]:

from bokeh.io import output_file
# Configure Bokeh to write its output to 'grafica.html'.
output_file('grafica.html')

In [None]:
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

# Plot coordinates: the two columns of the t-SNE embedding.
x, y = tsne.embedding_[:, 0], tsne.embedding_[:, 1]

# Translate the selected author ids back into names, in the same order as the
# rows that were embedded.
author_names = [model.id2author[author_id] for author_id in authors]

# Marker radius grows linearly with the number of documents per author.
scale = 0.01
author_sizes = [len(model.author2doc[name]) for name in author_names]
radii = [n_docs * scale for n_docs in author_sizes]

# Column data shared by the glyphs and the hover tooltips.
source = ColumnDataSource(data={
    'x': x,
    'y': y,
    'author_names': author_names,
    'author_sizes': author_sizes,
    'radii': radii,
})

# Tooltip showing the author's name and document count.
hover = HoverTool(tooltips=[
    ("author", "@author_names"),
    ("size", "@author_sizes"),
])

p = figure(
    tools=[hover, 'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select'],
)
p.scatter(
    'x',
    'y',
    radius='radii',
    source=source,
    fill_alpha=0.6,
    line_color=None,
)
show(p)

In [None]:
from gensim.similarities import MatrixSimilarity


# Build a MatrixSimilarity index over the result of indexing the model with the
# full list of author names.
# NOTE(review): `index` is not referenced by any later cell visible here.
index = MatrixSimilarity(model[list(model.id2author.values())])



In [None]:

from gensim import matutils
import pandas as pd


# Topic distribution of every author, in the iteration order of
# model.id2author.values(). get_table() maps position i of this list back to a
# name via model.id2author[i], so it relies on that order matching the ids.
author_vecs = [model.get_author_topics(author) for author in model.id2author.values()]

def similarity(vec1, vec2):
    """Return a similarity score derived from the Hellinger distance.

    Both arguments are sparse topic vectors; each is expanded to a dense
    vector of length ``model.num_topics`` before the distance is taken.
    The score is ``1 / (1 + distance)``, so a distance of 0 yields 1.0 and
    larger distances yield smaller scores.
    """
    dense1 = matutils.sparse2full(vec1, model.num_topics)
    dense2 = matutils.sparse2full(vec2, model.num_topics)
    distance = matutils.hellinger(dense1, dense2)
    return 1.0 / (1.0 + distance)

def get_sims(vec):
    """Score ``vec`` against every entry of the notebook-level ``author_vecs``.

    Returns a list of similarity scores in the same order as ``author_vecs``.
    """
    scores = []
    for other_vec in author_vecs:
        scores.append(similarity(vec, other_vec))
    return scores

def get_table(name, top_n=10, smallest_author=1):
    """Return a DataFrame of the authors most similar to ``name``.

    Parameters
    ----------
    name : author whose topic distribution is used as the query.
    top_n : maximum number of rows kept after sorting.
    smallest_author : authors with fewer documents than this are left out.

    Returns
    -------
    pandas.DataFrame with columns 'Author', 'Score' and 'Size', sorted by
    'Score' in descending order. The queried author is scored like every
    other author, so it is not excluded from the table.
    """
    query_vec = model.get_author_topics(name)

    rows = []
    # Position i in the score list is looked up as author id i.
    for author_id, score in enumerate(get_sims(query_vec)):
        author = model.id2author[author_id]
        n_docs = len(model.author2doc[author])
        if n_docs < smallest_author:
            continue
        rows.append((author, score, n_docs))

    ranked = pd.DataFrame(rows, columns=['Author', 'Score', 'Size'])
    ranked = ranked.sort_values('Score', ascending=False)
    return ranked[:top_n]

In [None]:
# Rank authors by similarity to 'Pajaropolitico', keeping up to 136 rows.
get_table('Pajaropolitico',top_n=136)

In [None]:
import pickle
# Load the pickled dictionary object from 'dictionary.p'. The context manager
# closes the file handle as soon as unpickling finishes (the bare
# `pickle.load(open(...))` form left it open).
# NOTE(security): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
with open("dictionary.p", "rb") as dictionary_file:
    dictionary = pickle.load(dictionary_file)

In [None]:
import pyLDAvis.gensim
# Turn on pyLDAvis's notebook mode so prepared visualisations render inline.
pyLDAvis.enable_notebook()

In [None]:
# Prepare the pyLDAvis visualisation from the model, the corpus stored on the
# model, and the unpickled dictionary.
# NOTE(review): `model` here is an AuthorTopicModel; confirm that
# pyLDAvis.gensim.prepare accepts it and that `model.corpus` is populated
# after AuthorTopicModel.load — TODO verify.
pyLDAvis.gensim.prepare(model, model.corpus,dictionary)