In [1]:
from gensim.models import AuthorTopicModel

# Load a previously saved author-topic model. The path is relative, so the
# file 'model.atmodel' must be in the notebook's working directory.
# NOTE(review): gensim's load() is understood to unpickle the file, so only
# load model files that come from a trusted source.
model = AuthorTopicModel.load('model.atmodel')

In [2]:
# Display the model's topics, requesting up to 100 of them; each entry in the
# output pairs a topic id with its highest-weighted words.
model.show_topics(num_topics=100)

[(0,
  '0.000*"conducen" + 0.000*"recibes" + 0.000*"achiquini" + 0.000*"mascotas" + 0.000*"vacunar" + 0.000*"agencias" + 0.000*"pañales" + 0.000*"tania" + 0.000*"martibatres" + 0.000*"automtico"'),
 (1,
  '0.024*"años" + 0.014*"fotos" + 0.010*"rius" + 0.010*"vida" + 0.009*"despiertaconloret" + 0.008*"millones" + 0.008*"mundo" + 0.007*"mujer" + 0.007*"política" + 0.007*"río"'),
 (2,
  '0.000*"conducen" + 0.000*"recibes" + 0.000*"achiquini" + 0.000*"mascotas" + 0.000*"vacunar" + 0.000*"agencias" + 0.000*"pañales" + 0.000*"tania" + 0.000*"martibatres" + 0.000*"automtico"'),
 (3,
  '0.000*"conducen" + 0.000*"recibes" + 0.000*"achiquini" + 0.000*"mascotas" + 0.000*"vacunar" + 0.000*"agencias" + 0.000*"pañales" + 0.000*"tania" + 0.000*"martibatres" + 0.000*"automtico"'),
 (4,
  '0.164*"ntt" + 0.043*"rivapa" + 0.034*"porsinoloviste" + 0.023*"lt" + 0.021*"spinoso" + 0.018*"jairavalosl" + 0.012*"latentación" + 0.010*"impreso" + 0.009*"ampliamos" + 0.008*"ejecentral"'),
 (5,
  '0.000*"conducen" 

In [3]:
# Map every author name known to the model to the result of
# model.get_author_topics() for that author. Only the names (the values of
# id2author) are needed, so iterate over them directly instead of unpacking
# and discarding the ids.
aut_top = {
    author_name: model.get_author_topics(author_name)
    for author_name in model.id2author.values()
}

In [4]:
# Show the author -> topics mapping built from model.get_author_topics();
# each value is a list of (topic id, weight) pairs.
aut_top

{'ADNPolitico': [(1, 0.03389455564465485),
  (9, 0.10829684830645275),
  (12, 0.13287348292325088),
  (13, 0.16173759610677979),
  (60, 0.25467140398268601),
  (65, 0.15565198079059506),
  (66, 0.15250081031299906)],
 'ActualidadRT': [(1, 0.095935198260091337),
  (9, 0.037752682122159896),
  (12, 0.032269969479726324),
  (54, 0.2859850740056431),
  (60, 0.25968565832639295),
  (65, 0.099875063028339608),
  (66, 0.18834347223507714)],
 'Adela_Micha': [(1, 0.14620347240794149),
  (9, 0.069583684793647876),
  (12, 0.20436260083534111),
  (60, 0.085248490037325861),
  (66, 0.14013523966477617),
  (75, 0.34997444914593218)],
 'Amsalazar': [(1, 0.027161830463997172),
  (9, 0.18213217407073903),
  (12, 0.3452922044924252),
  (30, 0.010700341514218017),
  (58, 0.030616099409469459),
  (60, 0.074743212645751497),
  (65, 0.14454778014730163),
  (66, 0.053247888821082424),
  (73, 0.04604953461002366),
  (83, 0.035162396352569444),
  (95, 0.045947796660293541)],
 'AnaPOrdorica': [(1, 0.02177705459

In [5]:
%%time
from sklearn.manifold import TSNE
# 2-D t-SNE with a fixed seed so the layout is repeatable between runs.
tsne = TSNE(n_components=2, random_state=0)

# Authors with fewer documents than this threshold are left out
# (0 keeps every author).
smallest_author = 0
authors = [
    author_id
    for name, author_id in model.author2id.items()
    if len(model.author2doc[name]) >= smallest_author
]

# Fit on the rows of model.state.gamma selected by the author ids. The return
# value is discarded; the coordinates are read later from tsne.embedding_.
_ = tsne.fit_transform(model.state.gamma[authors, :])

CPU times: user 1.18 s, sys: 32 ms, total: 1.21 s
Wall time: 609 ms


In [6]:

from bokeh.io import output_file
# Direct Bokeh output to the file 'grafica.html' (relative path, so it is
# written to the notebook's working directory).
output_file('grafica.html')

In [7]:
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

# 2-D t-SNE coordinates; row i corresponds to authors[i] because the embedding
# was fitted on model.state.gamma[authors, :].
x = tsne.embedding_[:, 0]
y = tsne.embedding_[:, 1]
author_names = [model.id2author[a] for a in authors]

# Each author's circle radius is proportional to their number of documents.
scale = 0.01
author_sizes = [len(model.author2doc[a]) for a in author_names]
radii = [size * scale for size in author_sizes]

# Column names here are referenced by the glyph call and the hover tooltips.
source = ColumnDataSource(
    data=dict(
        x=x,
        y=y,
        author_names=author_names,
        author_sizes=author_sizes,
        radii=radii,
    )
)

# Tooltip shows the author's name and document count for the hovered circle.
hover = HoverTool(
    tooltips=[
        ("author", "@author_names"),
        ("size", "@author_sizes"),
    ]
)

p = figure(tools=[hover, 'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select'])
# Use circle() rather than scatter(): circle is the glyph method that takes a
# data-space `radius`, whereas scatter is marker/size based and recent Bokeh
# releases deprecate passing `radius` to it.
p.circle('x', 'y', radius='radii', source=source, fill_alpha=0.6, line_color=None)
show(p)