In [1]:
from gensim.models import AuthorTopicModel

# Load a previously saved author-topic model from the working directory.
# NOTE(review): gensim's load() unpickles the file, so only load model files
# that come from a trusted source.
model = AuthorTopicModel.load('model.atmodel')

In [2]:
# Display up to 100 topics; each entry is (topic_id, '<weight>*"<word>" + ...').
model.show_topics(num_topics=100)

[(0,
  '0.017*"cdmx" + 0.013*"oaxaca" + 0.012*"katia" + 0.011*"video" + 0.011*"chiapas" + 0.010*"huracn" + 0.007*"veracruz" + 0.007*"epn" + 0.007*"personas" + 0.007*"irma"'),
 (1,
  '0.000*"giran" + 0.000*"bsico" + 0.000*"aporta" + 0.000*"mariposas" + 0.000*"culturizando" + 0.000*"normalayon" + 0.000*"dutch" + 0.000*"óptimas" + 0.000*"ensueño" + 0.000*"comparezca"'),
 (2,
  '0.025*"chumel" + 0.021*"websanjuanamartinez" + 0.021*"pederasta" + 0.017*"papi" + 0.013*"cómico" + 0.013*"chacotero" + 0.013*"lafallaras" + 0.010*"Frente Amplio del PRD" + 0.009*"contratar" + 0.009*"cadillac"'),
 (3,
  '0.000*"giran" + 0.000*"bsico" + 0.000*"aporta" + 0.000*"mariposas" + 0.000*"culturizando" + 0.000*"normalayon" + 0.000*"dutch" + 0.000*"óptimas" + 0.000*"ensueño" + 0.000*"comparezca"'),
 (4,
  '0.227*"enlared" + 0.025*"EnLaRed DiegoPetersen" + 0.025*"gilgamesx" + 0.012*"hermetismo" + 0.011*"botica" + 0.010*"hualgami" + 0.005*"transa" + 0.005*"elocuente" + 0.005*"convive" + 0.000*"culturizando"'),
 

In [3]:
# Map every author name known to the model to the result of
# model.get_author_topics() for that author.
aut_top = {
    author_name: model.get_author_topics(author_name)
    for author_name in model.id2author.values()
}

In [4]:
# Show the author -> [(topic_id, weight), ...] mapping built in the previous cell.
aut_top

{'ADNPolitico': [(0, 0.2538728977356402),
  (9, 0.15910772936731538),
  (31, 0.086257140820394387),
  (50, 0.32223044615573487),
  (71, 0.041534963972440692),
  (81, 0.13630519431984961)],
 'ActualidadRT': [(0, 0.46925346579894406),
  (9, 0.021997301515888468),
  (31, 0.11218874794248269),
  (50, 0.045363227132802747),
  (53, 0.13584655430646558),
  (62, 0.037807649716972959),
  (71, 0.033528616344569558),
  (76, 0.035454858942734854),
  (81, 0.074716630632197051),
  (87, 0.021085723919161754),
  (99, 0.012613460693633359)],
 'Adela_Micha': [(0, 0.10189842225301565),
  (31, 0.23585699498077009),
  (71, 0.10093322697292331),
  (81, 0.39393264676977385),
  (95, 0.070782190200438216)],
 'Amsalazar': [(0, 0.27846222749399868),
  (6, 0.20623360854770528),
  (9, 0.03346180463458024),
  (31, 0.058338139393003439),
  (32, 0.088732430863603731),
  (50, 0.12730465686089232),
  (53, 0.023474944579663506),
  (58, 0.025074798766637763),
  (63, 0.014579600356251509),
  (64, 0.018521674080698002),
  

In [5]:
%%time
from sklearn.manifold import TSNE
# Authors with fewer documents than this threshold are left out of the embedding.
smallest_author = 0

# Ids of the authors that pass the document-count threshold.
authors = [
    author_id
    for name, author_id in model.author2id.items()
    if len(model.author2doc[name]) >= smallest_author
]

# Reduce the selected rows of model.state.gamma to two dimensions.
# random_state is fixed so repeated runs use the same seed.
# The return value is discarded on purpose: later cells read tsne.embedding_.
tsne = TSNE(n_components=2, random_state=0)
_ = tsne.fit_transform(model.state.gamma[authors, :])

CPU times: user 2.49 s, sys: 240 ms, total: 2.73 s
Wall time: 2.42 s


In [6]:

from bokeh.io import output_file
# Direct Bokeh output to a standalone HTML file in the working directory.
output_file('grafica.html')

In [7]:
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

# Coordinates of the 2-D t-SNE embedding (first and second column).
x, y = tsne.embedding_[:, 0], tsne.embedding_[:, 1]

# Author names in the same order as `authors`, and the number of
# documents attributed to each of them.
author_names = [model.id2author[author_id] for author_id in authors]
author_sizes = [len(model.author2doc[name]) for name in author_names]

# Each marker's radius is the author's document count multiplied by `scale`.
scale = 0.01
radii = [doc_count * scale for doc_count in author_sizes]

# Column names here are referenced by the glyph and by the hover tooltips below.
source = ColumnDataSource(
    data={
        'x': x,
        'y': y,
        'author_names': author_names,
        'author_sizes': author_sizes,
        'radii': radii,
    }
)

# Tooltip shown on hover: the author's name and document count.
hover = HoverTool(
    tooltips=[
        ("author", "@author_names"),
        ("size", "@author_sizes"),
    ]
)

p = figure(
    tools=[
        hover,
        'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select',
    ]
)
p.scatter(
    'x',
    'y',
    radius='radii',
    source=source,
    fill_alpha=0.6,
    line_color=None,
)
show(p)