In [1]:
from gensim.models import AuthorTopicModel

# Location of the previously saved author-topic model that this notebook analyses.
model_path = 'model.atmodel'
model = AuthorTopicModel.load(model_path)

In [2]:
# Display the model's topics (asking for up to 100). Each entry in the output
# is a (topic id, string of weighted terms) pair.
model.show_topics(num_topics=100)

[(0,
  '0.041*"telfonorojo" + 0.036*"adriantrejo" + 0.024*"nykte" + 0.022*"uganda" + 0.020*"juliopilotzi" + 0.019*"legitimidad" + 0.016*"arruine" + 0.012*"xóchilt" + 0.012*"xóchilt_tress" + 0.011*"hechosysusurros"'),
 (1,
  '0.058*"virginiasendell" + 0.024*"Leticia Calderón Chelius" + 0.024*"chelius" + 0.019*"Marta Lamas AgendaPública" + 0.016*"Luis Manuel García Moreno" + 0.016*"erizosmx" + 0.015*"MariodelaRosa EsLaHoraDeOpinar" + 0.011*"Terremoto…" + 0.011*"Luis Miguel Prez" + 0.010*"anaordonana"'),
 (2,
  '0.145*"leaks" + 0.130*"ms_leaks" + 0.032*"Ms leaks:" + 0.022*"Te contamos:" + 0.016*"jmcartoon" + 0.011*"EPN Lozoya" + 0.010*"lomsyi" + 0.008*"resurgimiento" + 0.008*"washingtondc" + 0.007*"Ms moneros aquí :"'),
 (3,
  '0.024*"conversa" + 0.021*"extras" + 0.016*"cide" + 0.014*"indicaciones" + 0.012*"ocasiona" + 0.011*"adquisiciones" + 0.010*"vespertinas" + 0.009*"uckermann" + 0.009*"Christopher Uckermann" + 0.009*"NoEsBroma En"'),
 (4,
  '0.023*"Caso Narvarte" + 0.023*"sinhambremx

In [3]:
# Map every author name known to the model to the result of
# model.get_author_topics() for that author.
aut_top = {
    author_name: model.get_author_topics(author_name)
    for author_name in model.id2author.values()
}

In [4]:
# Display the author -> topics mapping; the output lists (topic id, weight)
# pairs per author.
aut_top

{'ADNPolitico': [(11, 0.34446485555951994),
  (24, 0.023567165748704256),
  (32, 0.011888274150411226),
  (52, 0.53413375632999072),
  (69, 0.085480614858942205)],
 'ActualidadRT': [(11, 0.31269067164655545),
  (22, 0.019131128760117466),
  (32, 0.028553645756288326),
  (52, 0.53864105987305622),
  (58, 0.01088985085275402),
  (69, 0.05170376687881683),
  (86, 0.022288522957446141)],
 'Adela_Micha': [(11, 0.51626996289775107),
  (22, 0.11253785060892957),
  (32, 0.029227959018688198),
  (39, 0.037929869965915054),
  (41, 0.039423102717278349),
  (50, 0.025110382850224103),
  (52, 0.13030959196521449),
  (59, 0.031783991533409009),
  (69, 0.046693254081443515)],
 'Amsalazar': [(11, 0.15720470811683446),
  (22, 0.15063377001768272),
  (23, 0.089036325973304714),
  (27, 0.019183806431209195),
  (32, 0.035789522119196983),
  (41, 0.010368029918935351),
  (43, 0.025545690952374901),
  (50, 0.014988739080190544),
  (52, 0.20921497617356771),
  (69, 0.11645162015652558),
  (72, 0.131976274145

In [5]:
%%time
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=0)
smallest_author = 0  # Ignore authors with documents less than this.
authors = [model.author2id[a] for a in model.author2id.keys() if len(model.author2doc[a]) >= smallest_author]
_ = tsne.fit_transform(model.state.gamma[authors, :])  # Result stored in tsne.embedding_

CPU times: user 2.81 s, sys: 248 ms, total: 3.06 s
Wall time: 3.09 s


In [6]:

from bokeh.io import output_file
# Configure Bokeh to write its output to the HTML file 'grafica.html'.
output_file('grafica.html')

In [7]:
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, show

# 2-D coordinates from the fitted t-SNE object; rows follow the order of `authors`.
x = tsne.embedding_[:, 0]
y = tsne.embedding_[:, 1]
author_names = [model.id2author[a] for a in authors]

# Each author's circle radius is their document count multiplied by `scale`.
scale = 0.01
author_sizes = [len(model.author2doc[a]) for a in author_names]
radii = [size * scale for size in author_sizes]

# Column names here are what the glyph and the hover tooltips refer to below.
source = ColumnDataSource(
    data=dict(
        x=x,
        y=y,
        author_names=author_names,
        author_sizes=author_sizes,
        radii=radii,
    )
)

# Show the author's name and document count when hovering over a circle.
hover = HoverTool(
    tooltips=[
        ("author", "@author_names"),
        ("size", "@author_sizes"),
    ]
)

p = figure(
    title="Authors embedded with t-SNE (circle radius ~ number of documents)",
    tools=[hover, 'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select'],
)
# Use circle() rather than scatter(): `radius` is a property of the circle
# glyph, and scatter(radius=...) is deprecated in recent Bokeh releases.
p.circle('x', 'y', radius='radii', source=source, fill_alpha=0.6, line_color=None)
show(p)