In [1]:
# OPTIONAL: load the "autoreload" IPython extension so that modules edited on disk
# can be picked up again without restarting the kernel.
%load_ext autoreload

In [2]:
# OPTIONAL: always reload modules before running a cell, so that changes made to
# code in src are loaded automatically.
%autoreload 2

In [3]:
import os
import sys

# Make the directory two levels above the working directory importable,
# appending it to sys.path only when it is not already listed.
project_root = os.path.abspath('../..')
if project_root not in sys.path:
    sys.path.append(project_root)

In [5]:
from gensim.models import AuthorTopicModel as _AuthorTopicModel
from gensim.corpora import mmcorpus
from gensim.test.utils import common_dictionary, datapath, temporary_file

In [6]:
# Map each author name to the list of document indices attributed to that author.
author2doc = dict(
    john=list(range(0, 7)),     # documents 0 through 6
    jane=list(range(2, 9)),     # documents 2 through 8
    jack=list(range(0, 9, 2)),  # even-numbered documents 0 through 8
)

In [7]:
# Open the 'testcorpus.mm' file, located through gensim's test-data helper,
# as a Matrix Market corpus.
testcorpus_path = datapath('testcorpus.mm')
corpus = mmcorpus.MmCorpus(testcorpus_path)

In [9]:
# Train a 4-topic author-topic model. serialized=True keeps the training corpus
# on disk at `s_path`, a path handed out by the temporary_file context manager.
with temporary_file('serialized') as s_path:
    model = _AuthorTopicModel(
        corpus,
        author2doc=author2doc,
        id2word=common_dictionary,
        num_topics=4,
        serialized=True,
        serialization_path=s_path,
        # Fixed seed: training is stochastic, so without it the author/topic
        # vectors differ on every Restart & Run All.
        random_state=42,
    )
    # Second training call with the same corpus and author mapping.
    # NOTE(review): per the gensim documentation, update() treats a passed corpus
    # as additional documents — confirm that re-feeding the same data is intended.
    model.update(corpus, author2doc)

In [12]:
# Collect the topic distribution of every author known to the model, in the
# order in which the model's id2author mapping yields the author names.
author_vecs = list(map(model.get_author_topics, model.id2author.values()))

author_vecs

[[(0, 0.926086215042209),
  (1, 0.023267036665171385),
  (2, 0.027386630351082478),
  (3, 0.023260117941537133)],
 [(2, 0.9743649945325148)],
 [(0, 0.8998490763068225),
  (1, 0.03127600346051311),
  (2, 0.037587724269498475),
  (3, 0.03128719596316575)]]

In [11]:
# Inspect the Sparse2Corpus helper class.
# The other cells only use `from gensim... import ...`, which never binds the
# top-level name `gensim`; import the submodule here so this cell does not raise
# NameError on a fresh kernel.
# NOTE(review): the output recorded under this cell repeats the author_vecs
# listing and looks stale — re-run the cell to refresh it.
import gensim.matutils

gensim.matutils.Sparse2Corpus

[[(0, 0.926086215042209),
  (1, 0.023267036665171385),
  (2, 0.027386630351082478),
  (3, 0.023260117941537133)],
 [(2, 0.9743649945325148)],
 [(0, 0.8998490763068225),
  (1, 0.03127600346051311),
  (2, 0.037587724269498475),
  (3, 0.03128719596316575)]]

In [16]:
from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary
from gensim.models.ldamodel import LdaModel as _LdaModel

# Display the sample tokenized documents (one list of word tokens per document).
common_texts

[['human', 'interface', 'computer'],
 ['survey', 'user', 'computer', 'system', 'response', 'time'],
 ['eps', 'user', 'interface', 'system'],
 ['system', 'human', 'system', 'eps'],
 ['user', 'response', 'time'],
 ['trees'],
 ['graph', 'trees'],
 ['graph', 'minors', 'trees'],
 ['graph', 'minors', 'survey']]

In [17]:
# Build a token dictionary from the texts, then convert every text into its
# bag-of-words representation.
# NOTE: this rebinds `common_dictionary`, which an earlier cell imported from
# gensim.test.utils.
common_dictionary = Dictionary(common_texts)
common_corpus = list(map(common_dictionary.doc2bow, common_texts))

In [18]:
# Train a 10-topic LDA model on the bag-of-words corpus.
# A fixed random_state makes the learned topics reproducible across kernel
# restarts; LDA training is otherwise randomly initialised.
lda = _LdaModel(common_corpus, num_topics=10, random_state=42)

In [19]:
from gensim.test.utils import datapath

# Save the model to disk.
# Write into the system temporary directory instead of datapath(...): datapath
# resolves inside gensim's bundled test-data folder, i.e. inside the installed
# package, which may be read-only and should not collect notebook artefacts.
import os
import tempfile

temp_file = os.path.join(tempfile.gettempdir(), 'saved_model')
lda.save(temp_file)

In [20]:
# Load a potentially pretrained model from disk.
# The LDA class is imported under the alias `_LdaModel`; the bare name `LdaModel`
# is not bound by that import, so it would raise NameError here.
lda = _LdaModel.load(temp_file)