In [None]:
import numpy as np
import pandas as pd
import gensim
import gensim.corpora as corpora

In [None]:
def lda_implement(vocabulary, alpha, beta, xi, num_of_docs, rng=None):
    """Sample synthetic documents from the LDA generative process.

    For every document, topic proportions are drawn from ``Dirichlet(alpha)``
    and a length from ``Poisson(xi)``. Each word is then produced by drawing a
    topic from those proportions and a vocabulary index from that topic's row
    of ``beta``.

    Parameters
    ----------
    vocabulary : sequence
        Words, indexed by the column positions of ``beta``.
    alpha : array-like
        Dirichlet concentration parameters, one entry per topic.
    beta : array-like
        ``beta[k]`` is used as the probability vector over vocabulary indices
        for topic ``k``, so every row must sum to 1.
    xi : float
        Mean of the Poisson distribution the document length is drawn from.
    num_of_docs : int
        Number of documents to generate.
    rng : optional
        Object providing ``dirichlet``, ``poisson`` and ``choice`` (for
        example ``np.random.default_rng(seed)`` or ``np.random.RandomState``).
        Defaults to the global ``np.random`` module, so ``np.random.seed``
        still controls the output when no generator is passed.

    Returns
    -------
    list of list
        One list of words per document; a document is empty when its sampled
        length is 0.
    """
    if rng is None:
        rng = np.random

    documents = []
    for _ in range(num_of_docs):
        topic_proportions = rng.dirichlet(alpha)
        doc_size = rng.poisson(xi)
        # Hoisted: the number of topics is constant for the whole document.
        num_topics = len(topic_proportions)
        document = []
        for _ in range(doc_size):
            # An integer population n is treated by `choice` as range(n).
            topic = rng.choice(num_topics, p=topic_proportions)
            word_proportions = beta[topic]
            word = rng.choice(len(word_proportions), p=word_proportions)
            document.append(vocabulary[word])
        documents.append(document)
    return documents

In [None]:
# Seed the global NumPy generator so the sampled document below is the same
# after every Restart & Run All (the seed value itself is arbitrary).
np.random.seed(0)

vocabulary = ['bass', 'pike', 'deep', 'tuba', 'horn', 'catapult']

# Row k is the word distribution of topic k over `vocabulary`; rows sum to 1.
beta = np.array([
    [0.4, 0.4, 0.2, 0.0, 0.0, 0.0],
    [0.0, 0.3, 0.1, 0.0, 0.3, 0.3],
    [0.3, 0.0, 0.2, 0.3, 0.2, 0.0],
])
alpha = np.array([1, 3, 8])  # Dirichlet concentration, one entry per topic
xi = 50                      # Poisson mean of the document length

# Generate a single document and display its words.
documents = lda_implement(vocabulary, alpha, beta, xi, 1)
documents[0]

['catapult',
 'pike',
 'horn',
 'deep',
 'pike',
 'horn',
 'deep',
 'tuba',
 'bass',
 'horn',
 'tuba',
 'horn',
 'deep',
 'horn',
 'tuba',
 'bass',
 'bass',
 'bass',
 'horn',
 'horn',
 'pike',
 'bass',
 'bass',
 'bass',
 'deep',
 'bass',
 'tuba',
 'deep',
 'tuba',
 'deep',
 'tuba',
 'bass',
 'horn',
 'tuba']

In [None]:
def lda_infer(num_of_docs=200, num_topics=None, passes=15, random_state=None):
    """Fit a gensim LDA model to a synthetic corpus and print its topics.

    The corpus is generated with ``lda_implement`` from the notebook-level
    ``vocabulary``, ``alpha``, ``beta`` and ``xi``, so the parameter cell must
    have been run first. Calling ``lda_infer()`` with no arguments keeps the
    original settings: 200 documents, 15 passes and one fitted topic per row
    of ``beta``.

    Parameters
    ----------
    num_of_docs : int, optional
        Number of synthetic documents to generate.
    num_topics : int, optional
        Number of topics to fit. Defaults to ``len(beta)``, the number of
        topics used to generate the corpus.
    passes : int, optional
        Number of training passes over the corpus.
    random_state : int or np.random.RandomState, optional
        Forwarded to ``LdaModel`` to make the fit repeatable. It does not seed
        the document generation, which draws from NumPy's global state.

    Returns
    -------
    None
        The ``(word, weight)`` list of every fitted topic is printed.
    """
    if num_topics is None:
        num_topics = len(beta)

    documents = lda_implement(vocabulary, alpha, beta, xi, num_of_docs)
    id2word = corpora.Dictionary(documents)
    corpus = [id2word.doc2bow(document) for document in documents]
    lda_model = gensim.models.LdaModel(corpus=corpus,
                                       id2word=id2word,
                                       num_topics=num_topics,
                                       passes=passes,
                                       alpha='auto',
                                       random_state=random_state)
    # One line per topic, in topic-id order.
    for topic_id in range(num_topics):
        print(lda_model.show_topic(topic_id))

In [None]:
# Generate a synthetic corpus, fit LDA to it and print each fitted topic's
# (word, weight) pairs.
lda_infer()

[('pike', 0.30401477), ('catapult', 0.18735063), ('bass', 0.15028138), ('horn', 0.14414605), ('deep', 0.13256653), ('tuba', 0.0816406)]
[('horn', 0.25470555), ('bass', 0.24845959), ('tuba', 0.16694373), ('deep', 0.15288119), ('catapult', 0.08957705), ('pike', 0.0874329)]
[('tuba', 0.30434415), ('bass', 0.26315826), ('deep', 0.21245019), ('horn', 0.14633167), ('pike', 0.07109366), ('catapult', 0.0026221033)]
