In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import artm
  
# Disabled alternative: read batches that already exist in 'posts-tags' and
# load the dictionary from its text dump instead of re-parsing the raw file.
#batch_vectorizer = artm.BatchVectorizer(data_path='posts-tags',data_format='batches')
#dictionary = artm.Dictionary()
#dictionary.load_text("posts-tags/dictionary.txt", encoding='utf-8')

# Active path: build the vectorizer from the Vowpal Wabbit file, using
# 'posts-tags' as the target folder for the generated batches.
batch_vectorizer = artm.BatchVectorizer(
    data_path='vw.tags.txt',
    data_format='vowpal_wabbit',
    target_folder='posts-tags',
)

# The dictionary exposed by the vectorizer is reused for the perplexity score
# and for model initialization further down.
dictionary = batch_vectorizer.dictionary

In [2]:
# Topic labels 'topic_0' ... 'topic_999', one per topic of the model.
topic_names = list(map('topic_{}'.format, range(1000)))

# Topic model with quality scores and regularizers attached.
# The score names below are the keys that print_measures() looks up in
# model_artm.score_tracker, so they must stay in sync with it.
model_artm = artm.ARTM(
    topic_names=topic_names,
    num_processors=4,
    cache_theta=True,
    # NOTE(review): BigARTM documents 'id' and 'title' for this option --
    # confirm that 'post' is intended and has the desired effect.
    theta_columns_naming='post',
    scores=[
        artm.PerplexityScore(name='PerplexityScore', dictionary=dictionary),
        artm.SparsityPhiScore(name='SparsityPhiScore'),
        artm.SparsityThetaScore(name='SparsityThetaScore'),
        artm.TopicKernelScore(name='TopicKernelScore', probability_mass_threshold=0.3),
    ],
    regularizers=[
        artm.DecorrelatorPhiRegularizer(name='DecorrelatorPhi', tau=1.5e+5),
        # Negative tau values are passed to both smooth/sparse regularizers.
        artm.SmoothSparsePhiRegularizer(name='SparsePhi', tau=-0.1),
        artm.SmoothSparseThetaRegularizer(name='SparseTheta', tau=-0.15),
    ],
    num_document_passes=1,
    seed=62,
    show_progress_bars=True,
)

# Initialize the model from the collection dictionary before training.
model_artm.initialize(dictionary=dictionary)

def print_measures(model_artm):
    """Print the latest quality scores of a model and plot its perplexity history.

    Reads the 'SparsityPhiScore', 'SparsityThetaScore', 'TopicKernelScore' and
    'PerplexityScore' entries of ``model_artm.score_tracker``, prints their
    most recent values with three decimals, and then plots every recorded
    perplexity value as a dashed red line.

    Args:
        model_artm: object exposing ``score_tracker``, a mapping from score
            name to a tracker object. The sparsity and perplexity trackers
            must provide ``last_value``; the kernel tracker must provide
            ``last_average_contrast`` and ``last_average_purity``; the
            perplexity tracker must additionally provide ``value`` (the
            sequence of recorded values).

    Returns:
        None. Text goes to stdout and the curve to the current matplotlib
        figure, which is shown via ``plt.show()``.
    """
    # Every line formats exactly one value, so the placeholder has to refer to
    # positional argument 0. The earlier '{1:.3f}' placeholders (and the
    # two-placeholder purity line) raised IndexError on the first call.
    print('Sparsity Phi: {:.3f}'.format(
        model_artm.score_tracker['SparsityPhiScore'].last_value))

    print('Sparsity Theta: {:.3f}'.format(
        model_artm.score_tracker['SparsityThetaScore'].last_value))

    print('Kernel contrast: {:.3f}'.format(
        model_artm.score_tracker['TopicKernelScore'].last_average_contrast))

    print('Kernel purity: {:.3f}'.format(
        model_artm.score_tracker['TopicKernelScore'].last_average_purity))

    print('Perplexity: {:.3f}'.format(
        model_artm.score_tracker['PerplexityScore'].last_value))

    # Size the x-axis from the recorded history itself so x and y always have
    # the same length; an independent counter such as num_phi_updates may not
    # match it (presumably e.g. after an interrupted fit -- TODO confirm).
    perplexity_history = model_artm.score_tracker['PerplexityScore'].value
    plt.plot(range(len(perplexity_history)), perplexity_history,
             'r--', linewidth=2)
    plt.xlabel('Iterations count')
    plt.ylabel('ARTM PerplexityScore')
    plt.grid(True)
    plt.show()

In [5]:
%%time
# Train the model online on the prepared batches, in asynchronous mode.
model_artm.fit_online(
    batch_vectorizer=batch_vectorizer,
    asynchronous=True,
)

HBox(children=(IntProgress(value=0, description='Batch', max=18154, style=ProgressStyle(description_width='ini…



KeyboardInterrupt: 

In [None]:
# Summarise the quality scores tracked for the model trained above.
print_measures(model_artm)