Include experiment02_artm.py and its documentation (3) [skip ci]
sashafrey committed Feb 10, 2015
1 parent 8c11914 commit a40682d
Showing 2 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docs/stories/experiment02_artm.txt
@@ -36,7 +36,7 @@ To proceed the experiment you need to execute the following steps:
* ``batch_size`` (line 28) and assign it the chosen size of batch;
* ``batches_disk_path`` (line 36) and replace the string 'wiki_10k' with the name of your directory with batches;
* ``test_batch_name`` (line 43) and replace the string with direct batch’s name with the name of your test batch;
- * ``tau_decor``, ``tau_phi`` and ``tau_theta`` (lines 54-56) and substitute the values youd like to use.
+ * ``tau_decor``, ``tau_phi`` and ``tau_theta`` (lines 57-59) and substitute the values you'd like to use.

3. If you want to estimate the final perplexity on another, larger test sample, put chosen batches into test folder (in ``$YOUR_HOME_DIRECTORY`` directory).
Then find in the code of the script the declaration of variable ``save_and_test_model`` (line 30) and assign it ``True``.
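The documentation steps refer to a handful of module-level variables that the user edits before running `experiment02_artm.py`. A minimal sketch of what that configuration block might look like after editing — every value here is an illustrative placeholder except `tau_decor = 5.8e+5`, which appears in the script's diff; the test batch name is hypothetical:

```python
# Illustrative configuration block; names follow the script's variables,
# values are placeholders you would substitute per the steps above.
batch_size = 10000                  # chosen batch size (line 28)
save_and_test_model = True          # enable final held-out estimation (line 30)
batches_disk_path = 'wiki_10k'      # directory holding your batches (line 36)
test_batch_name = 'my_test_batch'   # hypothetical test batch name (line 43)

# tau coefficients for ARTM (lines 57-59); tau_phi and tau_theta are placeholders
tau_decor = 5.8e+5
tau_phi = -1.0
tau_theta = -0.15
```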
11 changes: 7 additions & 4 deletions src/python/experiments/experiment02_artm.py
@@ -47,8 +47,11 @@
# path with batches for final held-out estimation
test_batches_folder = home_folder + 'test'

+ # number of documents to be processed without regularization
+ first_documents = 70000
+
# number of documents to be re-processed after last iteration
- first_documents = 80000
+ last_documents = 80000

# tau coefficients for ARTM
tau_decor = 5.8e+5
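The hunk above splits the old single threshold into two: `first_documents` gates when the regularizers switch on during the first outer iteration, while `last_documents` marks how far the final iteration must re-process before the terminal report. The first gate can be paraphrased as a small predicate (a sketch of the condition, not code from the script):

```python
def regularizers_active(next_items_processed, first_documents, outer_iteration):
    # Regularizers are enabled once enough documents have been processed
    # on the first outer iteration (paraphrasing the updated condition).
    return outer_iteration == 0 and next_items_processed >= first_documents
```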
@@ -167,7 +170,7 @@
first_sync = False

# update tau_coefficients of regularizers in Model
- if (need_to_update and (next_items_processed > first_documents or sync_count == 5) and (outer_iteration == 0)):
+ if (need_to_update and (next_items_processed >= first_documents) and (outer_iteration == 0)):
config_copy = artm.messages_pb2.ModelConfig()
config_copy.CopyFrom(model.config())
config_copy.regularizer_tau[0] = tau_decor
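The tau update above follows a copy-then-modify pattern on the protobuf `ModelConfig`: copy the live config, adjust the regularizer coefficients on the copy, then hand the copy back to the model. Since BigARTM itself cannot run here, this standalone sketch uses a plain dataclass as a hypothetical stand-in for `artm.messages_pb2.ModelConfig` (both the stub type and the helper name are assumptions):

```python
from dataclasses import dataclass, field, replace

@dataclass
class ModelConfigStub:
    # Hypothetical stand-in for artm.messages_pb2.ModelConfig;
    # only the one repeated field touched by the update is modeled.
    regularizer_tau: list = field(default_factory=list)

def with_updated_tau(config, index, value):
    # Mirror CopyFrom + field assignment: modify a copy, never the
    # original, so the running model keeps a consistent config until
    # it is explicitly reconfigured.
    new_config = replace(config, regularizer_tau=list(config.regularizer_tau))
    new_config.regularizer_tau[index] = value
    return new_config
```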
@@ -184,7 +187,7 @@

perplexity_score_value = -1
if (test_on_this_iter % test_every == 0) or\
-    (current_items_processed > first_documents and outer_iteration == outer_iterations_count - 1):
+    (current_items_processed > last_documents and outer_iteration == outer_iterations_count - 1):
perplexity_score_value = perplexity_score.GetValue(model = model, batch = test_batch).value
test_on_this_iter += 1
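The condition above schedules perplexity evaluation: periodically, every `test_every` syncs, plus one guaranteed final evaluation once the last outer iteration has processed more than `last_documents` documents. As a self-contained predicate (a paraphrase under the variable meanings assumed here, not the script's own code):

```python
def should_test(test_on_this_iter, test_every, current_items_processed,
                last_documents, outer_iteration, outer_iterations_count):
    # Periodic testing, plus a final test on the last outer iteration
    # once more than `last_documents` documents have been processed.
    periodic = (test_on_this_iter % test_every == 0)
    final = (current_items_processed > last_documents
             and outer_iteration == outer_iterations_count - 1)
    return periodic or final
```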

@@ -231,7 +234,7 @@
topic_kernel_contrast_file.write('(' + str(items_processed_score_value) +\
', ' + str(round(topic_kernel_score_value.average_kernel_contrast, 3)) + ')\n')

- if ((current_items_processed > first_documents) and (outer_iteration == outer_iterations_count - 1)):
+ if ((current_items_processed > last_documents) and (outer_iteration == outer_iterations_count - 1)):
print 'All elapsed time = ' + str(elapsed_time)
if (save_and_test_model):
print 'Saving topic model... ',
