bigartm · JeanPaulShapo · Jul 22, 2016 · Jul 22, 2016 · ofrei · Jul 22, 2016
diff --git a/.gitignore b/.gitignore
@@ -125,3 +125,6 @@ src/artm/messages.pb.h
 /python/dist/*
 
 /src/artm/version.h
+
+# configuration file for YouCompleteMe
+.ycm_extra_conf.py
diff --git a/src/artm/core/collection_parser.cc b/src/artm/core/collection_parser.cc
@@ -221,7 +221,7 @@ CollectionParser::TokenMap CollectionParser::ParseVocabBagOfWordsUci() {
   int token_id = 0;
   while (!vocab.eof()) {
     std::getline(vocab, str);
-    if (vocab.eof())
+    if (vocab.eof() && str.empty())
       break;
 
     boost::algorithm::trim(str);
@@ -445,3 +445,4 @@ void CollectionParser::Parse() {
 
 }  // namespace core
 }  // namespace artm
+// vim: set ts=2 sw=2:
diff --git a/src/artm/core/dictionary.cc b/src/artm/core/dictionary.cc
@@ -256,7 +256,7 @@ Dictionary::Gather(const GatherDictionaryArgs& args,
       int token_id = 0;
       while (!vocab.eof()) {
         std::getline(vocab, str);
-        if (vocab.eof())
+        if (vocab.eof() && str.empty())
           break;
 
         boost::algorithm::trim(str);
@@ -545,3 +545,4 @@ float Dictionary::CountTopicCoherence(const std::vector<core::Token>& tokens_to_
 
 }  // namespace core
 }  // namespace artm
+// vim: set ts=2 sw=2:
diff --git a/src/artm_tests/collection_parser_test.cc b/src/artm_tests/collection_parser_test.cc
@@ -46,36 +46,48 @@ TEST(CollectionParser, UciBagOfWords) {
 
   artm::MasterModelConfig master_config;
   ::artm::MasterModel mc(master_config);
-  artm::GatherDictionaryArgs gather_config;
-  gather_config.set_data_path(target_folder);
-  gather_config.set_vocab_file_path(config.vocab_file_path());
-  gather_config.set_dictionary_target_name("mydictionary");
-  mc.GatherDictionary(gather_config);
 
-  ::artm::GetDictionaryArgs get_dictionary_args;
-  get_dictionary_args.set_dictionary_name("mydictionary");
-  auto dictionary = mc.GetDictionary(get_dictionary_args);
-  ASSERT_EQ(dictionary.token_size(), 3);
+  auto dictionary_checker = [&mc, &target_folder] (
+    const std::string &path_to_vocab,
+    const std::string &dict_name) -> void {
+    // First, gather a dictionary from the given vocab file into the core.
+    artm::GatherDictionaryArgs gather_config;
+    gather_config.set_data_path(target_folder);
+    gather_config.set_vocab_file_path(path_to_vocab);
+    gather_config.set_dictionary_target_name(dict_name);
+    mc.GatherDictionary(gather_config);
+
+    // Next, retrieve that dictionary from the core by its name.
+    ::artm::GetDictionaryArgs get_dictionary_args;
+    get_dictionary_args.set_dictionary_name(dict_name);
+    auto dict = mc.GetDictionary(get_dictionary_args);
+
+    // Finally, check that its tokens and statistics match the expected values.
+    ASSERT_EQ(dict.token_size(), 3);
+
+    EXPECT_EQ(dict.token(0), "token1");
+    EXPECT_EQ(dict.token(1), "token2");
+    EXPECT_EQ(dict.token(2), "token3");
 
-  EXPECT_EQ(dictionary.token(0), "token1");
-  EXPECT_EQ(dictionary.token(1), "token2");
-  EXPECT_EQ(dictionary.token(2), "token3");
+    EXPECT_EQ(dict.class_id(0), "@default_class");
+    EXPECT_EQ(dict.class_id(1), "@default_class");
+    EXPECT_EQ(dict.class_id(2), "@default_class");
 
-  EXPECT_EQ(dictionary.class_id(0), "@default_class");
-  EXPECT_EQ(dictionary.class_id(1), "@default_class");
-  EXPECT_EQ(dictionary.class_id(2), "@default_class");
+    EXPECT_EQ(dict.token_df(0), 1);
+    EXPECT_EQ(dict.token_df(1), 2);
+    EXPECT_EQ(dict.token_df(2), 2);
 
-  EXPECT_EQ(dictionary.token_df(0), 1);
-  EXPECT_EQ(dictionary.token_df(1), 2);
-  EXPECT_EQ(dictionary.token_df(2), 2);
+    EXPECT_EQ(dict.token_tf(0), 5);
+    EXPECT_EQ(dict.token_tf(1), 4);
+    EXPECT_EQ(dict.token_tf(2), 9);
 
-  EXPECT_EQ(dictionary.token_tf(0), 5);
-  EXPECT_EQ(dictionary.token_tf(1), 4);
-  EXPECT_EQ(dictionary.token_tf(2), 9);
+    ASSERT_APPROX_EQ(dict.token_value(0), 5.0 / 18.0);
+    ASSERT_APPROX_EQ(dict.token_value(1), 2.0 / 9.0);
+    ASSERT_APPROX_EQ(dict.token_value(2), 0.5);
+  };
 
-  ASSERT_APPROX_EQ(dictionary.token_value(0), 5.0 / 18.0);
-  ASSERT_APPROX_EQ(dictionary.token_value(1), 2.0 / 9.0);
-  ASSERT_APPROX_EQ(dictionary.token_value(2), 0.5);
+  dictionary_checker(config.vocab_file_path(), "default_dictionary");
+  dictionary_checker("../../../test_data/vocab.parser_test_no_newline.txt", "no_newline_dictionary");
 
   try { boost::filesystem::remove_all(target_folder); }
   catch (...) {}
@@ -241,3 +253,4 @@ TEST(CollectionParser, VowpalWabbit) {
   try { boost::filesystem::remove_all(target_folder); }
   catch (...) {}
 }
+// vim: set ts=2 sw=2 sts=2:
diff --git a/test_data/vocab.parser_test_no_newline.txt b/test_data/vocab.parser_test_no_newline.txt
@@ -0,0 +1,3 @@
+token1
+token2
+token3
\ No newline at end of file