Release 0.6.1 (#1015)

deeppavlov · Sep 26, 2019 · a951492
2 parents 3ab31e3 + ead21c5
commit a951492
Show file tree

Hide file tree

Showing 26 changed files with 2,239 additions and 1,607 deletions.
diff --git a/README.md b/README.md
@@ -11,12 +11,12 @@ DeepPavlov is designed for
 
 ## Quick Links
 
-* Demo [*demo.ipavlov.ai*](https://demo.ipavlov.ai/)
+* Demo [*demo.deeppavlov.ai*](https://demo.deeppavlov.ai/)
 * Documentation [*docs.deeppavlov.ai*](http://docs.deeppavlov.ai/)
     * Model List [*docs:features/*](http://docs.deeppavlov.ai/en/master/features/overview.html)
     * Contribution Guide [*docs:contribution_guide/*](http://docs.deeppavlov.ai/en/master/devguides/contribution_guide.html)
 * Issues [*github/issues/*](https://github.com/deepmipt/DeepPavlov/issues)
-* Forum [*forum.ipavlov.ai*](https://forum.ipavlov.ai/)
+* Forum [*forum.deeppavlov.ai*](https://forum.deeppavlov.ai/)
 * Blogs [*medium.com/deeppavlov*](https://medium.com/deeppavlov)
 * Tutorials [*examples/*](https://github.com/deepmipt/DeepPavlov/tree/master/examples) and [extended colab tutorials](https://github.com/deepmipt/dp_tutorials)
 * Docker Hub [*hub.docker.com/u/deeppavlov/*](https://hub.docker.com/u/deeppavlov/) 

diff --git a/deeppavlov/__init__.py b/deeppavlov/__init__.py
@@ -37,7 +37,7 @@ def evaluate_model(config: [str, Path, dict], download: bool = False, recursive:
 except ImportError:
     'Assuming that requirements are not yet installed'
 
-__version__ = '0.6.0'
+__version__ = '0.6.1'
 __author__ = 'Neural Networks and Deep Learning lab, MIPT'
 __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
 __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']

diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json b/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json
@@ -1,7 +1,7 @@
 {
   "dataset_reader": {
     "class_name": "dstc2_reader",
-    "data_path": "{DOWNLOADS_PATH}/dstc2"
+    "data_path": "{DATA_PATH}"
   },
   "dataset_iterator": {
     "class_name": "dialog_iterator"
@@ -21,49 +21,41 @@
         "id": "word_vocab",
         "class_name": "simple_vocab",
         "fit_on": ["x_tokens"],
-        "save_path": "{MODELS_PATH}/gobot_dstc2_minimal/word.dict",
-        "load_path": "{MODELS_PATH}/gobot_dstc2_minimal/word.dict"
-      },
-      {
-        "id": "restaurant_database",
-        "class_name": "sqlite_database",
-        "table_name": "mytable",
-        "primary_keys": ["name"],
-        "save_path": "{DOWNLOADS_PATH}/dstc2/resto.sqlite"
+        "save_path": "{MODEL_PATH}/word.dict",
+        "load_path": "{MODEL_PATH}/word.dict"
       },
       {
         "class_name": "go_bot",
-        "load_path": "{MODELS_PATH}/gobot_dstc2_minimal/model",
-        "save_path": "{MODELS_PATH}/gobot_dstc2_minimal/model",
+        "load_path": "{MODEL_PATH}/model",
+        "save_path": "{MODEL_PATH}/model",
         "in": ["x"],
         "in_y": ["y"],
         "out": ["y_predicted"],
         "main": true,
         "debug": false,
-        "learning_rate": [2e-3, 2e-2],
-        "learning_rate_decay": "1cycle",
-        "learning_rate_decay_epochs": 20,
+        "learning_rate": 0.003,
+        "learning_rate_drop_patience": 5,
+        "learning_rate_drop_div": 10.0,
         "momentum": 0.95,
         "optimizer": "tensorflow.train:AdamOptimizer",
         "clip_norm": 2.0,
-        "dropout_rate": 0.3,
-        "l2_reg_coef": 1e-4,
+        "dropout_rate": 0.4,
+        "l2_reg_coef": 3e-4,
         "hidden_size": 128,
-        "dense_size": 64,
+        "dense_size": 160,
         "word_vocab": "#word_vocab",
-        "template_path": "{DOWNLOADS_PATH}/dstc2/dstc2-templates.txt",
-        "template_type": "DualTemplate",
-        "database": "#restaurant_database",
-        "api_call_action": "api_call",
+        "template_path": "{DATA_PATH}/dstc2-templates.txt",
+        "template_type": "DefaultTemplate",
+        "database": null,
+        "api_call_action": null,
         "use_action_mask": false,
         "slot_filler": null,
         "intent_classifier": null,
-        "embedder": null,
-        "bow_embedder": {
-          "class_name": "bow",
-          "depth": "#word_vocab.__len__()",
-          "with_counts": true
+        "embedder": {
+          "class_name": "glove",
+          "load_path": "{DOWNLOADS_PATH}/embeddings/glove.6B.100d.txt"
         },
+        "bow_embedder": null,
         "tokenizer": {
           "class_name": "stream_spacy_tokenizer",
           "lowercase": false
@@ -81,10 +73,10 @@
 
     "metrics": ["per_item_dialog_accuracy"],
     "validation_patience": 10,
-    "val_every_n_epochs": 1,
+    "val_every_n_batches": 15,
 
-    "log_every_n_batches": -1,
-    "log_every_n_epochs": 1,
+    "log_every_n_batches": 15,
+    "log_on_k_batches": -1,
     "show_examples": false,
     "evaluation_targets": [
       "valid",
@@ -95,12 +87,15 @@
   "metadata": {
     "variables": {
       "ROOT_PATH": "~/.deeppavlov",
+      "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs",
       "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
-      "MODELS_PATH": "{ROOT_PATH}/models"
+      "DATA_PATH": "{DOWNLOADS_PATH}/dstc2",
+      "MODELS_PATH": "{ROOT_PATH}/models",
+      "MODEL_PATH": "{MODELS_PATH}/gobot_dstc2_minimal"
     },
     "requirements": [
       "{DEEPPAVLOV_PATH}/requirements/tf.txt",
-      "{DEEPPAVLOV_PATH}/requirements/fasttext.txt",
+      "{DEEPPAVLOV_PATH}/requirements/gensim.txt",
       "{DEEPPAVLOV_PATH}/requirements/spacy.txt",
       "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt"
     ],
@@ -109,9 +104,17 @@
       "server_utils": "GoalOrientedBot"
     },
     "download": [
+      {
+        "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_v9.tar.gz",
+        "subdir": "{MODELS_PATH}"
+      },
+      {
+        "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt",
+        "subdir": "{DOWNLOADS_PATH}/embeddings"
+      },
       {
         "url": "http://files.deeppavlov.ai/datasets/dstc2_v2.tar.gz",
-        "subdir": "{DOWNLOADS_PATH}/dstc2"
+        "subdir": "{DATA_PATH}"
       }
     ]
   }

diff --git a/deeppavlov/dataset_readers/conll2003_reader.py b/deeppavlov/dataset_readers/conll2003_reader.py
@@ -1,9 +1,11 @@
 from pathlib import Path
+from logging import getLogger
 
 from deeppavlov.core.common.registry import register
 from deeppavlov.core.data.dataset_reader import DatasetReader
 from deeppavlov.core.data.utils import download_decompress
 
+log = getLogger(__name__)
 
 @register('conll2003_reader')
 class Conll2003DatasetReader(DatasetReader):
@@ -80,10 +82,19 @@ def parse_ner_file(self, file_name: Path):
                         tags = []
                 else:
                     if self.provide_pos:
-                        token, pos, *_, tag = line.split()
-                        pos_tags.append(pos)
+                        try:
+                            token, pos, *_, tag = line.split()
+                            pos_tags.append(pos)
+                        except:
+                            log.warning('Skip {}, splitted as {}'.format(repr(line), repr(line.split())))
+                            continue
                     else:
-                        token, *_, tag = line.split()
+                        try:
+                            token, *_, tag = line.split()
+                        except:
+                            log.warning('Skip {}, splitted as {}'.format(repr(line), repr(line.split())))
+                            continue
+
                     tags.append(tag)
                     tokens.append(token)
 
@@ -111,4 +122,3 @@ def _iob2_to_iob(tags):
             iob_tags.append(tag)
 
         return iob_tags
-