Release 0.0.8

deeppavlov · Oct 10, 2018 · 14615e6 · 14615e6
2 parents 75567ce + 932b1d9
commit 14615e6
Show file tree

Hide file tree

Showing 196 changed files with 8,679 additions and 2,641 deletions.
diff --git a/docs/.readthedocs.yml → .readthedocs.yml b/docs/.readthedocs.yml → .readthedocs.yml
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -1,36 +1,42 @@
 node('gpu') {
-    try {
-        stage('Clean') {
-            sh "rm -rf .[^.] .??* *"
+    timestamps {
+        try {
+            stage('Clean') {
+                sh "rm -rf .[^.] .??* *"
+            }
+            stage('Checkout') {
+                sh "cp -r ${pwd()}@script/* ."
+            }
+            stage('Setup') {
+                env.CUDA_VISIBLE_DEVICES=0
+                sh """
+                    virtualenv --python=python3 '.venv-$BUILD_NUMBER'
+                    . '.venv-$BUILD_NUMBER/bin/activate'
+                    pip install .[tests,docs]
+                    pip install -r dp_requirements/tf-gpu.txt
+                    rm -rf `find . -mindepth 1 -maxdepth 1 ! -name tests ! -name Jenkinsfile ! -name docs ! -name '.venv-$BUILD_NUMBER'`
+                """
+            }
+            stage('Tests') {
+                sh """
+                    . .venv-$BUILD_NUMBER/bin/activate
+                    pytest -v --disable-warnings
+                    cd docs
+                    make clean
+                    make html
+                """
+                currentBuild.result = 'SUCCESS'
+            }
         }
-        stage('Checkout') {
-            sh "cp -r ${pwd()}@script/* ."
+        catch(e) {
+            currentBuild.result = 'FAILURE'
+            throw e
         }
-        stage('Setup') {
-            env.CUDA_VISIBLE_DEVICES=0
-            sh """
-                virtualenv --python=python3 '.venv-$BUILD_NUMBER'
-                . '.venv-$BUILD_NUMBER/bin/activate'
-                pip install .[tests]
-                pip install -r dp_requirements/tf-gpu.txt
-                rm -rf `find . -mindepth 1 -maxdepth 1 ! -name tests ! -name Jenkinsfile ! -name '.venv-$BUILD_NUMBER'`
-            """
+        finally {
+            emailext to: '${DEFAULT_RECIPIENTS}',
+                subject: "${env.JOB_NAME} - Build # ${currentBuild.number} - ${currentBuild.result}!",
+                body: '${BRANCH_NAME} - ${BUILD_URL}',
+                attachLog: true
         }
-        stage('Tests') {
-            sh """
-                . .venv-$BUILD_NUMBER/bin/activate
-                pytest -v
-            """
-        }
-    } catch (e) {
-        emailext to: '${DEFAULT_RECIPIENTS}',
-            subject: '${PROJECT_NAME} - Build # ${BUILD_NUMBER} - FAILED!',
-            body: '${BRANCH_NAME} - ${BUILD_URL}',
-            attachLog: true
-        throw e
     }
-    emailext to: '${DEFAULT_RECIPIENTS}',
-        subject: '${PROJECT_NAME} - Build # ${BUILD_NUMBER} - ${BUILD_STATUS}!',
-        body: '${BRANCH_NAME} - ${BUILD_URL}',
-        attachLog: true
 }
diff --git a/README.md b/README.md
@@ -12,20 +12,21 @@ DeepPavlov is an open-source conversational AI library built on [TensorFlow](htt
 
 Import key components to build HelloBot. 
 ```python
-from deeppavlov.core.agent import Agent, HighestConfidenceSelector
 from deeppavlov.skills.pattern_matching_skill import PatternMatchingSkill
+from deeppavlov.agents.default_agent.default_agent import DefaultAgent 
+from deeppavlov.agents.processors.highest_confidence_selector import HighestConfidenceSelector
 ```
 
 Create skills as pre-defined responses for a user's input containing specific keywords. Every skill returns response and confidence.
 ```python
-hello = PatternMatchingSkill(responses=['Hello world! :)'], patterns=["hi", "hello", "good day"])
-bye = PatternMatchingSkill(['Goodbye world! :(', 'See you around.'], ["bye", "chao", "see you"])
-fallback = PatternMatchingSkill(["I don't understand, sorry :/", 'I can say "Hello world!" 8)'])
+hello = PatternMatchingSkill(responses=['Hello world!'], patterns=["hi", "hello", "good day"])
+bye = PatternMatchingSkill(['Goodbye world!', 'See you around'], patterns=["bye", "chao", "see you"])
+fallback = PatternMatchingSkill(["I don't understand, sorry", 'I can say "Hello world!"'])
 ```
 
 Agent executes skills and then takes response from the skill with the highest confidence.
 ```python
-HelloBot = Agent([hello, bye, fallback], skills_selector=HighestConfidenceSelector())
+HelloBot = DefaultAgent([hello, bye, fallback], skills_processor=HighestConfidenceSelector())
 ```
 
 Give the floor to the HelloBot!
@@ -42,19 +43,19 @@ print(HelloBot(['Hello!', 'Boo...', 'Bye.']))
 
 [Named Entity Recognition](http://docs.deeppavlov.ai/en/latest/components/ner.html) | [Slot filling](http://docs.deeppavlov.ai/en/latest/components/slot_filling.html)
 
-[Intent/Sentence Classification](http://docs.deeppavlov.ai/en/latest/components/classifiers.html) |  [Sentence Similarity/Ranking](http://docs.deeppavlov.ai/en/latest/components/neural_ranking.html)
+[Intent/Sentence Classification](http://docs.deeppavlov.ai/en/latest/components/classifiers.html) |  [Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/latest/components/squad.html) 
 
-[Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/latest/components/squad.html) 
+[Sentence Similarity/Ranking](http://docs.deeppavlov.ai/en/latest/components/neural_ranking.html) | [TF-IDF Ranking](http://docs.deeppavlov.ai/en/latest/components/tfidf_ranking.html) 
 
 [Morphological tagging](http://docs.deeppavlov.ai/en/latest/components/morphotagger.html) | [Automatic Spelling Correction](http://docs.deeppavlov.ai/en/latest/components/spelling_correction.html)
 
 **Skills**
 
 [Goal(Task)-oriented Bot](http://docs.deeppavlov.ai/en/latest/skills/go_bot.html) | [Seq2seq Goal-Oriented bot](http://docs.deeppavlov.ai/en/latest/skills/seq2seq_go_bot.html)
 
-[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/latest/skills/odqa.html)
+[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/latest/skills/odqa.html) | [eCommerce Bot](http://docs.deeppavlov.ai/en/latest/skills/ecommerce_bot_skill.html) 
 
-[Frequently Asked Questions Answering](http://docs.deeppavlov.ai/en/latest/skills/faq.html)
+[Frequently Asked Questions Answering](http://docs.deeppavlov.ai/en/latest/skills/faq.html) | [Pattern Matching](http://docs.deeppavlov.ai/en/latest/skills/pattern_matching.html) 
 
 **Embeddings**
 
@@ -68,16 +69,23 @@ print(HelloBot(['Hello!', 'Boo...', 'Bye.']))
 
 # Installation
 
-0. Currently we support only `Linux` platform and `Python 3.6` (**`Python 3.5` is not supported!**)
+0. Currently we support `Linux` and `Windows` platforms and `Python 3.6` 
+    * **`Python 3.5` is not supported!**
+    * **`Windows` platform requires `Visual Studio 2015/2017` with `C++` build tools installed!**
 
 1. Create a virtual environment with `Python 3.6`:
     ```
     virtualenv env
     ```
 2. Activate the environment:
+    * `Linux`
     ```
     source ./env/bin/activate
     ```
+    * `Windows`
+    ```
+    .\env\Scripts\activate.bat
+    ```
 3. Install the package inside this virtual environment:
     ```
     pip install deeppavlov
@@ -106,15 +114,19 @@ Then you can interact with the models or train them with the following command:
 python -m deeppavlov <mode> <path_to_config> [-d]
 ```
 
-* `<mode>` can be `train`, `predict`, `interact`, `interactbot` or `riseapi`
+* `<mode>` can be `train`, `predict`, `interact`, `interactbot`, `interactmsbot` or `riseapi`
 * `<path_to_config>` should be a path to an NLP pipeline json config (e.g. `deeppavlov/configs/ner/slotfill_dstc2.json`)
 or a name without the `.json` extension of one of the config files [provided](deeppavlov/configs) in this repository (e.g. `slotfill_dstc2`)
 
 For the `interactbot` mode you should specify Telegram bot token in `-t` parameter or in `TELEGRAM_TOKEN` environment variable. Also if you want to get custom `/start` and `/help` Telegram messages for the running model you should:
 * Add section to `utils/telegram_utils/model_info.json` with your custom Telegram messages
 * In model config file specify `metadata.labels.telegram_utils` parameter with name which refers to the added section of `utils/telegram_utils/model_info.json`
 
-For `riseapi` mode you should specify api settings (host, port, etc.) in [*utils/server_utils/server_config.json*](utils/server_utils/server_config.json) configuration file. If provided, values from *model_defaults* section override values for the same parameters from *common_defaults* section. Model names in *model_defaults* section should be similar to the class names of the models main component.
+For the `interactmsbot` mode you should specify **Microsoft app id** in `-i` and **Microsoft app secret** in `-s`. Also, before launch you should specify API deployment settings (host, port) in the [*utils/server_config.json*](utils/server_utils/server_config.json) configuration file. Note that Microsoft Bot Framework requires an `https` endpoint with a valid certificate from a CA.
+Here is [detailed info on the Microsoft Bot Framework integration](http://docs.deeppavlov.ai/en/latest/devguides/ms_bot_integration.html) 
+
+For `riseapi` mode you should specify API settings (host, port, etc.) in the [*utils/server_config.json*](utils/server_utils/server_config.json) configuration file. If provided, values from the *model_defaults* section override values for the same parameters from the *common_defaults* section. Model names in the *model_defaults* section should be similar to the class names of the models' main components.
+Here is [detailed info on the DeepPavlov REST API](http://docs.deeppavlov.ai/en/latest/devguides/rest_api.html)
 
 For `predict` you can specify path to input file with `-f` or `--input-file` parameter, otherwise, data will be taken
 from stdin.  

diff --git a/deeppavlov/__init__.py b/deeppavlov/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = '0.0.7'
+__version__ = '0.0.8'
 __author__ = 'Neural Networks and Deep Learning lab, MIPT'
 __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
 __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']

diff --git a/deeppavlov/agents/__init__.py b/deeppavlov/agents/__init__.py
diff --git a/deeppavlov/agents/default_agent/__init__.py b/deeppavlov/agents/default_agent/__init__.py
diff --git a/deeppavlov/agents/default_agent/default_agent.py b/deeppavlov/agents/default_agent/default_agent.py
@@ -0,0 +1,94 @@
+# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional
+
+from deeppavlov.core.agent.agent import Agent
+from deeppavlov.core.agent.filter import Filter
+from deeppavlov.core.agent.processor import Processor
+from deeppavlov.core.skill.skill import Skill
+from deeppavlov.agents.filters.transparent_filter import TransparentFilter
+from deeppavlov.agents.processors.highest_confidence_selector import HighestConfidenceSelector
+
+
+class DefaultAgent(Agent):
+    """
+    DeepPavlov default implementation of Agent abstraction.
+
+    Default Agent is an implementation of the agent template, with the following
+    pipeline for each utterance batch received by the agent:
+    1) Utterance batch is processed through agent Filter which selects utterances to be processed with each agent skill;
+    2) Utterances are processed through skills selected for them;
+    3) Utterances and skill responses are processed through agent Processor which generates agent's response for the outer world.
+    Defining DefaultAgent means:
+    a) To define set of skills it uses;
+    b) To implement skills Filter;
+    c) To implement Processor.
+    You can refer to :class:`deeppavlov.core.skill.Skill`, :class:`deeppavlov.core.agent.Filter`, :class:`deeppavlov.core.agent.Processor` base classes to get more info.
+
+    Args:
+        skills: List of initiated agent skills instances.
+        skills_processor: Initiated agent processor; ``HighestConfidenceSelector()`` is created when not given.
+        skills_filter: Initiated agent filter; ``TransparentFilter(len(skills))`` is created when not given.
+        *args, **kwargs: Accepted and ignored by this constructor.
+
+    Attributes:
+        skills: List of initiated agent skills instances.
+        skills_processor: Initiated agent processor.
+        skills_filter: Initiated agent filter.
+    """
+    def __init__(self, skills: List[Skill], skills_processor: Optional[Processor]=None,
+                 skills_filter: Optional[Filter]=None, *args, **kwargs) -> None:
+        super().__init__(skills=skills)
+        self.skills_filter: Filter = skills_filter or TransparentFilter(len(skills))
+        self.skills_processor: Processor = skills_processor or HighestConfidenceSelector()
+
+    def _call(self, utterances_batch: list, utterances_ids: Optional[list]=None) -> list:
+        """
+        Processes batch of utterances and returns corresponding responses batch.
+
+        Each call of Agent passes incoming utterances batch through skills filter,
+        agent skills, skills processor. Batch of dialog IDs can be provided, in
+        other case utterances indexes in incoming batch are used as dialog IDs.
+
+        Args:
+            utterances_batch: Batch of incoming utterances.
+            utterances_ids: Batch of dialog IDs corresponding to incoming utterances.
+
+        Returns:
+            responses: A batch of responses corresponding to the utterance batch received by agent.
+        """
+        batch_size = len(utterances_batch)
+        ids = utterances_ids or list(range(batch_size))
+        batch_history = [self.history[utt_id] for utt_id in ids]
+        responses = []
+
+        filtered = self.skills_filter(utterances_batch, batch_history)
+
+        for filtered_utterances, skill in zip(filtered, self.wrapped_skills):
+            skill_utt_indexes = [utt_index for utt_index, utt_filter in enumerate(filtered_utterances) if utt_filter]
+            if skill_utt_indexes:
+                skill_utt_batch = [utterances_batch[i] for i in skill_utt_indexes]
+                # Hand the skill dialog IDs, not batch positions: the two differ when utterances_ids is given.
+                skill_utt_ids = [ids[i] for i in skill_utt_indexes]
+                res = [(None, 0.)] * batch_size  # placeholder for utterances this skill did not process
+                predicted, confidence = skill(skill_utt_batch, skill_utt_ids)
+                for i, response, response_confidence in zip(skill_utt_indexes, predicted, confidence):
+                    res[i] = (response, response_confidence)
+
+                responses.append(res)
+
+        responses = self.skills_processor(utterances_batch, batch_history, *responses)
+
+        return responses
diff --git a/deeppavlov/agents/ecommerce_agent/__init__.py b/deeppavlov/agents/ecommerce_agent/__init__.py