Merge pull request #206 from deeppavlov/dev

Release 0.4.0
deeppavlov · Oct 11, 2022 · 4d5cb36 · 4d5cb36
2 parents d42e1d6 + c3bb406
commit 4d5cb36
Show file tree

Hide file tree

Showing 134 changed files with 5,606 additions and 168 deletions.
diff --git a/.env b/.env
@@ -32,3 +32,6 @@ FACT_RANDOM_SERVICE_URL=http://fact-random:8119/respond
 INFILLING_SERVICE_URL=http://infilling:8122/respond
 DIALOGPT_SERVICE_URL=http://dialogpt:8091/respond
 DIALOGPT_CONTINUE_SERVICE_URL=http://dialogpt:8125/continue
+PROMPT_STORYGPT_SERVICE_URL=http://prompt-storygpt:8127/respond
+STORYGPT_SERVICE_URL=http://storygpt:8126/respond
+SENTENCE_RANKER_SERVICE_URL=http://sentence-ranker:8128/respond
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -256,6 +256,105 @@ pipeline {
         }
       }
     }
+
+    // Build-ML: cleans the default test environment, then builds the multilingual distribution.
+    stage('Build-ML') {
+      steps {
+        script{
+          // startTime is assigned without a declaration here; presumably it is a
+          // pipeline-level variable defined outside this hunk - TODO confirm.
+          startTime = currentBuild.duration
+          // NOTE(review): ex is never read anywhere in this stage.
+          Exception ex = null
+          // A failure inside this closure marks both the stage and the whole build as FAILURE.
+          catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
+            try {
+              sh '''
+                tests/runtests.sh MODE=clean
+                tests/runtests_multilingual.sh MODE=build
+              '''
+            }
+            catch (Exception e) {
+              // NOTE(review): duration is computed but not used in the visible code
+              // (divides by 1000, presumably milliseconds -> seconds).
+              int duration = (currentBuild.duration - startTime) / 1000
+              // Re-throw so catchError records the failure.
+              throw e
+            }
+          }
+        }
+      }
+      post {
+        // On failure, tear down whatever the multilingual build left behind.
+        failure {
+          script {
+            sh 'tests/runtests_multilingual.sh MODE=clean'
+          }
+        }
+        success {
+          script {
+            // NOTE(review): duration is computed but not used in the visible code.
+            int duration = (currentBuild.duration - startTime) / 1000
+          }
+        }
+      }
+    }
+
+    // Start-ML: cleans the multilingual environment and then starts it.
+    stage('Start-ML') {
+      steps {
+        script {
+          // startTime is assigned without a declaration here; presumably it is a
+          // pipeline-level variable defined outside this hunk - TODO confirm.
+          startTime = currentBuild.duration
+          // NOTE(review): ex is never read anywhere in this stage.
+          Exception ex = null
+          // A failure inside this closure marks both the stage and the whole build as FAILURE.
+          catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
+            try {
+              // MODE=start only runs if the preceding MODE=clean succeeds (&&).
+              sh 'tests/runtests_multilingual.sh MODE=clean && tests/runtests_multilingual.sh MODE=start'
+            }
+            catch (Exception e) {
+              // NOTE(review): duration is computed but not used in the visible code.
+              int duration = (currentBuild.duration - startTime) / 1000
+              // Re-throw so catchError records the failure.
+              throw e
+            }
+          }
+        }
+      }
+      post {
+        // On failure, tear down the partially started multilingual environment.
+        failure {
+          script {
+            sh 'tests/runtests_multilingual.sh MODE=clean'
+          }
+        }
+        success {
+          script {
+            // The pipeline-level post section checks this flag before running its cleanup scripts.
+            started = true
+            // NOTE(review): duration is computed but not used in the visible code.
+            int duration = (currentBuild.duration - startTime) / 1000
+          }
+        }
+        // An aborted run gets the same cleanup as a failed one.
+        aborted {
+          script {
+            sh 'tests/runtests_multilingual.sh MODE=clean'
+          }
+        }
+      }
+    }
+
+    // Test skills-ML: runs the skill tests of the multilingual distribution.
+    stage('Test skills-ML') {
+      steps {
+        script {
+          // startTime is assigned without a declaration here; presumably it is a
+          // pipeline-level variable defined outside this hunk - TODO confirm.
+          startTime = currentBuild.duration
+          // NOTE(review): ex is never read anywhere in this stage.
+          Exception ex = null
+          // A failure inside this closure marks both the stage and the whole build as FAILURE.
+          catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
+            try {
+              sh label: 'test skills', script: 'tests/runtests_multilingual.sh MODE=test_skills'
+            }
+            catch (Exception e) {
+              // NOTE(review): duration is computed but not used in the visible code.
+              int duration = (currentBuild.duration - startTime) / 1000
+              // Re-throw so catchError records the failure.
+              throw e
+            }
+          }
+        }
+      }
+      // NOTE(review): unlike Build-ML and Start-ML, this stage has no failure handler,
+      // so a failed test run is not cleaned up by the stage itself.
+      post {
+        success {
+          script {
+            // NOTE(review): duration is computed but not used in the visible code.
+            int duration = (currentBuild.duration - startTime) / 1000
+          }
+        }
+        // Tear the multilingual environment down when the run is aborted.
+        aborted {
+          script {
+            sh 'tests/runtests_multilingual.sh MODE=clean'
+          }
+        }
+      }
+    }
   }
   post {
     aborted {
@@ -268,6 +367,7 @@ pipeline {
         if (started) {
           sh './tests/runtests.sh MODE=clean'
           sh './tests/runtests_russian.sh MODE=clean'
+          sh './tests/runtests_multilingual.sh MODE=clean'
         }
       }
     }

diff --git a/README.md b/README.md
diff --git a/annotators/COMeT/requirements.txt b/annotators/COMeT/requirements.txt
@@ -8,3 +8,4 @@ requests==2.22.0
 sentry-sdk[asgi]==1.3.1
 jinja2<=3.0.3
 Werkzeug<=2.0.3
+importlib-metadata<5.0
diff --git a/annotators/ConversationEvaluator/Dockerfile b/annotators/ConversationEvaluator/Dockerfile
@@ -1,5 +1,5 @@
 FROM deeppavlov/base-gpu:0.12.0
-RUN pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.12.0
+RUN pip install --upgrade pip && pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.12.0
 
 ARG CONFIG
 ARG DATA_URL=http://files.deeppavlov.ai/alexaprize_data/cobot_conveval2.tar.gz

diff --git a/annotators/IntentCatcherTransformers/Dockerfile b/annotators/IntentCatcherTransformers/Dockerfile
@@ -1,5 +1,5 @@
 FROM deeppavlov/base-gpu:0.17.2
-RUN pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.17.2
+RUN pip install --upgrade pip && pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.17.2
 
 RUN apt-key del 7fa2af80  && \
     rm -f /etc/apt/sources.list.d/cuda*.list && \

diff --git a/annotators/IntentCatcherTransformers/requirements.txt b/annotators/IntentCatcherTransformers/requirements.txt
@@ -13,7 +13,6 @@ huggingface-hub==0.0.8
 datasets==1.11.0
 scikit-learn==0.21.2
 xeger==0.3.5
-transformers==4.6.0
 torch==1.6.0
 torchvision==0.7.0
 cryptography==2.8
diff --git a/annotators/MultilingualSentimentClassification/Dockerfile b/annotators/MultilingualSentimentClassification/Dockerfile
@@ -0,0 +1,19 @@
+# Multilingual sentiment classification annotator: serves server:app with gunicorn
+# and loads the XLM-RoBERTa tokenizer and model once at build time.
+FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime
+
+WORKDIR /src
+
+# Build arguments are re-exported as environment variables so that the
+# service (and the CMD below) can read them at runtime.
+ARG PRETRAINED_MODEL_NAME_OR_PATH
+ENV PRETRAINED_MODEL_NAME_OR_PATH=${PRETRAINED_MODEL_NAME_OR_PATH}
+ARG SERVICE_PORT
+ENV SERVICE_PORT=${SERVICE_PORT}
+
+# Dependencies are installed before the sources are copied so this layer stays
+# cached when only the code changes. --no-cache-dir keeps pip's download cache
+# out of the image. sentencepiece is pinned to the version used by the sibling
+# MultilingualToxicClassification service for reproducible builds.
+COPY ./requirements.txt /src/requirements.txt
+RUN pip install --no-cache-dir -r /src/requirements.txt sentencepiece==0.1.94
+
+# Load the tokenizer and the model during the build; presumably this caches the
+# weights in the image so the container does not download them at startup.
+RUN python -c "from transformers import XLMRobertaTokenizer; XLMRobertaTokenizer.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"
+RUN python -c "from transformers import XLMRobertaForSequenceClassification; XLMRobertaForSequenceClassification.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"
+
+COPY . /src
+
+# A shell is still needed to expand ${SERVICE_PORT}; exec replaces that shell with
+# gunicorn so it runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD ["/bin/sh", "-c", "exec gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=300"]
diff --git a/annotators/MultilingualSentimentClassification/README.md b/annotators/MultilingualSentimentClassification/README.md
@@ -0,0 +1 @@
+Coming soon ;)
diff --git a/annotators/MultilingualSentimentClassification/requirements.txt b/annotators/MultilingualSentimentClassification/requirements.txt
@@ -0,0 +1,10 @@
+transformers==4.20.1
+flask==1.1.1
+gunicorn==19.9.0
+requests==2.22.0
+sentry-sdk[flask]==0.14.1
+healthcheck==1.3.3
+itsdangerous==2.0.1
+jinja2<=3.0.3
+Werkzeug<=2.0.3
+torch==1.8
diff --git a/annotators/MultilingualSentimentClassification/server.py b/annotators/MultilingualSentimentClassification/server.py
@@ -0,0 +1,78 @@
+import logging
+import time
+import os
+
+import sentry_sdk
+import torch
+from flask import Flask, request, jsonify
+from sentry_sdk.integrations.flask import FlaskIntegration
+from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
+
+sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"), integrations=[FlaskIntegration()])
+
+
+logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+PRETRAINED_MODEL_NAME_OR_PATH = os.environ.get("PRETRAINED_MODEL_NAME_OR_PATH")
+logging.info(f"PRETRAINED_MODEL_NAME_OR_PATH = {PRETRAINED_MODEL_NAME_OR_PATH}")
+columns = ["negative", "neutral", "positive"]
+
+try:
+    tokenizer = XLMRobertaTokenizer.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
+    model = XLMRobertaForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
+    model.eval()
+    if torch.cuda.is_available():
+        model.to("cuda")
+        logger.info("sentiment-classification is set to run on cuda")
+
+    logger.info("sentiment-classification model is ready")
+except Exception as e:
+    sentry_sdk.capture_exception(e)
+    logger.exception(e)
+    raise e
+
+app = Flask(__name__)
+logging.getLogger("werkzeug").setLevel("WARNING")
+
+
+def classify_sentences(sentences):
+    try:
+        inputs = tokenizer(sentences, return_tensors="pt", truncation=True, padding=True)
+        with torch.no_grad():
+            if torch.cuda.is_available():
+                inputs = inputs.to("cuda")
+            outputs = model(**inputs)[0]
+            model_output = torch.nn.functional.softmax(outputs, dim=-1).cpu()
+            result = []
+
+            for i, cla in zip(sentences, model_output):
+                result += [{columns[id_column]: float(cla[id_column]) for id_column in range(len(columns))}]
+
+    except Exception as exc:
+        logger.exception(exc)
+        sentry_sdk.capture_exception(exc)
+        result = [{column: 0.0 for column in columns}] * len(sentences)
+    return result
+
+
+@app.route("/respond", methods=["POST"])
+def respond():
+    st_time = time.time()
+    sentences = request.json.get("sentences", [])
+    result = classify_sentences(sentences)
+    total_time = time.time() - st_time
+    logger.info(f"sentiment-classification exec time: {total_time:.3f}s")
+
+    return jsonify(result)
+
+
+@app.route("/respond_batch", methods=["POST"])
+def respond_batch():
+    st_time = time.time()
+    sentences = request.json.get("sentences", [])
+    result = classify_sentences(sentences)
+    total_time = time.time() - st_time
+    logger.info(f"sentiment-classification exec time: {total_time:.3f}s")
+
+    return jsonify([{"batch": result}])
diff --git a/annotators/MultilingualSentimentClassification/test.py b/annotators/MultilingualSentimentClassification/test.py
@@ -0,0 +1,20 @@
+import requests
+
+
+def test_respond():
+    url = "http://0.0.0.0:8024/respond"
+
+    sentences = ["Hola! Como estas?", "örnek metin", "Болван несчастный"]
+    gold = [
+        {"negative": 0.0274, "neutral": 0.706, "positive": 0.2666},
+        {"negative": 0.29077, "neutral": 0.33038, "positive": 0.37885},
+        {"negative": 0.94606, "neutral": 0.03936, "positive": 0.01458},
+    ]
+    request_data = {"sentences": sentences}
+    result = requests.post(url, json=request_data).json()
+    assert [{i: round(j[i], 5) for i in j} for j in result] == gold, f"Got\n{result}"
+    print("Success!")
+
+
+if __name__ == "__main__":
+    test_respond()
diff --git a/annotators/MultilingualSentimentClassification/test.sh b/annotators/MultilingualSentimentClassification/test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+python test.py
diff --git a/annotators/MultilingualToxicClassification/Dockerfile b/annotators/MultilingualToxicClassification/Dockerfile
@@ -0,0 +1,18 @@
+# Multilingual toxic classification annotator: serves server:app with gunicorn.
+FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime
+
+WORKDIR /src
+
+# Build arguments are re-exported as environment variables so that the
+# service (and the CMD below) can read them at runtime.
+ARG PRETRAINED_MODEL_NAME_OR_PATH
+ENV PRETRAINED_MODEL_NAME_OR_PATH=${PRETRAINED_MODEL_NAME_OR_PATH}
+ARG SERVICE_PORT
+ENV SERVICE_PORT=${SERVICE_PORT}
+
+# Dependencies are installed before the sources are copied so this layer stays
+# cached when only the code changes. --no-cache-dir keeps pip's download cache
+# out of the image.
+COPY ./requirements.txt /src/requirements.txt
+RUN pip install --no-cache-dir -r /src/requirements.txt
+
+# NOTE(review): build-time model download is disabled; presumably the weights are
+# loaded at startup or shipped with the build context - confirm.
+#RUN python -c "import torch; torch.hub.load_state_dict_from_url('${PRETRAINED_MODEL_NAME_OR_PATH}')"
+#RUN python -c "from transformers import AutoModelForSequenceClassification; AutoModelForSequenceClassification.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"
+
+COPY . /src
+
+# A shell is still needed to expand ${SERVICE_PORT}; exec replaces that shell with
+# gunicorn so it runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD ["/bin/sh", "-c", "exec gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=300"]
diff --git a/annotators/MultilingualToxicClassification/README.md b/annotators/MultilingualToxicClassification/README.md
@@ -0,0 +1 @@
+Coming soon ;)
diff --git a/annotators/MultilingualToxicClassification/multilingual_toxic_xlm_r b/annotators/MultilingualToxicClassification/multilingual_toxic_xlm_r
diff --git a/annotators/MultilingualToxicClassification/requirements.txt b/annotators/MultilingualToxicClassification/requirements.txt
@@ -0,0 +1,11 @@
+transformers==4.20.1
+sentencepiece==0.1.94
+flask==1.1.1
+gunicorn==19.9.0
+requests==2.22.0
+sentry-sdk[flask]==0.14.1
+healthcheck==1.3.3
+itsdangerous==2.0.1
+jinja2<=3.0.3
+Werkzeug<=2.0.3
+torch==1.8