Skip to content

Commit

Permalink
Merge pull request #206 from deeppavlov/dev
Browse files Browse the repository at this point in the history
Release 0.4.0
  • Loading branch information
dilyararimovna committed Oct 11, 2022
2 parents d42e1d6 + c3bb406 commit 4d5cb36
Show file tree
Hide file tree
Showing 134 changed files with 5,606 additions and 168 deletions.
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,6 @@ FACT_RANDOM_SERVICE_URL=http://fact-random:8119/respond
INFILLING_SERVICE_URL=http://infilling:8122/respond
DIALOGPT_SERVICE_URL=http://dialogpt:8091/respond
DIALOGPT_CONTINUE_SERVICE_URL=http://dialogpt:8125/continue
PROMPT_STORYGPT_SERVICE_URL=http://prompt-storygpt:8127/respond
STORYGPT_SERVICE_URL=http://storygpt:8126/respond
SENTENCE_RANKER_SERVICE_URL=http://sentence-ranker:8128/respond
100 changes: 100 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,105 @@ pipeline {
}
}
}

// Build-ML: runs the main test script in clean mode, then the multilingual
// test script in build mode.
stage('Build-ML') {
steps {
script{
// Snapshot of the build's elapsed time at stage start; subtracted below to get this stage's duration.
startTime = currentBuild.duration
// NOTE(review): `ex` is never read anywhere in this stage.
Exception ex = null
// A failure inside this closure sets both the stage result and the build result to FAILURE.
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
try {
sh '''
tests/runtests.sh MODE=clean
tests/runtests_multilingual.sh MODE=build
'''
}
catch (Exception e) {
// NOTE(review): `duration` is computed (presumably ms -> s) but not used before the rethrow.
int duration = (currentBuild.duration - startTime) / 1000
throw e
}
}
}
}
post {
failure {
script {
// After a failed build, run the multilingual script in clean mode.
sh 'tests/runtests_multilingual.sh MODE=clean'
}
}
success {
script {
// NOTE(review): `duration` is computed here but not used afterwards.
int duration = (currentBuild.duration - startTime) / 1000
}
}
}
}

// Start-ML: runs the multilingual test script in clean mode and, if that
// succeeds, in start mode.
stage('Start-ML') {
steps {
script {
// Snapshot of the build's elapsed time at stage start; subtracted below to get this stage's duration.
startTime = currentBuild.duration
// NOTE(review): `ex` is never read anywhere in this stage.
Exception ex = null
// A failure inside this closure sets both the stage result and the build result to FAILURE.
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
try {
// `&&` makes the start step run only when the clean step exits successfully.
sh 'tests/runtests_multilingual.sh MODE=clean && tests/runtests_multilingual.sh MODE=start'
}
catch (Exception e) {
// NOTE(review): `duration` is computed (presumably ms -> s) but not used before the rethrow.
int duration = (currentBuild.duration - startTime) / 1000
throw e
}
}
}
}
post {
failure {
script {
// After a failed start, run the multilingual script in clean mode.
sh 'tests/runtests_multilingual.sh MODE=clean'
}
}
success {
script {
// Flag read by the pipeline-level `post` section, which only runs its clean-up commands when it is set.
started = true
// NOTE(review): `duration` is computed here but not used afterwards.
int duration = (currentBuild.duration - startTime) / 1000
}
}
aborted {
script {
// When the stage is aborted, run the multilingual script in clean mode.
sh 'tests/runtests_multilingual.sh MODE=clean'
}
}
}
}

// Test skills-ML: runs the multilingual test script in test_skills mode.
stage('Test skills-ML') {
steps {
script {
// Snapshot of the build's elapsed time at stage start; subtracted below to get this stage's duration.
startTime = currentBuild.duration
// NOTE(review): `ex` is never read anywhere in this stage.
Exception ex = null
// A failure inside this closure sets both the stage result and the build result to FAILURE.
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
try {
sh label: 'test skills', script: 'tests/runtests_multilingual.sh MODE=test_skills'
}
catch (Exception e) {
// NOTE(review): `duration` is computed (presumably ms -> s) but not used before the rethrow.
int duration = (currentBuild.duration - startTime) / 1000
throw e
}
}
}
}
post {
// NOTE(review): unlike Build-ML and Start-ML, this stage has no `failure` clean-up of its own;
// confirm that the pipeline-level `post` section is meant to cover that case.
success {
script {
// NOTE(review): `duration` is computed here but not used afterwards.
int duration = (currentBuild.duration - startTime) / 1000
}
}
aborted {
script {
// When the stage is aborted, run the multilingual script in clean mode.
sh 'tests/runtests_multilingual.sh MODE=clean'
}
}
}
}
}
post {
aborted {
Expand All @@ -268,6 +367,7 @@ pipeline {
if (started) {
sh './tests/runtests.sh MODE=clean'
sh './tests/runtests_russian.sh MODE=clean'
sh './tests/runtests_multilingual.sh MODE=clean'
}
}
}
Expand Down
96 changes: 71 additions & 25 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions annotators/COMeT/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ requests==2.22.0
sentry-sdk[asgi]==1.3.1
jinja2<=3.0.3
Werkzeug<=2.0.3
importlib-metadata<5.0
2 changes: 1 addition & 1 deletion annotators/ConversationEvaluator/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM deeppavlov/base-gpu:0.12.0
RUN pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.12.0
RUN pip install --upgrade pip && pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.12.0

ARG CONFIG
ARG DATA_URL=http://files.deeppavlov.ai/alexaprize_data/cobot_conveval2.tar.gz
Expand Down
2 changes: 1 addition & 1 deletion annotators/IntentCatcherTransformers/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM deeppavlov/base-gpu:0.17.2
RUN pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.17.2
RUN pip install --upgrade pip && pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.17.2

RUN apt-key del 7fa2af80 && \
rm -f /etc/apt/sources.list.d/cuda*.list && \
Expand Down
1 change: 0 additions & 1 deletion annotators/IntentCatcherTransformers/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ huggingface-hub==0.0.8
datasets==1.11.0
scikit-learn==0.21.2
xeger==0.3.5
transformers==4.6.0
torch==1.6.0
torchvision==0.7.0
cryptography==2.8
19 changes: 19 additions & 0 deletions annotators/MultilingualSentimentClassification/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Image for the multilingual sentiment-classification annotator: a Flask app
# (server.py) served by gunicorn on SERVICE_PORT.
FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime

WORKDIR /src

# Both values are supplied as build arguments and persisted into the runtime
# environment: server.py reads PRETRAINED_MODEL_NAME_OR_PATH and the CMD below
# reads SERVICE_PORT.
ARG PRETRAINED_MODEL_NAME_OR_PATH
ENV PRETRAINED_MODEL_NAME_OR_PATH=${PRETRAINED_MODEL_NAME_OR_PATH}
ARG SERVICE_PORT
ENV SERVICE_PORT=${SERVICE_PORT}

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt changes. --no-cache-dir keeps pip's download cache
# out of the image.
COPY ./requirements.txt /src/requirements.txt
RUN pip install --no-cache-dir -r /src/requirements.txt
# sentencepiece is pinned for reproducible builds (same version the sibling
# MultilingualToxicClassification service pins in its requirements.txt).
RUN pip install --no-cache-dir sentencepiece==0.1.94

# Instantiate the tokenizer and the model once at build time; presumably this
# caches the pretrained files inside the image so start-up needs no download.
RUN python -c "from transformers import XLMRobertaTokenizer; XLMRobertaTokenizer.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"
RUN python -c "from transformers import XLMRobertaForSequenceClassification; XLMRobertaForSequenceClassification.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"

COPY . /src

# Shell form is kept because ${SERVICE_PORT} must be expanded at container
# start; `exec` replaces the wrapping shell so gunicorn runs as PID 1 and
# receives the stop signal from `docker stop` directly.
CMD exec gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=300
1 change: 1 addition & 0 deletions annotators/MultilingualSentimentClassification/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Coming soon ;)
10 changes: 10 additions & 0 deletions annotators/MultilingualSentimentClassification/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
transformers==4.20.1
flask==1.1.1
gunicorn==19.9.0
requests==2.22.0
sentry-sdk[flask]==0.14.1
healthcheck==1.3.3
itsdangerous==2.0.1
jinja2<=3.0.3
Werkzeug<=2.0.3
torch==1.8
78 changes: 78 additions & 0 deletions annotators/MultilingualSentimentClassification/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging
import time
import os

import sentry_sdk
import torch
from flask import Flask, request, jsonify
from sentry_sdk.integrations.flask import FlaskIntegration
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer

# Error reporting: the DSN comes from the SENTRY_DSN environment variable.
sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"), integrations=[FlaskIntegration()])


logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# Name or path of the pretrained model, handed to `from_pretrained` below.
PRETRAINED_MODEL_NAME_OR_PATH = os.environ.get("PRETRAINED_MODEL_NAME_OR_PATH")
# Use the module logger (not the root logger) so every record from this file
# carries the same logger name.
logger.info(f"PRETRAINED_MODEL_NAME_OR_PATH = {PRETRAINED_MODEL_NAME_OR_PATH}")
# Output labels; position i names the i-th score produced by the model.
columns = ["negative", "neutral", "positive"]

# Load the tokenizer and the model once at import time. Any failure is
# reported to Sentry, logged, and re-raised so the service does not start
# without a model.
try:
    tokenizer = XLMRobertaTokenizer.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
    model = XLMRobertaForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
    model.eval()
    if torch.cuda.is_available():
        model.to("cuda")
        logger.info("sentiment-classification is set to run on cuda")

    logger.info("sentiment-classification model is ready")
except Exception as e:
    sentry_sdk.capture_exception(e)
    logger.exception(e)
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

app = Flask(__name__)
# Keep werkzeug's per-request log lines out of the service log.
logging.getLogger("werkzeug").setLevel("WARNING")


def classify_sentences(sentences):
    """Score every sentence against the labels listed in ``columns``.

    Args:
        sentences: the texts to classify; passed to the tokenizer as one
            padded, truncated batch.

    Returns:
        A list with one dict per sentence, mapping each label in ``columns``
        to a float taken from the softmax over the model's first output.
        Empty or missing input yields an empty list. If tokenization or
        inference fails, the error is logged and reported to Sentry and every
        sentence gets ``0.0`` for each label.
    """
    # Nothing to classify: skip the tokenizer/model round trip entirely
    # (also covers ``None``, which would otherwise crash in the fallback).
    if not sentences:
        return []

    try:
        inputs = tokenizer(sentences, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            if torch.cuda.is_available():
                inputs = inputs.to("cuda")
            outputs = model(**inputs)[0]
            model_output = torch.nn.functional.softmax(outputs, dim=-1).cpu()

        result = [
            {column: float(scores[idx]) for idx, column in enumerate(columns)}
            for scores in model_output
        ]
    except Exception as exc:
        logger.exception(exc)
        sentry_sdk.capture_exception(exc)
        # Build a separate dict per sentence so the entries do not alias one
        # shared object.
        result = [{column: 0.0 for column in columns} for _ in sentences]
    return result


@app.route("/respond", methods=["POST"])
def respond():
    """Classify the ``sentences`` of the POSTed JSON body.

    Returns the list produced by ``classify_sentences`` as a JSON response;
    a body without a ``sentences`` key is treated as an empty list.
    """
    st_time = time.time()
    payload = request.json
    scores = classify_sentences(payload.get("sentences", []))
    total_time = time.time() - st_time
    logger.info(f"sentiment-classification exec time: {total_time:.3f}s")

    return jsonify(scores)


@app.route("/respond_batch", methods=["POST"])
def respond_batch():
    """Classify the ``sentences`` of the POSTed JSON body, batch-style.

    Same processing as ``/respond``, but the scores are wrapped as
    ``[{"batch": <scores>}]`` in the JSON response.
    """
    st_time = time.time()
    payload = request.json
    scores = classify_sentences(payload.get("sentences", []))
    total_time = time.time() - st_time
    logger.info(f"sentiment-classification exec time: {total_time:.3f}s")

    wrapped = [{"batch": scores}]
    return jsonify(wrapped)
20 changes: 20 additions & 0 deletions annotators/MultilingualSentimentClassification/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import requests


def test_respond():
    """POST three sample sentences to /respond and compare against gold scores.

    Each returned score is rounded to 5 decimal places before the comparison;
    the service is expected to listen on port 8024.
    """
    url = "http://0.0.0.0:8024/respond"

    sentences = ["Hola! Como estas?", "örnek metin", "Болван несчастный"]
    gold = [
        {"negative": 0.0274, "neutral": 0.706, "positive": 0.2666},
        {"negative": 0.29077, "neutral": 0.33038, "positive": 0.37885},
        {"negative": 0.94606, "neutral": 0.03936, "positive": 0.01458},
    ]

    response = requests.post(url, json={"sentences": sentences})
    result = response.json()

    rounded = []
    for scores in result:
        rounded.append({label: round(scores[label], 5) for label in scores})

    assert rounded == gold, f"Got\n{result}"
    print("Success!")


if __name__ == "__main__":
    test_respond()
3 changes: 3 additions & 0 deletions annotators/MultilingualSentimentClassification/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Run the annotator's test script (test.py) with `python`.
python test.py
18 changes: 18 additions & 0 deletions annotators/MultilingualToxicClassification/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image for the multilingual toxic-classification annotator: the `server:app`
# application served by gunicorn on SERVICE_PORT.
FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime

WORKDIR /src

# Both values are supplied as build arguments and persisted into the runtime
# environment; the CMD below reads SERVICE_PORT at container start.
ARG PRETRAINED_MODEL_NAME_OR_PATH
ENV PRETRAINED_MODEL_NAME_OR_PATH=${PRETRAINED_MODEL_NAME_OR_PATH}
ARG SERVICE_PORT
ENV SERVICE_PORT=${SERVICE_PORT}

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt changes. --no-cache-dir keeps pip's download cache
# out of the image.
COPY ./requirements.txt /src/requirements.txt
RUN pip install --no-cache-dir -r /src/requirements.txt

# NOTE(review): the build-time model pre-fetch below is disabled; confirm
# whether the weights are expected to arrive with `COPY . /src` or to be
# fetched when the service starts.
#RUN python -c "import torch; torch.hub.load_state_dict_from_url('${PRETRAINED_MODEL_NAME_OR_PATH}')"
#RUN python -c "from transformers import AutoModelForSequenceClassification; AutoModelForSequenceClassification.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"

COPY . /src

# Shell form is kept because ${SERVICE_PORT} must be expanded at container
# start; `exec` replaces the wrapping shell so gunicorn runs as PID 1 and
# receives the stop signal from `docker stop` directly.
CMD exec gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=300
1 change: 1 addition & 0 deletions annotators/MultilingualToxicClassification/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Coming soon ;)
Binary file not shown.
11 changes: 11 additions & 0 deletions annotators/MultilingualToxicClassification/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
transformers==4.20.1
sentencepiece==0.1.94
flask==1.1.1
gunicorn==19.9.0
requests==2.22.0
sentry-sdk[flask]==0.14.1
healthcheck==1.3.3
itsdangerous==2.0.1
jinja2<=3.0.3
Werkzeug<=2.0.3
torch==1.8
Loading

0 comments on commit 4d5cb36

Please sign in to comment.