Feat/sentence ranker as a service #191

Merged: 19 commits, Aug 29, 2022
6 changes: 3 additions & 3 deletions assistant_dists/dream_mini/cpu.yml
@@ -1,14 +1,14 @@
version: '3.7'
services:
convers-evaluator-annotator:
dialogpt:
environment:
DEVICE: cpu
CUDA_VISIBLE_DEVICES: ""
dialogpt:
intent-catcher:
environment:
DEVICE: cpu
CUDA_VISIBLE_DEVICES: ""
intent-catcher:
sentence-ranker:
environment:
DEVICE: cpu
CUDA_VISIBLE_DEVICES: ""
11 changes: 5 additions & 6 deletions assistant_dists/dream_mini/dev.yml
@@ -5,12 +5,6 @@ services:
- ".:/dp-agent"
ports:
- 4242:4242
convers-evaluator-annotator:
volumes:
- "./annotators/ConversationEvaluator:/src"
- "~/.deeppavlov:/root/.deeppavlov"
ports:
- 8004:8004
dff-program-y-skill:
volumes:
- "./skills/dff_program_y_skill:/src"
@@ -57,4 +51,9 @@ services:
- "./services/dialogpt:/src"
ports:
- 8125:8125
sentence-ranker:
volumes:
- "./services/sentence_ranker:/src"
ports:
- 8128:8128
version: "3.7"
41 changes: 20 additions & 21 deletions assistant_dists/dream_mini/docker-compose.override.yml
@@ -2,29 +2,10 @@ services:
agent:
command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_mini/pipeline_conf.json'
environment:
WAIT_HOSTS: "convers-evaluator-annotator:8004, dff-program-y-skill:8008, sentseg:8011, convers-evaluation-selector:8009,
WAIT_HOSTS: "dff-program-y-skill:8008, sentseg:8011, convers-evaluation-selector:8009,
dff-intent-responder-skill:8012, intent-catcher:8014, badlisted-words:8018,
spelling-preprocessing:8074, dialogpt:8125"
spelling-preprocessing:8074, dialogpt:8125, sentence-ranker:8128"
WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480}
convers-evaluator-annotator:
env_file: [.env]
build:
args:
CONFIG: conveval.json
PORT: 8004
DATA_URL: https://files.deeppavlov.ai/alexaprize_data/cobot_conveval2.tar.gz
context: .
dockerfile: ./annotators/ConversationEvaluator/Dockerfile
environment:
- CUDA_VISIBLE_DEVICES=0
deploy:
mode: replicated
replicas: 1
resources:
limits:
memory: 2G
reservations:
memory: 2G

dff-program-y-skill:
env_file: [.env]
@@ -173,4 +154,22 @@ services:
reservations:
memory: 2G

sentence-ranker:
env_file: [ .env ]
build:
args:
SERVICE_PORT: 8128
PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/bert-base-nli-mean-tokens
context: ./services/sentence_ranker/
command: flask run -h 0.0.0.0 -p 8128
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
limits:
memory: 3G
reservations:
memory: 3G

version: '3.7'
6 changes: 3 additions & 3 deletions assistant_dists/dream_mini/pipeline_conf.json
@@ -202,13 +202,13 @@
],
"state_manager_method": "add_hypothesis_annotation_batch"
},
"convers_evaluator_annotator": {
"sentence_ranker": {
"connector": {
"protocol": "http",
"timeout": 1,
"url": "http://convers-evaluator-annotator:8004/batch_model"
"url": "http://sentence-ranker:8128/respond"
},
"dialog_formatter": "state_formatters.dp_formatters:convers_evaluator_annotator_formatter",
"dialog_formatter": "state_formatters.dp_formatters:sentence_ranker_formatter",
"response_formatter": "state_formatters.dp_formatters:simple_formatter_service",
"previous_services": ["skills"],
"state_manager_method": "add_hypothesis_annotation_batch"
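The pipeline entry above points the new `sentence_ranker` hypothesis annotator at `http://sentence-ranker:8128/respond` and a new `sentence_ranker_formatter`. The formatter itself is not part of this diff; the sketch below only illustrates the kind of payload the service would need (the `sentence_pairs` key and the exact dialog fields are assumptions):

```python
# Hypothetical illustration only; the real formatter lives in
# state_formatters/dp_formatters.py and is not shown in this pull request.
def sentence_ranker_formatter_sketch(dialog: dict) -> list:
    # Pair the user's last utterance with every candidate hypothesis so the ranker
    # can score each hypothesis against the current dialog context.
    last_human_utterance = dialog["human_utterances"][-1]["text"]
    hypotheses = dialog["human_utterances"][-1]["hypotheses"]
    return [{"sentence_pairs": [[last_human_utterance, hyp["text"]] for hyp in hypotheses]}]
```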
16 changes: 8 additions & 8 deletions assistant_dists/dream_mini/proxy.yml
@@ -1,12 +1,4 @@
services:
convers-evaluator-annotator:
command: ["nginx", "-g", "daemon off;"]
build:
context: dp/proxy/
dockerfile: Dockerfile
environment:
- PROXY_PASS=dream.deeppavlov.ai:8004
- PORT=8004

dff-program-y-skill:
command: ["nginx", "-g", "daemon off;"]
@@ -80,4 +72,12 @@ services:
- PROXY_PASS=dream.deeppavlov.ai:8125
- PORT=8125

sentence-ranker:
command: [ "nginx", "-g", "daemon off;" ]
build:
context: dp/proxy/
dockerfile: Dockerfile
environment:
- PROXY_PASS=dream.deeppavlov.ai:8128
- PORT=8128
version: '3.7'
12 changes: 0 additions & 12 deletions common/utils.py
@@ -1208,18 +1208,6 @@ def is_special_factoid_question(annotated_utterance):
)


def get_conv_eval_annotations(annotated_utterance):
default_conv_eval = {
"isResponseOnTopic": 0.0,
"isResponseInteresting": 0.0,
"responseEngagesUser": 0.0,
"isResponseComprehensible": 0.0,
"isResponseErroneous": 0.0,
}

return annotated_utterance.get("annotations", {}).get("convers_evaluator_annotator", default_conv_eval)


def get_dialog_breakdown_annotations(annotated_utterance):
breakdown = annotated_utterance.get("annotations", {}).get("dialog_breakdown", {}).get("breakdown", 0.0) > 0.5
return breakdown
24 changes: 8 additions & 16 deletions response_selectors/convers_evaluation_based_selector/server.py
@@ -20,17 +20,14 @@
low_priority_intents,
substitute_nonwords,
is_toxic_or_badlisted_utterance,
get_conv_eval_annotations,
)
from tag_based_selection import tag_based_response_selection
from utils import (
add_question_to_statement,
lower_duplicates_score,
lower_retrieve_skills_confidence_if_scenario_exist,
calculate_single_convers_evaluator_score,
calculate_single_evaluator_score,
downscore_toxic_badlisted_responses,
CONV_EVAL_STRENGTH,
CONFIDENCE_STRENGTH,
how_are_you_spec,
what_i_can_do_spec,
misheard_with_spec1,
@@ -104,7 +101,9 @@ def respond():
)
logger.info(msg)

curr_scores += [get_conv_eval_annotations(skill_data)]
curr_scores += [
calculate_single_evaluator_score(skill_data.get("annotations"), skill_data["confidence"])
]

curr_is_toxics = np.array(curr_is_toxics)
curr_scores = np.array(curr_scores)
@@ -293,24 +292,17 @@ def rule_score_based_selection(dialog, candidates, scores, confidences, is_toxic
dummy_question_human_attr = candidates[i].get("human_attributes", {})

if curr_score is None:
cand_scores = scores[i]
score = scores[i]
confidence = confidences[i]
skill_name = skill_names[i]
score_conv_eval = calculate_single_convers_evaluator_score(cand_scores)
score = CONV_EVAL_STRENGTH * score_conv_eval + CONFIDENCE_STRENGTH * confidence
logger.info(
f"Skill {skill_name} has final score: {score}. Confidence: {confidence}. "
f"Toxicity: {is_toxics[i]}. Cand scores: {cand_scores}"
f"Skill {skill_name} has final score: {score}. Confidence: {confidence}. " f"Toxicity: {is_toxics[i]}"
)
curr_single_scores.append(score)
else:
cand_scores = scores[i]
score = scores[i]
skill_name = skill_names[i]
score_conv_eval = calculate_single_convers_evaluator_score(cand_scores)
score = CONV_EVAL_STRENGTH * score_conv_eval + curr_score
logger.info(
f"Skill {skill_name} has final score: {score}. " f"Toxicity: {is_toxics[i]}. Cand scores: {cand_scores}"
)
logger.info(f"Skill {skill_name} has final score: {score}. " f"Toxicity: {is_toxics[i]}")
curr_single_scores.append(score)

highest_conf_exist = True if any(confidences >= 1.0) else False
response_selectors/convers_evaluation_based_selector/tag_based_selection.py
@@ -28,9 +28,6 @@
get_dialog_breakdown_annotations,
)
from utils import (
calculate_single_convers_evaluator_score,
CONV_EVAL_STRENGTH,
CONFIDENCE_STRENGTH,
how_are_you_spec,
what_i_can_do_spec,
misheard_with_spec1,
@@ -251,28 +248,6 @@ def acknowledgement_decision(all_user_intents):
return False


def compute_curr_single_scores(candidates, scores, confidences):
curr_single_scores = []
if all(["hypothesis_scorer" in cand["annotations"] for cand in candidates]):
for i in range(len(candidates)):
curr_single_scores.append(candidates[i]["annotations"]["hypothesis_scorer"])
else:
for i in range(len(scores)):
cand_scores = scores[i]
confidence = confidences[i]
skill_name = candidates[i]["skill_name"]
if all(["dialogrpt" in cand["annotations"] for cand in candidates]):
score_conv_eval = candidates[i]["annotations"]["dialogrpt"]
else:
score_conv_eval = calculate_single_convers_evaluator_score(cand_scores)
score = CONV_EVAL_STRENGTH * score_conv_eval + CONFIDENCE_STRENGTH * confidence

logger.info(f"Skill {skill_name} has final score: {score}. Confidence: {confidence}.")
curr_single_scores.append(score)

return curr_single_scores


def add_to_top1_category(cand_id, categorized, _is_require_action_intent):
if _is_require_action_intent:
categorized["active_same_topic_entity_no_db_reqda"].append(cand_id)
@@ -351,7 +326,9 @@ def rule_based_prioritization(cand_uttr, dialog):
return flag


def tag_based_response_selection(dialog, candidates, scores, confidences, bot_utterances, all_prev_active_skills=None):
def tag_based_response_selection(
dialog, candidates, curr_single_scores, confidences, bot_utterances, all_prev_active_skills=None
):
all_prev_active_skills = all_prev_active_skills if all_prev_active_skills is not None else []
all_prev_active_skills = Counter(all_prev_active_skills)
annotated_uttr = dialog["human_utterances"][-1]
@@ -423,6 +400,10 @@ def tag_based_response_selection(dialog, candidates, scores, confidences, bot_ut
if confidences[cand_id] == 0.0 and cand_uttr["skill_name"] not in ACTIVE_SKILLS:
logger.info(f"Dropping cand_id: {cand_id} due to toxicity/badlists")
continue
skill_name = cand_uttr["skill_name"]
confidence = confidences[cand_id]
score = curr_single_scores[cand_id]
logger.info(f"Skill {skill_name} has final score: {score}. Confidence: {confidence}.")

all_cand_intents, all_cand_topics, all_cand_named_entities, all_cand_nounphrases = get_main_info_annotations(
cand_uttr
@@ -646,7 +627,6 @@ def tag_based_response_selection(dialog, candidates, scores, confidences, bot_ut

logger.info(f"Current CASE: {CASE}")
# now compute current scores as one float value
curr_single_scores = compute_curr_single_scores(candidates, scores, confidences)

# remove disliked skills from hypotheses
if IGNORE_DISLIKED_SKILLS:
54 changes: 31 additions & 23 deletions response_selectors/convers_evaluation_based_selector/utils.py
@@ -119,8 +119,7 @@ def lower_duplicates_score(candidates, bot_utt_counter, scores, confidences):
# apply penalties to non-script skills and in case if response consists only from duplicates
if confidences[i] < 1.0 or n_duplicates == len(cand_sents):
confidences[i] /= coeff
scores[i]["isResponseInteresting"] /= coeff
scores[i]["responseEngagesUser"] /= coeff
scores[i] /= coeff


def lower_retrieve_skills_confidence_if_scenario_exist(candidates, scores, confidences):
@@ -134,33 +133,42 @@ def lower_retrieve_skills_confidence_if_scenario_exist(candidates, scores, confi
for i, cand in enumerate(candidates):
if cand["skill_name"] in retrieve_skills:
confidences[i] *= lower_coeff
scores[i]["isResponseInteresting"] *= lower_coeff


def calculate_single_convers_evaluator_score(cand_scores):
score_conv_eval = sum(
[
cand_scores["isResponseOnTopic"],
cand_scores["isResponseInteresting"],
cand_scores["responseEngagesUser"],
cand_scores["isResponseComprehensible"],
]
)
score_conv_eval -= cand_scores["isResponseErroneous"]
return score_conv_eval
scores[i] *= lower_coeff


def calculate_single_evaluator_score(hypothesis_annotations, confidence):
if "convers_evaluator_annotator" in hypothesis_annotations:
cand_scores = hypothesis_annotations["convers_evaluator_annotator"]
score_conv_eval = sum(
[
cand_scores["isResponseOnTopic"],
cand_scores["isResponseInteresting"],
cand_scores["responseEngagesUser"],
cand_scores["isResponseComprehensible"],
]
)
score_conv_eval -= cand_scores["isResponseErroneous"]
score = CONV_EVAL_STRENGTH * score_conv_eval + CONFIDENCE_STRENGTH * confidence
return score
elif "dialogrpt" in hypothesis_annotations:
score_conv_eval = hypothesis_annotations["dialogrpt"]
score = CONV_EVAL_STRENGTH * score_conv_eval + CONFIDENCE_STRENGTH * confidence
return score
elif "sentence_ranker" in hypothesis_annotations:
score_conv_eval = hypothesis_annotations["sentence_ranker"]
score = CONV_EVAL_STRENGTH * score_conv_eval + CONFIDENCE_STRENGTH * confidence
return score
elif "hypothesis_scorer" in hypothesis_annotations:
return hypothesis_annotations["hypothesis_scorer"]
else:
return 0.0


def downscore_toxic_badlisted_responses(scores, confidences, is_toxics):
# exclude toxic messages and messages with badlisted phrases
ids = np.arange(len(confidences))[is_toxics]
logger.info(f"Bot excluded utterances: {ids}. is_toxics: {is_toxics}")
scores[ids] = {
"isResponseOnTopic": 0.0,
"isResponseInteresting": 0.0,
"responseEngagesUser": 0.0,
"isResponseComprehensible": 0.0,
"isResponseErroneous": 1.0,
}
scores[ids] = 0.0
confidences[ids] = 0.0

return len(ids), scores, confidences
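For context on the new `calculate_single_evaluator_score` helper above: it dispatches on whichever hypothesis annotation is present (`convers_evaluator_annotator`, `dialogrpt`, `sentence_ranker`, or `hypothesis_scorer`) and mixes the evaluator score with the skill confidence. The weights `CONV_EVAL_STRENGTH` and `CONFIDENCE_STRENGTH` are defined elsewhere in `utils.py` and are not visible in this diff, so the numbers below are placeholders:

```python
# Illustrative call; the annotation value, confidence, and weights are made up for the example.
annotations = {"sentence_ranker": 0.81}
confidence = 0.9
# With hypothetical weights CONV_EVAL_STRENGTH = 0.4 and CONFIDENCE_STRENGTH = 0.6,
# the hypothesis would score 0.4 * 0.81 + 0.6 * 0.9 = 0.864.
score = calculate_single_evaluator_score(annotations, confidence)
```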
23 changes: 23 additions & 0 deletions services/sentence_ranker/Dockerfile
@@ -0,0 +1,23 @@
# syntax=docker/dockerfile:experimental

FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime

WORKDIR /src

ARG PRETRAINED_MODEL_NAME_OR_PATH
ENV PRETRAINED_MODEL_NAME_OR_PATH ${PRETRAINED_MODEL_NAME_OR_PATH}
ARG SERVICE_PORT
ENV SERVICE_PORT ${SERVICE_PORT}

RUN mkdir /data/

COPY ./requirements.txt /src/requirements.txt
RUN pip install -r /src/requirements.txt

RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"
RUN python -c "from transformers import AutoModel; AutoModel.from_pretrained('${PRETRAINED_MODEL_NAME_OR_PATH}');"

COPY . /src

CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=300

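The image starts `gunicorn server:app`, but `server.py` itself is not included in this diff. Below is a minimal sketch of what such a module could look like, assuming a Flask app, a POST `/respond` endpoint (matching `pipeline_conf.json`), a `sentence_pairs` request key, and mean-pooled embeddings compared by cosine similarity; the actual request/response formats and scoring may differ.

```python
# Sketch only: the actual services/sentence_ranker/server.py is not part of this diff.
# Payload keys, response shape, and the pooling strategy are assumptions.
import os

import torch
from flask import Flask, jsonify, request
from transformers import AutoModel, AutoTokenizer

PRETRAINED_MODEL_NAME_OR_PATH = os.environ.get(
    "PRETRAINED_MODEL_NAME_OR_PATH", "sentence-transformers/bert-base-nli-mean-tokens"
)

tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
model = AutoModel.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
model.eval()

app = Flask(__name__)


@app.route("/respond", methods=["POST"])
def respond():
    # Assumed payload: {"sentence_pairs": [["context", "hypothesis"], ...]}
    sentence_pairs = request.json.get("sentence_pairs", [])
    scores = []
    with torch.no_grad():
        for first, second in sentence_pairs:
            encoded = tokenizer([first, second], padding=True, truncation=True, return_tensors="pt")
            token_embeddings = model(**encoded).last_hidden_state  # (2, seq_len, hidden)
            mask = encoded["attention_mask"].unsqueeze(-1)
            # Mean-pool token embeddings (ignoring padding), then compare the two sentences.
            sentence_embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1)
            similarity = torch.nn.functional.cosine_similarity(
                sentence_embeddings[0], sentence_embeddings[1], dim=0
            )
            scores.append(float(similarity))
    return jsonify(scores)
```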
9 changes: 9 additions & 0 deletions services/sentence_ranker/README.md
@@ -0,0 +1,9 @@
# Sentence Ranker Service

This is a universal service for evaluating sentence pairs.

The model can be any suitable model from the Hugging Face library, passed via the `PRETRAINED_MODEL_NAME_OR_PATH` parameter.

The service accepts a batch of sentence pairs (a pair is a list of two strings) and returns a batch of floating-point values.

To rank a list of sentence pairs, obtain the floating-point value for each pair and select the pair with the highest value.
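A minimal client sketch for calling the running service (the payload key and response shape follow the assumptions noted elsewhere in this PR and may differ from the actual implementation):

```python
import requests

# Hypothetical example pairs; the first element could be the dialog context
# and the second a candidate response to score against it.
sentence_pairs = [
    ["What do you like to do on weekends?", "I usually go hiking with my dog."],
    ["What do you like to do on weekends?", "Paris is the capital of France."],
]
result = requests.post("http://0.0.0.0:8128/respond", json={"sentence_pairs": sentence_pairs})
print(result.json())  # expected: one floating-point relevance score per pair
```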