diff --git a/annotators/emotion_detection/Dockerfile b/annotators/emotion_detection/Dockerfile
new file mode 100644
index 0000000000..b0f1dad392
--- /dev/null
+++ b/annotators/emotion_detection/Dockerfile
@@ -0,0 +1,38 @@
+FROM nvidia/cuda:12.1.1-base-ubuntu20.04
+
+RUN apt update
+RUN apt install -y python3.9
+RUN apt install -y git python3-pip
+
+ARG VIDEO_PRETRAINED
+ARG TEXT_PRETRAINED
+ARG MODEL_PATH
+ARG MULTIMODAL_MODEL
+ARG REDUNDANT_FEATURES
+
+ENV VIDEO_PRETRAINED=$VIDEO_PRETRAINED
+ENV TEXT_PRETRAINED=$TEXT_PRETRAINED
+ENV MULTIMODAL_MODEL=$MULTIMODAL_MODEL
+ENV MODEL_PATH=$MODEL_PATH
+ENV REDUNDANT_FEATURES=$REDUNDANT_FEATURES
+
+WORKDIR /src
+
+COPY . /src
+RUN mkdir /data
+RUN pip install -r requirements.txt
+
+RUN apt install -y ffmpeg=7:4.2.7-0ubuntu0.1 libsm6=2:1.2.3-1 libxext6=2:1.3.4-0ubuntu1
+
+RUN pip install gdown==4.7.1
+
+RUN git clone https://github.com/anna-a-m/MultimodalERC /data/repo && cd /data/repo && git reset --hard 84097d442b23b5a9238b5090a04e2625741314ae
+
+RUN mv -f /data/repo/* /data/ && rm -rf /data/repo
+
+RUN touch /data/multimodal_concat/__init__.py
+
+RUN apt-get install -y wget
+
+RUN wget -O models http://files.deeppavlov.ai/dream_data/emotion_detection/emotion_detection_v1.tar.gz && tar -xf models -C /data/
+RUN wget -O redundant_feat http://files.deeppavlov.ai/dream_data/emotion_detection/redundant_feat.txt && mv -f redundant_feat /data/
\ No newline at end of file
diff --git a/annotators/emotion_detection/aux.py b/annotators/emotion_detection/aux.py
new file mode 100644
index 0000000000..2a2a0732bd
--- /dev/null
+++ b/annotators/emotion_detection/aux.py
@@ -0,0 +1,4 @@
+import sys
+
+sys.path.append("/data")
+sys.path.append("/data/multimodal_concat")
diff --git a/annotators/emotion_detection/requirements.txt b/annotators/emotion_detection/requirements.txt
new file mode 100644
index 0000000000..9dbf8baf15
--- /dev/null
+++ b/annotators/emotion_detection/requirements.txt
@@ -0,0 +1,16 @@
+pandas==1.5.3
+scikit-learn==1.3.0
+tqdm==4.64.1
+opencv-python==4.7.0.68
+opensmile==2.4.2
+sentry-sdk==1.15.0
+torch==1.13.1
+transformers==4.31.0
+fastapi==0.103.0
+blinker==1.5.0
+pydantic==2.3.0
+numpy==1.24.4
+starlette==0.27.0
+uvicorn==0.23.2
+Pillow==9.3.0
+wandb==0.13.9
\ No newline at end of file
diff --git a/annotators/emotion_detection/server.py b/annotators/emotion_detection/server.py
new file mode 100644
index 0000000000..f405fbacdd
--- /dev/null
+++ b/annotators/emotion_detection/server.py
@@ -0,0 +1,199 @@
+import logging
+import os
+import opensmile
+import torch
+import numpy as np
+import sentry_sdk
+import cv2
+import aux  # noqa: F401
+
+from multimodal_concat.models import MultimodalClassificationModel, MainModel
+from multimodal_concat.utils import prepare_models
+
+from fastapi import FastAPI
+from fastapi.encoders import jsonable_encoder
+from pydantic import BaseModel
+from starlette.middleware.cors import CORSMiddleware
+from transformers import AutoTokenizer, AutoProcessor
+from typing import List
+from urllib.request import urlretrieve
+
+sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"))
+
+label2id = {
+    "anger": 0,
+    "disgust": 1,
+    "fear": 2,
+    "joy": 3,
+    "neutral": 4,
+    "sadness": 5,
+    "surprise": 6,
+}
+num_labels = 7
+text_model, video_model, audio_model = prepare_models(num_labels, os.getenv("MODEL_PATH"))
+
+logger = logging.getLogger(__name__)
+
+
+def sample_frame_indices(seg_len, clip_len=16, frame_sample_rate=4, mode="video"):
+    converted_len = int(clip_len * frame_sample_rate)
+    converted_len = min(converted_len, seg_len - 1)
+    end_idx = np.random.randint(converted_len, seg_len)
+    start_idx = end_idx - converted_len
+    if mode == "video":
+        indices = np.linspace(start_idx, end_idx, num=clip_len)
+    else:
+        indices = np.linspace(start_idx, end_idx, num=clip_len * frame_sample_rate)
+    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
+    return indices
+
+
+def get_frames(
+    file_path,
+    clip_len=16,
+):
+    cap = cv2.VideoCapture(file_path)
+    v_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    indices = sample_frame_indices(v_len)
+
+    frames = []
+    for fn in range(v_len):
+        success, frame = cap.read()
+        if success is False:
+            continue
+        if fn in indices:
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            res = cv2.resize(frame, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
+            frames.append(res)
+    cap.release()
+
+    if len(frames) < clip_len:
+        add_num = clip_len - len(frames)
+        frames_to_add = [frames[-1]] * add_num
+        frames.extend(frames_to_add)
+
+    return frames
+
+
+def create_final_model():
+    multi_model = MultimodalClassificationModel(
+        text_model,
+        video_model,
+        audio_model,
+        num_labels,
+        input_size=4885,
+        hidden_size=512,
+    )
+    checkpoint = torch.load(os.getenv("MULTIMODAL_MODEL"))
+    multi_model.load_state_dict(checkpoint)
+
+    device = "cuda"
+    return MainModel(multi_model, device=device)
+
+
+def process_text(input_tokens: str):
+    text_model_name = os.getenv("TEXT_PRETRAINED")
+    logger.info(f"{text_model_name}")
+    tokenizer = AutoTokenizer.from_pretrained(text_model_name)
+
+    return tokenizer(
+        input_tokens,
+        padding="max_length",
+        truncation=True,
+        max_length=128,
+        return_tensors="pt",
+    )
+
+
+def process_video(video_path: str):
+    video_frames = get_frames(video_path)
+
+    video_model_name = os.getenv("VIDEO_PRETRAINED")
+    video_feature_extractor = AutoProcessor.from_pretrained(video_model_name)
+
+    return video_feature_extractor(videos=video_frames, return_tensors="pt")
+
+
+def process_audio(file_path: str):
+    smile = opensmile.Smile(
+        opensmile.FeatureSet.ComParE_2016,
+        opensmile.FeatureLevel.Functionals,
+        sampling_rate=16000,
+        resample=True,
+        num_workers=5,
+        verbose=True,
+    )
+
+    redundant_features = os.getenv("REDUNDANT_FEATURES")
+    with open(redundant_features, "r") as features_file:
+        redundant_features_list = features_file.read().split(",")
+
+    audio_features = smile.process_files([file_path])
+    audio_features = audio_features.drop(columns=redundant_features_list, inplace=False)
+    return audio_features.values.reshape(audio_features.shape[0], 1, audio_features.shape[1])
+
+
+def inference(text: str, video_path: str):
+    text_encoding = process_text(text)
+    video_encoding = process_video(video_path)
+    audio_features = process_audio(video_path)
+    batch = {
+        "text": text_encoding,
+        "video": video_encoding,
+        "audio": audio_features,
+        "label": None,
+    }
+    label = final_model(batch)
+    id2label = {v: k for k, v in label2id.items()}
+    return id2label[int(label.detach().cpu())]
+
+
+def predict_emotion(text: str, video_path: str):
+    try:
+        logger.warning(f"{inference(text, video_path)}")
+        return inference(text, video_path)
+    except Exception as e:
+        sentry_sdk.capture_exception(e)
+        raise e
+
+
+final_model = create_final_model()
+
+
+class EmotionsPayload(BaseModel):
+    personality: List[str]
+    video_path: List[str]
+
+
+def subinfer(msg_text: str, video_path: str):
+    emotion = "Emotion detection unsuccessful. An error occurred during inference."
+    filepath = "undefined"
+    try:
+        filename = video_path.split("=")[-1]
+        filepath = f"/data/{filename}"
+        urlretrieve(video_path, filepath)
+        if not os.path.exists(filepath):
+            raise ValueError(f"Failed to retrieve video file from {filepath}")
+        emotion = predict_emotion(msg_text + " ", filepath)
+        logger.info(f"Detected emotion: {jsonable_encoder(emotion)}")
+    except Exception as e:
+        raise ValueError(f"The message format is correct, but: {e}")
+
+    return emotion
+
+
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.post("/model")
+def infer(payload: EmotionsPayload):
+    logger.info(f"Emotion Detection: {payload}")
+    emotion = [subinfer(p[0], p[1]) for p in zip(payload.personality, payload.video_path)]
+    return jsonable_encoder(emotion)
diff --git a/annotators/emotion_detection/service_configs/emotion-detection/environment.yml b/annotators/emotion_detection/service_configs/emotion-detection/environment.yml
new file mode 100644
index 0000000000..beaa53c7c6
--- /dev/null
+++ b/annotators/emotion_detection/service_configs/emotion-detection/environment.yml
@@ -0,0 +1,9 @@
+SERVICE_PORT: 8040
+SERVICE_NAME: emotion_detection
+CUDA_VISIBLE_DEVICES: 0
+VIDEO_PRETRAINED: "microsoft/xclip-base-patch32"
+TEXT_PRETRAINED: "bert-large-uncased"
+MULTIMODAL_MODEL: "final_model.pt"
+REDUNDANT_FEATURES: "redundant_features.txt"
+MODEL_PATH: "/data/"
+PREFIX: "Detect emotions:"
\ No newline at end of file
diff --git a/annotators/emotion_detection/service_configs/emotion-detection/service.yml b/annotators/emotion_detection/service_configs/emotion-detection/service.yml
new file mode 100644
index 0000000000..83876ca7a0
--- /dev/null
+++ b/annotators/emotion_detection/service_configs/emotion-detection/service.yml
@@ -0,0 +1,29 @@
+name: emotion-detection
+endpoints:
+- model
+compose:
+  env_file:
+    - .env
+  build:
+    args:
+      SERVICE_PORT: 8040
+      SERVICE_NAME: emotion_detection
+      VIDEO_PRETRAINED: "microsoft/xclip-base-patch32"
+      TEXT_PRETRAINED: "bert-large-uncased"
+      MULTIMODAL_MODEL: "final_model.pt"
+      REDUNDANT_FEATURES: "redundant_features.txt"
+      MODEL_PATH: "/data/"
+      PREFIX: "Detect emotions:"
+    context: .
+    dockerfile: ./annotators/emotion_detection/Dockerfile
+  command: uvicorn server:app --host 0.0.0.0 --port 8040
+  deploy:
+    resources:
+      limits:
+        memory: 1G
+      reservations:
+        memory: 1G
+  environment:
+    - CUDA_VISIBLE_DEVICES=0
+  ports:
+    - 8040:8040
\ No newline at end of file
diff --git a/annotators/kbqa/tests/test_kbqa.py b/annotators/kbqa/tests/test_kbqa.py
index 09048d8e71..a147694b22 100644
--- a/annotators/kbqa/tests/test_kbqa.py
+++ b/annotators/kbqa/tests/test_kbqa.py
@@ -13,7 +13,7 @@
         ),
         (
             {"x_init": ["How old is Donald Trump?"], "entities": [["Donald Trump"]], "entity_tags": [[["per", 1.0]]]},
-            "Donald Trump is 77 years old.",
+            "Donald Trump is 78 years old.",
         ),
     ],
 )
diff --git a/annotators/personality_detection/Dockerfile b/annotators/personality_detection/Dockerfile
index 91a64350c8..fde4965cb9 100644
--- a/annotators/personality_detection/Dockerfile
+++ b/annotators/personality_detection/Dockerfile
@@ -2,10 +2,14 @@ FROM python:3.7
 
 WORKDIR /src
 
-RUN git clone https://github.com/jkwieser/personality-detection-text.git /personality-detection-text
+COPY . /src
+RUN mkdir /data
 
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 
+RUN wget -O models http://files.deeppavlov.ai/dream_data/personality_detection/personality_detection_models_v0.tar.gz && tar -xf models
+RUN mv -f models_v0 /data/models && ls /data/models && rm -rf models_v0
+
 COPY . .
 
diff --git a/annotators/personality_detection/requirements.txt b/annotators/personality_detection/requirements.txt
index c27af54ab0..4f510579d1 100644
--- a/annotators/personality_detection/requirements.txt
+++ b/annotators/personality_detection/requirements.txt
@@ -1,5 +1,4 @@
 scikit-learn==0.22.1
-plotly==4.14.3
 pandas==1.2.4
 uvicorn==0.13.4
 fastapi==0.65.1
diff --git a/annotators/personality_detection/server.py b/annotators/personality_detection/server.py
index d77a27ace1..11ede39161 100644
--- a/annotators/personality_detection/server.py
+++ b/annotators/personality_detection/server.py
@@ -12,13 +12,13 @@
 
 sentry_sdk.init(os.getenv("SENTRY_DSN"))
 
-cEXT = pickle.load(open("/personality-detection-text/data/models/cEXT.p", "rb"))
-cNEU = pickle.load(open("/personality-detection-text/data/models/cNEU.p", "rb"))
-cAGR = pickle.load(open("/personality-detection-text/data/models/cAGR.p", "rb"))
-cCON = pickle.load(open("/personality-detection-text/data/models/cCON.p", "rb"))
-cOPN = pickle.load(open("/personality-detection-text/data/models/cOPN.p", "rb"))
-vectorizer_31 = pickle.load(open("/personality-detection-text/data/models/vectorizer_31.p", "rb"))
-vectorizer_30 = pickle.load(open("/personality-detection-text/data/models/vectorizer_30.p", "rb"))
+cEXT = pickle.load(open("/data/models/cEXT.p", "rb"))
+cNEU = pickle.load(open("/data/models/cNEU.p", "rb"))
+cAGR = pickle.load(open("/data/models/cAGR.p", "rb"))
+cCON = pickle.load(open("/data/models/cCON.p", "rb"))
+cOPN = pickle.load(open("/data/models/cOPN.p", "rb"))
+vectorizer_31 = pickle.load(open("/data/models/vectorizer_31.p", "rb"))
+vectorizer_30 = pickle.load(open("/data/models/vectorizer_30.p", "rb"))
 
 logger = logging.getLogger(__name__)
 
diff --git a/assistant_dists/dream_embodied/dev.yml b/assistant_dists/dream_embodied/dev.yml
index 210cf31e55..d3004e9b4e 100644
--- a/assistant_dists/dream_embodied/dev.yml
+++ b/assistant_dists/dream_embodied/dev.yml
@@ -5,6 +5,11 @@ services:
       - ".:/dp-agent"
     ports:
       - 4242:4242
+  files:
+    ports:
+      - 3000:3000
+    volumes:
+      - "~/.deeppavlov/file_server:/tmp"
   ranking-based-response-selector:
     volumes:
       - "./response_selectors/ranking_based_response_selector:/src"
diff --git a/assistant_dists/dream_ocean/dev.yml b/assistant_dists/dream_ocean/dev.yml
index 212a907368..d0e2a7d429 100644
--- a/assistant_dists/dream_ocean/dev.yml
+++ b/assistant_dists/dream_ocean/dev.yml
@@ -117,6 +117,12 @@ services:
       - "./common:/src/common"
     ports:
       - 8145:8145
+  dff-dream-persona-chatgpt-prompted-skill:
+    volumes:
+      - "./skills/dff_template_prompted_skill:/src"
+      - "./common:/src/common"
+    ports:
+      - 8137:8137
   property-extraction:
     volumes:
       - "./annotators/property_extraction:/src"
@@ -144,4 +150,14 @@ services:
       - "./annotators/personality_detection:/src"
     ports:
       - 8026:8026
+  emotion-detection:
+    volumes:
+      - "./annotators/emotion_detection:/src"
+    ports:
+      - 8040:8040
+  files:
+    ports:
+      - 3000:3000
+    volumes:
+      - "~/.deeppavlov/file_server:/tmp"
 version: "3.7"
diff --git a/assistant_dists/dream_ocean/docker-compose.override.yml b/assistant_dists/dream_ocean/docker-compose.override.yml
index 60f24f08b7..2924e0fb6f 100644
--- a/assistant_dists/dream_ocean/docker-compose.override.yml
+++ b/assistant_dists/dream_ocean/docker-compose.override.yml
@@ -1,13 +1,14 @@
 services:
   agent:
-    command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.channel=telegram agent.telegram_token=$TG_TOKEN agent.pipeline_config=assistant_dists/dream_ocean/pipeline_conf.json agent.db_config=assistant_dists/dream_ocean/db_conf.json'
+    command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_ocean/pipeline_conf.json agent.db_config=assistant_dists/dream_ocean/db_conf.json'
     environment:
       WAIT_HOSTS: "sentseg:8011, ranking-based-response-selector:8002, dff-intent-responder-skill:8012,
         intent-catcher:8014, ner:8021,
         factoid-qa:8071, kbqa:8072, entity-linking:8075, wiki-parser:8077, text-qa:8078,
         combined-classification:8087, fact-retrieval:8100, entity-detection:8103,
         sentence-ranker:8128, property-extraction:8136, prompt-selector:8135, openai-api-chatgpt:8145,
-        openai-api-chatgpt-16k:8167, summarization-annotator:8058, dialog-summarizer:8059, personality-detection:8026"
+        dff-dream-persona-chatgpt-prompted-skill:8137,
+        openai-api-chatgpt-16k:8167, summarization-annotator:8058, dialog-summarizer:8059, personality-detection:8026, emotion-detection:8040"
       WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000}
       HIGH_PRIORITY_INTENTS: 1
       RESTRICTION_FOR_SENSITIVE_CASE: 1
@@ -15,6 +16,9 @@ services:
       LANGUAGE: EN
       FALLBACK_FILE: fallbacks_dream_en.json
 
+  files:
+    image: julienmeerschart/simple-file-upload-download-server
+
   ranking-based-response-selector:
     env_file: [ .env ]
     build:
@@ -347,6 +351,27 @@ services:
         reservations:
          memory: 100M
 
+  dff-dream-persona-chatgpt-prompted-skill:
+    env_file: [ .env,.env_secret ]
+    build:
+      args:
+        SERVICE_PORT: 8137
+        SERVICE_NAME: dff_dream_persona_prompted_skill
+        PROMPT_FILE: common/prompts/dream_persona.json
+        GENERATIVE_SERVICE_URL: http://openai-api-chatgpt:8145/respond
+        GENERATIVE_SERVICE_CONFIG: openai-chatgpt.json
+        GENERATIVE_TIMEOUT: 120
+        N_UTTERANCES_CONTEXT: 7
+        ENVVARS_TO_SEND: OPENAI_API_KEY,OPENAI_ORGANIZATION
+      context: .
+      dockerfile: ./skills/dff_template_prompted_skill/Dockerfile
+    deploy:
+      resources:
+        limits:
+          memory: 128M
+        reservations:
+          memory: 128M
+
   property-extraction:
     env_file: [.env]
     build:
@@ -426,6 +451,7 @@ services:
           memory: 4G
 
   personality-detection:
+    env_file: [ .env ]
     build:
       args:
         SERVICE_PORT: 8026
@@ -441,4 +467,26 @@ services:
        reservations:
          memory: 312M
 
+  emotion-detection:
+    env_file: [ .env ]
+    build:
+      args:
+        SERVICE_PORT: 8040
+        SERVICE_NAME: emotion_detection
+        VIDEO_PRETRAINED: "microsoft/xclip-base-patch32"
+        TEXT_PRETRAINED: "bert-large-uncased"
+        MODEL_PATH: "/data/emotion_detection_models/"
+        MULTIMODAL_MODEL: "/data/emotion_detection_models/multimodal_model_with_early_fusion.pt"
+        REDUNDANT_FEATURES: "/data/redundant_feat"
+      context: annotators/emotion_detection
+    command: uvicorn server:app --host 0.0.0.0 --port 8040
+    deploy:
+      resources:
+        limits:
+          memory: 1G
+        reservations:
+          memory: 1G
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+
 version: '3.7'
diff --git a/assistant_dists/dream_ocean/pipeline_conf.json b/assistant_dists/dream_ocean/pipeline_conf.json
index 314048aad2..80e5261001 100644
--- a/assistant_dists/dream_ocean/pipeline_conf.json
+++ b/assistant_dists/dream_ocean/pipeline_conf.json
@@ -101,6 +101,22 @@
                     "service": "annotators/personality_detection/service_configs/personality_detection"
                 }
             },
+            "emotion_detection": {
+                "connector": {
+                    "protocol": "http",
+                    "timeout": 15.0,
+                    "url": "http://emotion-detection:8040/model"
+                },
+                "dialog_formatter": "state_formatters.dp_formatters:emotion_detection_formatter",
+                "response_formatter": "state_formatters.dp_formatters:simple_formatter_service",
+                "state_manager_method": "add_annotation",
+                "previous_services": [],
+                "is_enabled": true,
+                "source": {
+                    "component": "components/OQWoiqfnqwfnoqwib.yml",
+                    "service": "annotators/emotion_detection/service_configs/emotion-detection"
+                }
+            },
             "prompt_goals_collector": {
                 "connector": {
                     "protocol": "http",
@@ -435,6 +451,27 @@
             }
         },
         "skills": {
+            "dff_dream_persona_prompted_skill": {
+                "connector": {
+                    "protocol": "http",
+                    "timeout": 120.0,
+                    "url": "http://dff-dream-persona-chatgpt-prompted-skill:8137/respond"
+                },
+                "dialog_formatter": {
+                    "name": "state_formatters.dp_formatters:dff_prompted_skill_formatter",
+                    "skill_name": "dff_dream_persona_prompted_skill"
+                },
+                "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service",
+                "previous_services": [
+                    "skill_selectors"
+                ],
+                "state_manager_method": "add_hypothesis",
+                "is_enabled": true,
+                "source": {
+                    "component": "components/W6hdAGshQyMwdQukRXXuKA.yml",
+                    "service": "skills/dff_template_prompted_skill/service_configs/dff-dream-persona-chatgpt-prompted-skill"
+                }
+            },
             "dff_intent_responder_skill": {
                 "connector": {
                     "protocol": "http",
@@ -514,11 +551,11 @@
         }
     },
     "metadata": {
-        "display_name": "Dream",
+        "display_name": "Dream Ocean",
         "author": "DeepPavlov",
-        "description": "Main version of DeepPavlov Dream Socialbot",
+        "description": "Multimodal emotion version of DeepPavlov Dream Socialbot",
         "version": "0.1.0",
-        "date_created": "2022-12-12T12:12:00",
+        "date_created": "2023-12-12T12:12:00",
         "ram_usage": "20 GB",
         "gpu_usage": "20 GB",
         "disk_usage": "20 GB"
diff --git a/components.tsv b/components.tsv
index 8e0620f53d..73b32d7a3c 100644
--- a/components.tsv
+++ b/components.tsv
@@ -43,7 +43,7 @@
 8037	dff-weather-skill
 8038	robot-fake-server
 8039
-8040
+8040	emotion-detection
 8041	dff-embodied-skill
 8042	embodied-sender
 8043
diff --git a/components/OQWoiqfnqwfnoqwib.yml b/components/OQWoiqfnqwfnoqwib.yml
new file mode 100644
index 0000000000..d81ae65ecc
--- /dev/null
+++ b/components/OQWoiqfnqwfnoqwib.yml
@@ -0,0 +1,23 @@
+name: emotion_detection
+display_name: Emotion Detection
+component_type: null
+model_type: ML-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: Emotion detection annotator.
+ram_usage: 1G
+gpu_usage: 1G
+group: annotators
+connector:
+  protocol: http
+  timeout: 15.0
+  url: http://emotion-detection:8040/model
+dialog_formatter: state_formatters.dp_formatters:emotion_detection_formatter
+response_formatter: state_formatters.dp_formatters:simple_formatter_service
+previous_services: null
+required_previous_services: null
+state_manager_method: add_annotation
+tags: null
+endpoint: model
+service: annotators/emotion_detection/service_configs/emotion-detection
+date_created: '2023-08-28T12:12:12'
\ No newline at end of file
diff --git a/components/OowqncqowNAbj.yml b/components/OowqncqowNAbj.yml
index 60d640c74f..3ec35fe8ab 100644
--- a/components/OowqncqowNAbj.yml
+++ b/components/OowqncqowNAbj.yml
@@ -20,4 +20,4 @@ state_manager_method: add_annotation
 tags: null
 endpoint: model
 service: annotators/personality_detection/service_configs/personality_detection
-date_created: '2023-08-24T12:12:12'
+date_created: '2023-08-24T12:12:12'
\ No newline at end of file
diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py
index cbae9fa91e..c24ffce33b 100755
--- a/state_formatters/dp_formatters.py
+++ b/state_formatters/dp_formatters.py
@@ -92,13 +92,15 @@ def convert_formatter_dialog(dialog: Dict) -> List[Dict]:
 
 def personality_catcher_formatter_dialog(dialog: Dict) -> List[Dict]:
     # Used by: personality_catcher_formatter
+    return [{"personality": [dialog["human_utterances"][-1].get("text")]}]
+
+
+def emotion_detection_formatter(dialog: Dict) -> List[Dict]:
+    # Used by: emotion_detection annotator
     return [
         {
-            "personality": [
-                dialog["human_utterances"][-1]["annotations"].get(
-                    "spelling_preprocessing", dialog["human_utterances"][-1]["text"]
-                )
-            ]
+            "personality": [dialog["human_utterances"][-1].get("text")],
+            "video_path": [dialog["human_utterances"][-1]["attributes"].get("video_path")],
         }
     ]
 