In [1]:
"""
!sudo apt install python3-dev
!sudo apt install python3-venv
!sudo apt install ffmpeg libavcodec-extra
!python -m pip install -r requirements.txt
!python -m pip install pyannote.audio==3.2.0
!python -m pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]

!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt
!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml
!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/tokenizer_all_sets.tar

!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt
!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml

"""

# Monkey-patch locale.getpreferredencoding so every caller is told "UTF-8",
# regardless of the system locale.
# NOTE(review): presumably a workaround for hosted notebook environments that
# report a non-UTF-8 locale — confirm it is still needed.
import locale
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

import logging
import warnings
# Suppress all Python warnings and configure the root logger at ERROR level to
# keep the notebook output readable. This also hides deprecation notices.
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.ERROR)


import os
from io import BytesIO
from typing import List, Tuple, Union

import hydra
import librosa
import numpy as np
import soundfile as sf
import torch
import torchaudio
from deep_translator import GoogleTranslator
from nemo.collections.asr.models import EncDecRNNTBPEModel

from omegaconf import DictConfig, ListConfig, OmegaConf
from pyannote.audio import Pipeline
from pydub import AudioSegment
from transformers import AutoModelForSequenceClassification, AutoTokenizer, T5ForConditionalGeneration, T5Tokenizer, AutoModel

from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

from src.gigamodels import SpecScaler, GigaAMEmo, FilterbankFeaturesTA, AudioToMelSpectrogramPreprocessor
from src.youtubedownloader import YouTubeAudioDownloader


In [2]:
### Не стал выносить классы в отдельные модули для наглядности, в отдельный файл вынес заимствованные классы от Gigamodels, а также нерелевантный задаче класс youtube downloader

In [3]:
class DialogueSummarizer:
    """Summarize a speaker-labelled dialogue with a T5 sequence-to-sequence model.

    The tokenizer and model are loaded from ``model_name`` with
    ``T5Tokenizer.from_pretrained`` / ``T5ForConditionalGeneration.from_pretrained``
    and the model is put into eval mode.
    """

    def __init__(self, model_name='d0rj/rut5-base-summ'):
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name).eval()

    def format_dialogue(self, transcriptions, speakers):
        """Render the dialogue as one ``"<speaker>: <text>"`` line per utterance.

        Args:
            transcriptions: Iterable of utterance texts.
            speakers: Iterable of speaker labels, paired positionally with
                ``transcriptions`` (extra items on either side are ignored by ``zip``).

        Returns:
            The lines joined with ``"\\n"``; an empty string for an empty dialogue.
        """
        return '\n'.join(
            f"{speaker}: {transcription}"
            for transcription, speaker in zip(transcriptions, speakers)
        )

    def summarize(self, transcriptions, speakers):
        """Generate a summary of the dialogue.

        Args:
            transcriptions: Iterable of utterance texts.
            speakers: Iterable of speaker labels aligned with ``transcriptions``.

        Returns:
            The decoded summary with surrounding whitespace stripped, or ``""``
            when there is nothing to summarize.
        """
        formatted_text = self.format_dialogue(transcriptions, speakers)
        if not formatted_text:
            # With min_length=40 the model would be forced to invent text for an
            # empty input, so skip generation altogether.
            return ""

        # The input is truncated to 512 tokens, so only the beginning of a long
        # dialogue contributes to the summary.
        input_ids = self.tokenizer(formatted_text, return_tensors='pt', max_length=512, truncation=True).input_ids
        # Keep the inputs on the same device as the model in case it was moved.
        input_ids = input_ids.to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(input_ids, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)

        summary = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return summary.strip()


class QuestionDetector:
    """Detect whether an utterance is a question using an intent classifier.

    ``is_question`` translates the text to English with ``GoogleTranslator``
    before classification; ``predict_intent`` classifies the text it is given
    as-is.
    """

    def __init__(self, model_name="Godfrey2712/amf_illoc_force_intent_recognition"):
        self.translator = GoogleTranslator(source='auto', target='en')
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Inference only: make sure dropout and similar layers are disabled.
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()
        self.question_intents = {3, 7}  # "Assertive Questioning", "Pure Questioning"

    def is_question(self, text):
        """Return True when the translated ``text`` is classified as a question.

        Empty or whitespace-only text, and text for which the translator returns
        nothing, is reported as "not a question" without calling the classifier.
        """
        if not text or not text.strip():
            return False
        translated_text = self.translator.translate(text.lower())
        if not translated_text:
            return False
        intent = self.predict_intent(translated_text)
        return intent in self.question_intents

    def predict_intent(self, text):
        """Return the integer id of the highest-scoring intent class for ``text``."""
        # truncation=True keeps over-long inputs within the tokenizer's model limit.
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True)
        # No gradients are needed for inference; this avoids building the graph.
        with torch.no_grad():
            outputs = self.model(**inputs)
        return torch.argmax(outputs.logits, dim=1).item()

class QuestionHandler:
    """Generate an operator hint for a customer question with a causal LLM.

    The knowledge file is read once and embedded into the prompt; each call to
    ``handle_question`` runs the LangChain chain and extracts the recommendation
    line from the model response.
    """

    def __init__(self, model_name="IlyaGusev/saiga_tlite_8b", knowledge_file="knowledge.txt"):
        self.model_name = model_name
        self.knowledge_file = knowledge_file
        self.llm = self._setup_llm()
        self.knowledge_base = self._load_knowledge_base()
        self.prompt_template = self._setup_prompt_template()
        self.chain = self._setup_chain()

    def _setup_llm(self):
        """Load the tokenizer and model and wrap them in a LangChain LLM."""
        tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(self.model_name, torch_dtype=torch.float16, low_cpu_mem_usage=True)

        # NOTE(review): do_sample is not set here, so temperature/top_p may be
        # ignored by generation — confirm whether sampling was intended.
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=1000,
            temperature=0.1,
            top_p=0.95,
            repetition_penalty=1.05,
            device=0 if torch.cuda.is_available() else -1
        )

        return HuggingFacePipeline(pipeline=pipe)

    def _load_knowledge_base(self):
        """Read the knowledge file as UTF-8 text.

        Raises:
            ValueError: If the file is missing or cannot be read; the original
                exception is preserved as ``__cause__``.
        """
        try:
            with open(self.knowledge_file, 'r', encoding='utf-8') as file:
                return file.read()
        except FileNotFoundError as e:
            raise ValueError(f"Knowledge file '{self.knowledge_file}' not found.") from e
        except Exception as e:
            raise ValueError(f"Error loading knowledge base: {e}") from e

    def _setup_prompt_template(self):
        """Build the prompt with the knowledge base inlined and one ``question`` slot."""
        # The knowledge text becomes part of a format-style template, so literal
        # braces in it must be doubled; otherwise they would be read as template
        # variables and break formatting.
        escaped_knowledge = self.knowledge_base.replace("{", "{{").replace("}", "}}")

        template = f"""Вы Система, русскоязычный автоматический ассистент и помощником оператора контакт-центра. Используйте следующую информацию в качестве базы знаний:
        {escaped_knowledge}
        
        Человек: Подготовь подсказку, как правильно оператору ответить на следующий вопрос: {{question}}
        
        Система: Вот рекомендация, как ответить на вопрос:
        - "Рекомендация: """

        return PromptTemplate(template=template, input_variables=["question"])

    def _setup_chain(self):
        """Create the LLM chain that formats the prompt and calls the model."""
        # NOTE(review): the prompt has no "chat_history" variable, so this memory
        # appears to be stored but never shown to the model — confirm it is needed.
        memory = ConversationBufferMemory(memory_key="chat_history")

        return LLMChain(llm=self.llm, prompt=self.prompt_template, memory=memory)

    def handle_question(self, question):
        """Return a one-line hint for the operator.

        The response is cut after the last "Рекомендация:" marker and reduced to
        its first line.
        """
        response = self.chain.run(question=question)
        hint = response.split("Рекомендация:")[-1].strip().split("\n")[0]
        # The prompt opens a double quote before the marker and the model tends to
        # close it; drop that dangling quote, but leave balanced quotes alone.
        if hint.endswith('"') and hint.count('"') % 2 == 1:
            hint = hint[:-1].rstrip()
        return hint

class DialogueMonitor:
    """Raise alerts from acoustic emotion probabilities and text sentiment.

    Sentiment comes from the 'cointegrated/rubert-tiny-sentiment-balanced'
    sequence classifier, moved to the GPU when one is available.
    """

    def __init__(self, emotion_threshold=0.3, sentiment_threshold=-0.8, emotions_to_monitor=('angry', 'sad')):
        checkpoint_name = 'cointegrated/rubert-tiny-sentiment-balanced'

        self.emotion_threshold = emotion_threshold
        self.sentiment_threshold = sentiment_threshold
        self.emotions_to_monitor = emotions_to_monitor

        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
        self.sentiment_model = AutoModelForSequenceClassification.from_pretrained(checkpoint_name)
        if torch.cuda.is_available():
            self.sentiment_model.cuda()

    def get_sentiment(self, text):
        """Classify ``text`` and return ``(label, score)``.

        ``label`` is looked up in the model's ``id2label`` for the most probable
        class; ``score`` is the class probabilities weighted by ``[-1, 0, 1]``.
        """
        with torch.no_grad():
            encoded = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True)
            encoded = encoded.to(self.sentiment_model.device)
            class_logits = self.sentiment_model(**encoded).logits
            proba = torch.softmax(class_logits, dim=1).cpu().numpy()[0]
        best_class = proba.argmax()
        return self.sentiment_model.config.id2label[best_class], proba.dot([-1, 0, 1])

    def check_dialogue(self, segment_emotions, cumulative_emotions, text):
        """Collect alert messages for one utterance.

        Args:
            segment_emotions: Dict with a 'probabilities' mapping for this segment.
            cumulative_emotions: Dict with a 'probabilities' mapping for the running state.
            text: Utterance text passed to ``get_sentiment``.

        Returns:
            ``(alerts, sentiment_score)`` where alerts list segment-level emotion
            messages first, then cumulative ones, then the sentiment message.
        """
        alerts = []

        # Segment-level checks come first, then cumulative ones, so the message
        # order stays stable for readers of the report.
        for scope, emotions in (("Segment", segment_emotions), ("Cumulative", cumulative_emotions)):
            alerts.extend(
                f"{scope} {emotion} emotion above threshold"
                for emotion in self.emotions_to_monitor
                if emotions['probabilities'].get(emotion, 0) > self.emotion_threshold
            )

        sentiment_label, sentiment_score = self.get_sentiment(text)
        strongly_negative = sentiment_label == 'negative' and sentiment_score < self.sentiment_threshold
        if strongly_negative:
            alerts.append(f"Strong negative sentiment detected (score: {sentiment_score:.2f})")

        return alerts, sentiment_score

class EmotionAnalyzer:
    """Classify acoustic emotions per segment and keep a running emotion state.

    ``emo_model`` must provide ``get_probs(path)`` and an ``id2name`` mapping from
    class index to emotion name.
    """

    def __init__(self, emo_model):
        self.emo_model = emo_model

    def classify_emotion(self, audio_segment: np.ndarray, sample_rate: int) -> dict:
        """Return emotion probabilities and the dominant emotion for one segment.

        The model is fed a file path, so the samples are written to a WAV file
        first. The file lives in a private temporary directory that is removed
        afterwards, so nothing is left in the working directory and concurrent
        runs cannot overwrite each other's file.

        Returns:
            ``{"probabilities": {name: prob}, "dominant_emotion": name}``.
        """
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "segment.wav")
            sf.write(wav_path, audio_segment, sample_rate)
            with torch.no_grad():
                # get_probs is indexed with [0]; presumably it returns one row of
                # probabilities per input file — TODO confirm.
                probs = self.emo_model.get_probs(wav_path)[0]

        emotion_probs = {self.emo_model.id2name[i]: p for i, p in enumerate(probs)}
        dominant_emotion = max(emotion_probs, key=emotion_probs.get)
        return {"probabilities": emotion_probs, "dominant_emotion": dominant_emotion}

    @staticmethod
    def update_cumulative_emotions(cumulative_emotions: dict, new_emotions: dict) -> dict:
        """Blend the running emotion state with a new segment.

        Each probability becomes the mean of the previous running value and the
        new value, so recent segments weigh more than older ones (this is not an
        equal-weight average over all segments). With no previous state, a
        shallow copy of ``new_emotions`` is returned.
        """
        if not cumulative_emotions:
            return new_emotions.copy()

        previous = cumulative_emotions["probabilities"]
        latest = new_emotions["probabilities"]
        updated = {emotion: (previous[emotion] + latest[emotion]) / 2 for emotion in previous}
        dominant_emotion = max(updated, key=updated.get)
        return {"probabilities": updated, "dominant_emotion": dominant_emotion}

    @staticmethod
    def format_emotions(emotions: dict) -> str:
        """Format an emotion dict as ``"Dominant: <name> | {<name>: <prob rounded to 2>}"``."""
        rounded_emotions = {k: round(v, 2) for k, v in emotions['probabilities'].items()}
        return f"Dominant: {emotions['dominant_emotion']} | {rounded_emotions}"


class ThemeHandler:
    """Track how far the conversation drifts from a stated goal.

    The goal text and a sliding window of recent utterances are embedded with
    'intfloat/multilingual-e5-small' (mean of the last hidden state) and compared
    by cosine distance. No distance is reported until ``initial_buffer_seconds``
    of dialogue have elapsed.
    """

    def __init__(self, config):
        self.model_name = 'intfloat/multilingual-e5-small'
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModel.from_pretrained(self.model_name)
        self.max_tokens = 512
        self.threshold = config['theme_deviation_threshold']
        self.initial_buffer_seconds = config['initial_buffer_seconds']
        self.sliding_window = []
        self.goal_embedding = self.get_embedding(config['conversation_goal'])
        self.current_embedding = None
        self.total_duration = 0
        self.buffer_passed = False

    def get_embedding(self, text):
        """Embed ``text`` as the mean of the model's last hidden state (NumPy array).

        The input is truncated to ``max_tokens`` tokens.
        """
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True, max_length=self.max_tokens)
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()

    def format_dialogue_text(self, speaker, text):
        """Prefix an utterance with its speaker label."""
        return f"{speaker} сказал: {text}"

    def update_sliding_window(self, formatted_text):
        """Append an utterance and evict the oldest ones until the window fits.

        The newest utterance is never evicted: if it alone exceeds ``max_tokens``
        it stays as the only entry (``get_embedding`` truncates it) instead of
        leaving an empty window whose embedding would be meaningless.

        Returns:
            The window contents joined with single spaces.
        """
        self.sliding_window.append(formatted_text)
        combined_text = " ".join(self.sliding_window)

        while (
            len(self.sliding_window) > 1
            and len(self.tokenizer.tokenize(combined_text)) > self.max_tokens
        ):
            self.sliding_window.pop(0)
            combined_text = " ".join(self.sliding_window)

        return combined_text

    def update_embedding(self, speaker, text, segment_start, segment_end):
        """Record a new utterance and refresh the current window embedding.

        ``segment_end`` becomes the elapsed duration used for the initial buffer
        check; ``segment_start`` is accepted but not used.
        """
        self.total_duration = segment_end
        if not self.buffer_passed and self.total_duration >= self.initial_buffer_seconds:
            self.buffer_passed = True

        formatted_text = self.format_dialogue_text(speaker, text)
        sliding_window_text = self.update_sliding_window(formatted_text)
        self.current_embedding = self.get_embedding(sliding_window_text)

    def get_current_distance(self):
        """Return the cosine distance to the goal, or None before the buffer has passed."""
        if self.current_embedding is None or not self.buffer_passed:
            return None
        return cosine(self.goal_embedding, self.current_embedding)

    def check_theme_deviation(self):
        """Return ``(is_deviated, distance)``; ``(False, 0)`` when no distance is available."""
        deviation = self.get_current_distance()
        if deviation is None:
            return False, 0
        return deviation >= self.threshold, deviation

    def get_deviation_warning(self):
        """Return a warning string when the distance reaches the threshold, else None."""
        if not self.buffer_passed:
            return None
        is_deviated, deviation = self.check_theme_deviation()
        if is_deviated:
            return f"Warning: Conversation has deviated from the initial theme. Deviation: {deviation:.2f}"
        return None

    def get_status(self):
        """Return a progress message while the initial buffer is filling, else None."""
        if not self.buffer_passed:
            return f"Accumulating initial data ({self.total_duration:.2f}/{self.initial_buffer_seconds:.2f} seconds)"
        return None


class DialogueAnalyzer:
    """End-to-end dialogue analysis: download, diarize, transcribe, analyse, summarize.

    ``config`` is a dict read for model paths, the diarization model name and
    token, chunking limits ('min_duration', 'max_duration',
    'new_chunk_threshold'), 'batch_size', 'monitor_params', 'knowledge_file'
    and the theme settings consumed by ``ThemeHandler``.
    """

    def __init__(self, config):
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.downloader = YouTubeAudioDownloader(target_sr=config['target_sr'])
        self.asr_model = self._load_asr_model()
        self.pipeline = self._load_diarization_pipeline()
        self.emo_model = self._load_emotion_model()
        self.emotion_analyzer = EmotionAnalyzer(self.emo_model)
        self.monitor = DialogueMonitor(**config['monitor_params'])
        self.question_detector = QuestionDetector()
        # Built exactly once, with the configured knowledge file. Constructing it
        # twice would load the LLM twice and read the default knowledge file first.
        self.question_handler = QuestionHandler(knowledge_file=config['knowledge_file'])
        self.summarizer = DialogueSummarizer()
        self.theme_handler = ThemeHandler(config)
        # Human-readable names for the intent classifier's class ids.
        self.intent_map = {
            "0": "Agreeing",
            "1": "Arguing",
            "2": "Asserting",
            "3": "Assertive Questioning",  # treated as a question
            "4": "Challenging",
            "5": "Default Illocuting",
            "6": "Disagreeing",
            "7": "Pure Questioning",  # treated as a question
            "8": "Restating",
            "9": "Rhetorical Questioning"  # not treated as a question
        }

    def _load_asr_model(self):
        """Build the RNN-T ASR model from its config file and load its weights."""
        model = EncDecRNNTBPEModel.from_config_file(self.config['asr_config'])
        # NOTE(review): torch.load unpickles the checkpoint; only load files from
        # a trusted source.
        ckpt = torch.load(self.config['asr_weights'], map_location="cpu")
        # strict=False tolerates missing/unexpected keys in the checkpoint.
        model.load_state_dict(ckpt, strict=False)
        return model.eval().to(self.device)

    def _load_diarization_pipeline(self):
        """Load the pyannote diarization pipeline and move it to the target device.

        Raises:
            RuntimeError: If the pipeline could not be loaded (for example a
                missing or unauthorized Hugging Face token).
        """
        diarization = Pipeline.from_pretrained(self.config['diarization_model'], use_auth_token=self.config['hf_token'])
        if diarization is None:
            # Fail here with a clear message rather than an AttributeError on `.to`.
            raise RuntimeError(
                f"Could not load diarization pipeline '{self.config['diarization_model']}'. "
                "Check the Hugging Face token and that the model's terms were accepted."
            )
        return diarization.to(self.device)

    def _load_emotion_model(self):
        """Build the emotion model from its config file and load its weights."""
        conf = OmegaConf.load(self.config['emo_config'])
        model = GigaAMEmo(conf)
        # NOTE(review): torch.load unpickles the checkpoint; only load files from
        # a trusted source.
        ckpt = torch.load(self.config['emo_weights'], map_location="cpu")
        model.load_state_dict(ckpt, strict=False)
        return model.eval().to(self.device)

    def process_url(self, url):
        """Download and convert the audio for ``url``.

        Returns:
            The path reported by the downloader, or None when anything fails
            (the error is printed so remaining URLs can still be processed).
        """
        try:
            audio_path = self.downloader.download_and_convert(url)

            print(f"Processed: {url}")
            print(f"Audio saved to: {audio_path}")
            return audio_path
        except Exception as e:
            print(f"Failed to process {url}: {str(e)}")
            return None

    @staticmethod
    def audiosegment_to_numpy(audiosegment: "AudioSegment") -> np.ndarray:
        """Convert a pydub segment to float32 samples divided by 32768.

        Two-channel audio is reshaped to ``(n, 2)``; anything else stays 1-D.
        """
        samples = np.array(audiosegment.get_array_of_samples())
        if audiosegment.channels == 2:
            samples = samples.reshape((-1, 2))
        samples = samples.astype(np.float32, order="C") / 32768.0
        return samples

    def segment_audio(self, audio_path: str) -> Tuple[List[np.ndarray], List[List[float]], List[str]]:
        """Diarize the file and cut it into single-speaker chunks.

        Consecutive turns are merged into the current chunk. A new chunk starts
        when the speaker changes, when the chunk would exceed 'max_duration', or
        when the chunk is longer than 'min_duration' and the pause before the
        next turn exceeds 'new_chunk_threshold'.

        Returns:
            ``(segments, boundaries, speakers)``: sample arrays, ``[start, end]``
            pairs in seconds, and speaker labels, all aligned by index.
        """
        audio = AudioSegment.from_wav(audio_path)
        audio_duration = len(audio) / 1000  # len() of the segment is in milliseconds
        diarization_results = self.pipeline(audio_path)

        segments = []
        boundaries = []
        speakers = []

        def emit_chunk(chunk_start, chunk_end, chunk_speaker):
            # Slice the recording (millisecond indices) and record the chunk.
            segments.append(
                self.audiosegment_to_numpy(audio[int(chunk_start * 1000) : int(chunk_end * 1000)])
            )
            boundaries.append([chunk_start, chunk_end])
            speakers.append(chunk_speaker)

        curr_duration = 0
        curr_start = 0
        curr_end = 0
        curr_speaker = None

        for turn, _, speaker in diarization_results.itertracks(yield_label=True):
            start = max(0, turn.start)
            end = min(audio_duration, turn.end)

            long_pause = (
                curr_duration > self.config['min_duration']
                and start - curr_end > self.config['new_chunk_threshold']
            )
            too_long = curr_duration + (end - curr_end) > self.config['max_duration']

            if long_pause or too_long or speaker != curr_speaker:
                if curr_duration != 0:
                    emit_chunk(curr_start, curr_end, curr_speaker)
                curr_start = start
                curr_speaker = speaker

            curr_end = end
            curr_duration = curr_end - curr_start

        # Flush the chunk that was still open when the turns ran out.
        if curr_duration != 0:
            emit_chunk(curr_start, curr_end, curr_speaker)

        return segments, boundaries, speakers

    @staticmethod
    def format_time(seconds):
        """Format seconds as ``MM:SS:cc``, or ``HH:MM:SS:cc`` from one hour on.

        ``cc`` is hundredths of a second, truncated. The value is converted to
        whole hundredths first; the small rounding step only absorbs binary
        float error (e.g. ``0.29 * 100 == 28.999999999999996``), which would
        otherwise drop one hundredth.
        """
        total_centiseconds = int(round(seconds * 100, 6))
        hours, remainder = divmod(total_centiseconds, 360000)
        minutes, remainder = divmod(remainder, 6000)
        full_seconds, centiseconds = divmod(remainder, 100)
        if hours > 0:
            return f"{hours:02}:{minutes:02}:{full_seconds:02}:{centiseconds:02}"
        else:
            return f"{minutes:02}:{full_seconds:02}:{centiseconds:02}"

    def analyze_dialogue(self, audio_path):
        """Run the full per-utterance analysis and print a report and a summary.

        For every non-empty transcription this prints timing, speaker, text,
        emotions, sentiment, intent, theme status or distance, any alerts and,
        for questions, a hint for the operator.
        """
        full_audio, sample_rate = sf.read(audio_path)
        segments, boundaries, speakers = self.segment_audio(audio_path)
        if not segments:
            # Nothing was diarized; there is nothing to transcribe or summarize.
            print("No speech segments detected; nothing to analyze.")
            return

        transcriptions = self.asr_model.transcribe(segments, batch_size=self.config['batch_size'])[0]

        all_transcriptions = []
        all_speakers = []
        cumulative_emotions = None

        print("\n--- Dialogue Analysis ---\n")

        for transcription, boundary, speaker in zip(transcriptions, boundaries, speakers):
            if not transcription.strip():
                continue

            boundary_0 = self.format_time(boundary[0])
            boundary_1 = self.format_time(boundary[1])

            start_sample = int(boundary[0] * sample_rate)
            end_sample = int(boundary[1] * sample_rate)
            audio_segment = full_audio[start_sample:end_sample]

            segment_emotions = self.emotion_analyzer.classify_emotion(audio_segment, sample_rate)

            if cumulative_emotions is None:
                cumulative_emotions = segment_emotions.copy()
            else:
                cumulative_emotions = self.emotion_analyzer.update_cumulative_emotions(cumulative_emotions, segment_emotions)

            # Named `alerts` so the imported `warnings` module is not shadowed.
            alerts, sentiment_score = self.monitor.check_dialogue(segment_emotions, cumulative_emotions, transcription)

            # NOTE(review): the intent is predicted on the raw transcription,
            # bypassing the translation step in QuestionDetector.is_question —
            # confirm the classifier is meant to see untranslated text.
            intent = self.question_detector.predict_intent(transcription)
            intent_label = self.intent_map[str(intent)]
            is_question = intent in self.question_detector.question_intents

            self.theme_handler.update_embedding(speaker, transcription, boundary[0], boundary[1])
            theme_status = self.theme_handler.get_status()
            theme_distance = self.theme_handler.get_current_distance()

            theme_warning = self.theme_handler.get_deviation_warning()
            if theme_warning:
                alerts.append(theme_warning)

            print(f"\nTime: [{boundary_0} - {boundary_1}]")
            print(f"Speaker: {speaker}")
            print(f"Transcription: {transcription}")
            print(f"Segment emotions: {self.emotion_analyzer.format_emotions(segment_emotions)}")
            print(f"Cumulative emotions: {self.emotion_analyzer.format_emotions(cumulative_emotions)}")
            print(f"Sentiment score: {sentiment_score:.2f}")
            print(f"Intent: {intent_label}")

            if theme_status:
                print(f"Theme status: {theme_status}")
            elif theme_distance is not None:
                print(f"Theme distance: {theme_distance:.2f}")

            if alerts:
                print("Warnings:")
                for alert in alerts:
                    print(f"- {alert}")

            if is_question:
                hint = self.question_handler.handle_question(transcription)
                print("\nПодсказка для оператора:")
                print(hint)

            print("-" * 50)

            all_transcriptions.append(transcription)
            all_speakers.append(speaker)

        print("\n--- Dialogue Summary ---\n")
        summary = self.summarizer.summarize(all_transcriptions, all_speakers)
        print(summary)

    def run(self, urls):
        """Download and analyse every URL; URLs that fail to download are skipped."""
        for url in urls:
            audio_path = self.process_url(url)
            if audio_path:
                self.analyze_dialogue(audio_path)

### Хорошая практика - выносить конфиг в отдельный файл, но решил оставить тут для наглядности

In [4]:
if __name__ == "__main__":
    # Pipeline configuration: model locations, monitoring thresholds, chunking
    # limits and the conversation goal used for theme tracking.
    config = {
        # os.getenv is a function and must be called, not subscripted. It returns
        # None when HF_TOKEN is not set.
        'hf_token': os.getenv('HF_TOKEN'),
        'target_sr': 16000,
        'asr_config': "./rnnt_model_config.yaml",
        'asr_weights': "./rnnt_model_weights.ckpt",
        'knowledge_file': './knowledge.txt',
        'diarization_model': "pyannote/speaker-diarization-3.1",
        'emo_config': "emo_model_config.yaml",
        'emo_weights': "emo_model_weights.ckpt",
        'monitor_params': {
            'emotion_threshold': 0.3,
            'sentiment_threshold': -0.8,
            'emotions_to_monitor': ('angry', 'sad')
        },
        'batch_size': 10,
        'max_duration': 22.0,
        'min_duration': 15.0,
        'new_chunk_threshold': 0.2,
        'conversation_goal': "подтверждение операции по заявке на изменение финансового номера телефона",
        'theme_deviation_threshold': 0.15,
        'initial_buffer_seconds': 25
    }

    # Recordings to analyse; each is downloaded and processed in turn.
    urls = [
        #'https://youtu.be/UCLpgUjoExU',
        'https://youtu.be/gdEWuP2gIuA'
    ]

    analyzer = DialogueAnalyzer(config)
    analyzer.run(urls)

[NeMo I 2024-07-28 19:55:19 mixins:172] Tokenizer SentencePieceTokenizer initialized with 512 tokens


[NeMo W 2024-07-28 19:55:20 audio_to_text_dataset:830] Could not load dataset as `manifest_filepath` was None. Provided config : {'shuffle': False, 'manifest_filepath': None}


[NeMo I 2024-07-28 19:55:20 features:305] PADDING: 0
[NeMo I 2024-07-28 19:55:22 rnnt_models:224] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-07-28 19:55:22 rnnt_models:224] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-07-28 19:55:22 rnnt_models:224] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

      warn_deprecated(
    
      warn_deprecated(
    
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Processed: https://youtu.be/gdEWuP2gIuA
Audio saved to: /home/dvulybi1/vtb/downloads/gdEWuP2gIuA_mono_16000hz.wav


Transcribing: 100%|██████████| 4/4 [00:01<00:00,  3.36it/s]



--- Dialogue Analysis ---


Time: [00:01:97 - 00:04:31]
Speaker: SPEAKER_01
Transcription: александра евгеньевна здравствуйте
Segment emotions: Dominant: neutral | {'angry': 0.0, 'sad': 0.0, 'neutral': 0.99, 'positive': 0.0}
Cumulative emotions: Dominant: neutral | {'angry': 0.0, 'sad': 0.0, 'neutral': 0.99, 'positive': 0.0}
Sentiment score: 0.00
Intent: Arguing
Theme status: Accumulating initial data (4.32/25.00 seconds)
--------------------------------------------------

Time: [00:05:04 - 00:05:71]
Speaker: SPEAKER_00
Transcription: слушаю вас
Segment emotions: Dominant: angry | {'angry': 0.69, 'sad': 0.0, 'neutral': 0.2, 'positive': 0.11}
Cumulative emotions: Dominant: neutral | {'angry': 0.34, 'sad': 0.0, 'neutral': 0.6, 'positive': 0.06}
Sentiment score: 0.01
Intent: Arguing
Theme status: Accumulating initial data (5.72/25.00 seconds)
- Segment angry emotion above threshold
- Cumulative angry emotion above threshold
--------------------------------------------------

Time: [00:05

      warn_deprecated(
    



Time: [00:44:26 - 00:45:69]
Speaker: SPEAKER_01
Transcription: нет а почему мне должно быть стыдно
Segment emotions: Dominant: neutral | {'angry': 0.0, 'sad': 0.0, 'neutral': 0.99, 'positive': 0.0}
Cumulative emotions: Dominant: neutral | {'angry': 0.02, 'sad': 0.0, 'neutral': 0.88, 'positive': 0.1}
Sentiment score: -0.70
Intent: Assertive Questioning
Theme distance: 0.11

Подсказка для оператора:
1. Признайте чувства клиента. 2. Объясните причины, по которым ему не стоит стыдиться. 3. Предложите поддержку и помощь."
--------------------------------------------------

Time: [00:46:28 - 00:49:81]
Speaker: SPEAKER_00
Transcription: ну то есть вы считаете это нормальный способ заработка обманывать людей
Segment emotions: Dominant: neutral | {'angry': 0.0, 'sad': 0.0, 'neutral': 1.0, 'positive': 0.0}
Cumulative emotions: Dominant: neutral | {'angry': 0.01, 'sad': 0.0, 'neutral': 0.94, 'positive': 0.05}
Sentiment score: -0.55
Intent: Arguing
Theme distance: 0.11
---------------------------

Token indices sequence length is longer than the specified maximum sequence length for this model (559 > 512). Running this sequence through the model will result in indexing errors



Time: [01:46:34 - 02:00:65]
Speaker: SPEAKER_00
Transcription: да причем здесь депутаты перед богом вы же не депутатами станете ну перед своей совестью перед сами собой ну ладно и на меня нарвались нарветесь на какую нибудь многодетную семью украдите ее последние кровные
Segment emotions: Dominant: angry | {'angry': 0.72, 'sad': 0.0, 'neutral': 0.27, 'positive': 0.0}
Cumulative emotions: Dominant: neutral | {'angry': 0.36, 'sad': 0.0, 'neutral': 0.64, 'positive': 0.0}
Sentiment score: -0.88
Intent: Arguing
Theme distance: 0.19
- Segment angry emotion above threshold
- Cumulative angry emotion above threshold
- Strong negative sentiment detected (score: -0.88)
--------------------------------------------------

Time: [02:00:88 - 02:02:88]
Speaker: SPEAKER_01
Transcription: ну не ну не я так кто то другой
Segment emotions: Dominant: neutral | {'angry': 0.0, 'sad': 0.0, 'neutral': 0.99, 'positive': 0.01}
Cumulative emotions: Dominant: neutral | {'angry': 0.18, 'sad': 0.0, 'neutral': 0.81