In [1]:
# Enable IPython's autoreload extension in mode 2, so edited project modules
# are re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2


In [2]:
import asyncio
import logging

from app.redis_transcribe.connection import get_redis_client
from app.settings import settings
from app.services.transcription.processor import Processor

In [3]:
from app.services.audio.redis_models import (
    Meeting,
    Transcriber,
    TranscriptStore,
    TranscriptPrompt,
    best_covering_connection,
    connection_with_minimal_start_greater_than_target,
    get_timestamps_overlap
)

In [5]:
# Logging setup: emit INFO and above, each record stamped with time, logger
# name and level.
_LOG_LAYOUT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_LAYOUT, level=logging.INFO)

# Notebook-scoped logger; it is handed to the Processor further down.
logger = logging.getLogger(__name__)

In [6]:
redis_client = await get_redis_client(settings.redis_host, settings.redis_port,settings.redis_password)

processor = Processor(redis_client, logger, max_length=30)

2025-02-28 17:03:30,998 - INFO - Attempting Redis connection to redis:6379
2025-02-28 17:03:30,999 - INFO - Redis password is configured
2025-02-28 17:03:31,002 - INFO - Successfully connected to Redis server


In [9]:
# Accumulator for per-run transcript frames: filled by dfs.append(df) and
# combined with pd.concat(dfs) further down.
dfs = []

In [10]:
import pandas as pd

In [11]:
# Mic-level cutoff: only speaker samples with 'mic' strictly above this value
# are kept when speakers_df is filtered.
# NOTE(review): the name is misspelled ("threshhold"); the filter cell uses the
# same spelling, so rename both together if it is ever corrected.
threshhold = 0

In [12]:
from app.services.transcription.processor import SpeakerMeta

In [13]:
# Alias the processor as `self` — presumably so statements written against
# `self` (as inside the class) can be run verbatim in the notebook.
self = processor

In [8]:
# DESTRUCTIVE: FLUSHDB deletes every key in the Redis database this client is
# connected to.
# NOTE(review): this cell's execution count (8) is lower than that of the cells
# above it, i.e. it was run out of order — confirm it belongs in a
# top-to-bottom run at all.
await redis_client.flushdb()

True

In [144]:
# Run the processor's read step. Judging by the log output it resolves the seek
# timestamp and selects the best overlapping connection.
# `ok` is not referenced again in the visible cells.
ok = await processor.read()

2025-02-28 12:26:50,009 - INFO - seek_timestamp: 2025-02-13 15:33:53.056000+00:00
2025-02-28 12:26:50,010 - INFO - number of connections: 1
2025-02-28 12:26:50,011 - INFO - Found 1 overlapping connections
2025-02-28 12:26:50,011 - INFO - Best Connection ID: 2490f037-c1f5-4051-96c9-b9217bcf4700
2025-02-28 12:26:50,012 - INFO - seek: 97.056


2025-02-28 12:26:50,588 - INFO - start: None


In [145]:
# Call the processor's private transcription step directly, passing None as its
# only argument. The result's 'segments' entry is read further down.
# NOTE(review): relies on `self` currently being bound to `processor`.
transcription_result = await self._perform_audio_transcription(None)

2025-02-28 12:26:52,695 - INFO - Generated new text for history from raw segments (length: 292 chars)
2025-02-28 12:26:52,696 - INFO - Truncated history to last 400 tokens
2025-02-28 12:26:52,697 - INFO - Successfully updated transcription history with 382 chars


In [146]:
speaker_data = await self.redis_client.lrange(f"speaker_data", start=0, end=-1)

In [147]:

# Decode each raw Redis entry into a SpeakerMeta record.
speaker_data = list(map(SpeakerMeta.from_json_data, speaker_data))

In [148]:
# Peek at the first five parsed speaker records.
speaker_data[:5]

[SpeakerMeta(name='Dmitriy Grankin', mic_level=1.0, timestamp=datetime.datetime(2025, 2, 13, 15, 36, 26, tzinfo=datetime.timezone.utc), delay_sec=1.0, meeting_id='the-zdjv-byg', user_id='f3499505-aa58-48d8-bf87-c6e087420611', meta_bits='1111111111'),
 SpeakerMeta(name='Sergey Ryabenko', mic_level=0.0, timestamp=datetime.datetime(2025, 2, 13, 15, 36, 26, tzinfo=datetime.timezone.utc), delay_sec=1.0, meeting_id='the-zdjv-byg', user_id='f3499505-aa58-48d8-bf87-c6e087420611', meta_bits='0000000000'),
 SpeakerMeta(name='Dmitriy Grankin', mic_level=0.8, timestamp=datetime.datetime(2025, 2, 13, 15, 36, 25, tzinfo=datetime.timezone.utc), delay_sec=1.0, meeting_id='the-zdjv-byg', user_id='f3499505-aa58-48d8-bf87-c6e087420611', meta_bits='0011111111'),
 SpeakerMeta(name='Sergey Ryabenko', mic_level=0.0, timestamp=datetime.datetime(2025, 2, 13, 15, 36, 25, tzinfo=datetime.timezone.utc), delay_sec=1.0, meeting_id='the-zdjv-byg', user_id='f3499505-aa58-48d8-bf87-c6e087420611', meta_bits='0000000000

In [149]:
# One row per speaker sample: who it was, the mic level, when it was recorded
# and the reported delay in seconds.
speaker_rows = [
    dict(
        speaker=meta.name,
        mic=meta.mic_level,
        timestamp=meta.timestamp,
        speaker_delay_sec=meta.delay_sec,
    )
    for meta in speaker_data
]
speakers_df = pd.DataFrame(speaker_rows)

In [150]:

# Order by time with the loudest sample first, then keep a single row per
# timestamp — the first one, i.e. the highest mic level.
speakers_df = (
    speakers_df
    .sort_values(by=['timestamp', 'mic'], ascending=[True, False])
    .drop_duplicates(subset=['timestamp'], keep='first')
)

In [151]:
# Show the processor's current seek timestamp.
processor.seek_timestamp

datetime.datetime(2025, 2, 13, 15, 33, 53, 56000, tzinfo=tzutc())

In [152]:
# Keep only samples whose mic level is strictly above the cutoff.
# .copy() detaches the result from the unfiltered frame, so the column
# assignments that follow work on an independent DataFrame rather than a
# filtered slice (which is what triggers pandas' SettingWithCopyWarning).
speakers_df = speakers_df.loc[speakers_df['mic'] > threshhold].copy()

In [153]:

# Normalise sample times: parse as UTC, shift back by the reported per-sample
# delay, then truncate to whole seconds. assign() builds a new frame instead of
# writing columns into a filtered slice.
speakers_df = speakers_df.assign(
    timestamp=(
        pd.to_datetime(speakers_df['timestamp'], utc=True)
        - pd.to_timedelta(speakers_df['speaker_delay_sec'], unit='s')
    ).dt.floor('s')
)

# Several samples can land on the same second after the shift. Order each
# second loudest-first *before* collapsing it, so the 'first' speaker kept for
# a second is the one that produced the 'max' mic level (otherwise the two
# values could come from different rows). Speaker name breaks exact mic ties.
speakers_df = (
    speakers_df
    .sort_values(['timestamp', 'mic', 'speaker'], ascending=[True, False, True])
    .groupby('timestamp', as_index=False)
    .agg({'mic': 'max', 'speaker': 'first'})
)

# Number the runs of consecutive seconds attributed to the same speaker: the
# counter advances on every row whose speaker differs from the previous row.
speakers_df['change'] = speakers_df['speaker'].ne(speakers_df['speaker'].shift()).cumsum()

In [154]:
# Preview the first rows of the per-second speaker timeline.
speakers_df.head(20)

Unnamed: 0,timestamp,mic,speaker,change
0,2025-02-13 15:32:18+00:00,0.3,Sergey Ryabenko,1
1,2025-02-13 15:32:19+00:00,0.9,Sergey Ryabenko,1
2,2025-02-13 15:32:20+00:00,0.7,Sergey Ryabenko,1
3,2025-02-13 15:32:21+00:00,0.7,Sergey Ryabenko,1
4,2025-02-13 15:32:22+00:00,0.6,Sergey Ryabenko,1
5,2025-02-13 15:32:23+00:00,1.0,Sergey Ryabenko,1
6,2025-02-13 15:32:24+00:00,0.9,Sergey Ryabenko,1
7,2025-02-13 15:32:25+00:00,0.9,Sergey Ryabenko,1
8,2025-02-13 15:32:26+00:00,0.7,Sergey Ryabenko,1
9,2025-02-13 15:32:27+00:00,0.6,Sergey Ryabenko,1


In [155]:

# Collapse each run of identical speakers (same 'change' id) into one turn:
# its speaker, first and last sample time, and peak mic level.
# NOTE(review): `end` is the time of the turn's last sample, so a turn made of a
# single sample has start == end and cannot overlap any transcript segment —
# confirm zero-length turns are intended.
diar_df = (
    speakers_df
    .groupby('change')
    .agg(
        speaker=('speaker', 'first'),
        start=('timestamp', 'first'),
        end=('timestamp', 'last'),
        mic=('mic', 'max'),
    )
    .reset_index(drop=True)
)

In [156]:
# Length of each speaker turn in seconds (last sample time minus first sample time).
diar_df['duration'] = diar_df['end'].sub(diar_df['start']).dt.total_seconds()

In [157]:
# Inspect the speaker turns.
diar_df

Unnamed: 0,speaker,start,end,mic,duration
0,Sergey Ryabenko,2025-02-13 15:32:18+00:00,2025-02-13 15:32:27+00:00,1.0,9.0
1,Dmitriy Grankin,2025-02-13 15:32:28+00:00,2025-02-13 15:32:28+00:00,0.4,0.0
2,Sergey Ryabenko,2025-02-13 15:32:29+00:00,2025-02-13 15:32:29+00:00,0.4,0.0
3,Dmitriy Grankin,2025-02-13 15:32:30+00:00,2025-02-13 15:32:35+00:00,1.0,5.0
4,Sergey Ryabenko,2025-02-13 15:32:36+00:00,2025-02-13 15:32:45+00:00,1.0,9.0
5,Dmitriy Grankin,2025-02-13 15:32:46+00:00,2025-02-13 15:32:47+00:00,0.8,1.0
6,Sergey Ryabenko,2025-02-13 15:32:48+00:00,2025-02-13 15:32:51+00:00,1.0,3.0
7,Dmitriy Grankin,2025-02-13 15:32:53+00:00,2025-02-13 15:32:55+00:00,1.0,2.0
8,Sergey Ryabenko,2025-02-13 15:32:56+00:00,2025-02-13 15:32:58+00:00,0.8,2.0
9,Dmitriy Grankin,2025-02-13 15:32:59+00:00,2025-02-13 15:33:12+00:00,1.0,13.0


In [158]:
# Point `self` at the processor again; the cells below read self.meeting from it.
self = processor

In [159]:
# Pull the raw segment entries out of the transcription result.
whisper_segments = transcription_result['segments']

In [160]:
from app.services.transcription.matcher import TranscriptSegment

In [161]:
# Wrap each raw Whisper segment in a TranscriptSegment, tagging it with the
# meeting's server start time as an ISO-8601 string (None when unset).
transcription_data = []
for whisper_segment in whisper_segments:
    started_at = self.meeting.start_server_timestamp
    transcription_data.append(
        TranscriptSegment.from_whisper_segment(
            whisper_segment,
            server_timestamp=started_at.isoformat() if started_at else None,
        )
    )

In [162]:
# Dump the wrapped segments (their timestamps are still relative seconds here).
transcription_data

[TranscriptSegment(content='Вот, а роутер, он кидает на одну, если там занято, перекидывает на следующую.', start_timestamp=0.0, end_timestamp=8.78, speaker=None, confidence=0.46511518038236177, words=[{'word': 'Вот,', 'start': 0.0, 'end': 0.28, 'confidence': 0.2802734375}, {'word': 'а', 'start': 0.4, 'end': 0.64, 'confidence': 0.98291015625}, {'word': 'роутер,', 'start': 0.64, 'end': 1.86, 'confidence': 0.9928385416666666}, {'word': 'он', 'start': 1.86, 'end': 2.44, 'confidence': 0.990234375}, {'word': 'кидает', 'start': 2.44, 'end': 5.84, 'confidence': 0.9427083333333334}, {'word': 'на', 'start': 5.84, 'end': 6.06, 'confidence': 0.98974609375}, {'word': 'одну,', 'start': 6.06, 'end': 6.36, 'confidence': 0.97607421875}, {'word': 'если', 'start': 6.44, 'end': 6.66, 'confidence': 0.9892578125}, {'word': 'там', 'start': 6.66, 'end': 6.94, 'confidence': 0.99658203125}, {'word': 'занято,', 'start': 6.94, 'end': 7.44, 'confidence': 0.9889322916666666}, {'word': 'перекидывает', 'start': 7.44

In [163]:
# Rebind `self` to the processor's matcher: the matching loop in the next cell
# reads self.t0 from it.
self = self.matcher
import numpy as np

In [164]:
# Attribute a speaker to every transcript segment by picking the speaker turn
# in diar_df that overlaps the segment the longest (higher mic level wins ties).
# Side effects: segment timestamps become absolute pd.Timestamps, and
# segment.speaker / segment.confidence are overwritten.

# Loop-invariant: the absolute time that relative second 0.0 corresponds to.
recording_start = pd.to_datetime(self.t0)
no_overlap = pd.Timedelta(0)

for segment in transcription_data:
    if isinstance(segment.start_timestamp, pd.Timestamp):
        # Already converted by an earlier run of this cell; reuse the values so
        # the cell can be re-executed without float() failing on a Timestamp.
        segment_start = segment.start_timestamp
        segment_end = segment.end_timestamp
    else:
        # Convert relative seconds to absolute timestamps
        segment_start = recording_start + pd.Timedelta(seconds=float(segment.start_timestamp))
        segment_end = recording_start + pd.Timedelta(seconds=float(segment.end_timestamp))
        segment.start_timestamp = segment_start
        segment.end_timestamp = segment_end

    # Overlap of the segment with every speaker turn. Disjoint intervals give a
    # negative difference, which is clamped to a zero Timedelta (a timedelta
    # lower bound rather than the integer 0).
    overlap = (
        np.minimum(diar_df['end'], segment_end)
        - np.maximum(diar_df['start'], segment_start)
    ).clip(lower=no_overlap)
    # Kept on the frame so the last segment's overlaps can be inspected afterwards.
    diar_df['intersection'] = overlap

    # Candidate turns, longest overlap first; mic level breaks ties.
    candidates = diar_df[overlap > no_overlap].sort_values(
        ['intersection', 'mic'], ascending=[False, False]
    )

    if candidates.empty:
        # No speaker turn overlaps this segment.
        segment.speaker = None
        segment.confidence = 0.0
        continue

    top = candidates.iloc[0]
    segment.speaker = top['speaker']
    # Confidence = share of the segment covered by the chosen turn, scaled by
    # that turn's mic level.
    coverage = top['intersection'] / (segment_end - segment_start)
    segment.confidence = float(coverage * top['mic'])

In [165]:
# Alias, not a copy: the loop above updated the segments in place, so this is
# the same list, now carrying speaker and confidence.
matched_segments = transcription_data

In [166]:
# Re-inspect the speaker turns; 'intersection' holds the overlaps left behind by
# the final iteration of the matching loop.
diar_df

Unnamed: 0,speaker,start,end,mic,duration,intersection
0,Sergey Ryabenko,2025-02-13 15:32:18+00:00,2025-02-13 15:32:27+00:00,1.0,9.0,0 days 00:00:00
1,Dmitriy Grankin,2025-02-13 15:32:28+00:00,2025-02-13 15:32:28+00:00,0.4,0.0,0 days 00:00:00
2,Sergey Ryabenko,2025-02-13 15:32:29+00:00,2025-02-13 15:32:29+00:00,0.4,0.0,0 days 00:00:00
3,Dmitriy Grankin,2025-02-13 15:32:30+00:00,2025-02-13 15:32:35+00:00,1.0,5.0,0 days 00:00:00
4,Sergey Ryabenko,2025-02-13 15:32:36+00:00,2025-02-13 15:32:45+00:00,1.0,9.0,0 days 00:00:00
5,Dmitriy Grankin,2025-02-13 15:32:46+00:00,2025-02-13 15:32:47+00:00,0.8,1.0,0 days 00:00:00
6,Sergey Ryabenko,2025-02-13 15:32:48+00:00,2025-02-13 15:32:51+00:00,1.0,3.0,0 days 00:00:00
7,Dmitriy Grankin,2025-02-13 15:32:53+00:00,2025-02-13 15:32:55+00:00,1.0,2.0,0 days 00:00:00
8,Sergey Ryabenko,2025-02-13 15:32:56+00:00,2025-02-13 15:32:58+00:00,0.8,2.0,0 days 00:00:00
9,Dmitriy Grankin,2025-02-13 15:32:59+00:00,2025-02-13 15:33:12+00:00,1.0,13.0,0 days 00:00:00


In [167]:
import pandas as pd

In [168]:
# Let DataFrame cells display up to 1000 characters so long transcript text is
# not cut off at the default width.
pd.set_option('display.max_colwidth', 1000)

In [169]:
# Switch `self` back from the matcher to the processor; the next cell reads
# self.overlapping_connections.
self = processor

In [170]:

# Tag every segment with the user ids taken from the processor's overlapping
# connections (each segment gets its own list).
for segment in matched_segments:
    segment.present_user_ids = [
        connection[0].user_id for connection in self.overlapping_connections
    ]

# Tabulate the segments, leaving out the bulky per-word 'words' column
segments_dicts = [seg.to_dict() for seg in matched_segments]
df = pd.DataFrame(segments_dicts).drop(columns='words')
df

Unnamed: 0,content,start_timestamp,end_timestamp,speaker,confidence,server_timestamp,present_user_ids
0,"Вот, а роутер, он кидает на одну, если там занято, перекидывает на следующую.",2025-02-13 15:33:53.056000+00:00,2025-02-13 15:34:01.836000+00:00,Dmitriy Grankin,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
1,"Ну, ничего себе, прикольно.",2025-02-13 15:34:03.636000+00:00,2025-02-13 15:34:05.656000+00:00,,0.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
2,"Ну, да, то есть такой не лот балансера, а просто он будет, если домашний сервер занят,",2025-02-13 15:34:05.796000+00:00,2025-02-13 15:34:13.236000+00:00,Dmitriy Grankin,0.703763,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
3,"а там только модельки крутятся, больше ничего.",2025-02-13 15:34:13.236000+00:00,2025-02-13 15:34:16.296000+00:00,Dmitriy Grankin,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
4,"Вот, тогда он будет поднимать инстансы в гугле.",2025-02-13 15:34:16.296000+00:00,2025-02-13 15:34:22.036000+00:00,Dmitriy Grankin,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]


In [171]:
# Inspect the audio held by the processor's audio slicer (no output was captured
# for this cell).
processor.audio_slicer.audio

In [139]:
# Flag the object bound to `self` as done.
# NOTE(review): what `done` controls is not visible here — presumably it lets
# find_next_seek() move on; confirm against the Processor implementation.
self.done = True

In [140]:
# Ask the processor to work out its next seek position; the log line below
# reports the resulting seek_timestamp.
await processor.find_next_seek()

2025-02-28 12:26:32,229 - INFO - seek_timestamp: 2025-02-13 15:33:53.056000+00:00


In [141]:
# Stash this run's transcript frame.
# NOTE(review): re-running this cell appends the same frame again, duplicating
# its rows in the concatenated result.
dfs.append(df)

In [142]:
# Stack all collected frames. Per-frame indices are kept as they are, so index
# values repeat across frames.
pd.concat(dfs)

Unnamed: 0,content,start_timestamp,end_timestamp,speaker,confidence,server_timestamp,present_user_ids
0,а я думал ты что-то про ну типа,2025-02-13 15:32:16.430000+00:00,2025-02-13 15:32:22.950000+00:00,Sergey Ryabenko,0.759202,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
0,А как хранить там и так далее?,2025-02-13 15:32:23.104000+00:00,2025-02-13 15:32:24.544000+00:00,Sergey Ryabenko,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
1,"Ну, окей.",2025-02-13 15:32:24.764000+00:00,2025-02-13 15:32:25.704000+00:00,Sergey Ryabenko,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
2,К твоим услугам.,2025-02-13 15:32:26.124000+00:00,2025-02-13 15:32:27.144000+00:00,Sergey Ryabenko,0.858824,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
3,"Да, нужно записать,",2025-02-13 15:32:27.804000+00:00,2025-02-13 15:32:30.784000+00:00,Dmitriy Grankin,0.263087,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
4,"чтобы два спикера пообщались,",2025-02-13 15:32:30.784000+00:00,2025-02-13 15:32:32.744000+00:00,Dmitriy Grankin,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
5,а не один.,2025-02-13 15:32:32.744000+00:00,2025-02-13 15:32:33.224000+00:00,Dmitriy Grankin,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
6,А то тебе скоро Векса начнет советовать,2025-02-13 15:32:36.624000+00:00,2025-02-13 15:32:38.864000+00:00,Sergey Ryabenko,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
7,психологов.,2025-02-13 15:32:38.864000+00:00,2025-02-13 15:32:39.624000+00:00,Sergey Ryabenko,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]
8,"Хоть с кем-нибудь поговори,",2025-02-13 15:32:40.644000+00:00,2025-02-13 15:32:42.204000+00:00,Sergey Ryabenko,1.0,2025-02-13T15:32:16+00:00,[f3499505-aa58-48d8-bf87-c6e087420611]


In [143]:
from datetime import datetime, timezone

In [92]:
for segment in matched_segments:
    transcription = TranscriptStore(
        self.meeting.meeting_id,
        self.redis_client,
        segment.to_dict()
    )
    await transcription.lpush()



In [15]:
from app.services.api.engine_client import EngineAPIClient

In [None]:
# Presumably forwards the transcripts stored in Redis to the engine API through
# the engine client held on `self` — verify against TranscriptStore.push2engine.
# NOTE(review): this cell has no execution count, i.e. it was not run in the
# saved session.
await TranscriptStore.push2engine(redis_client, self.engine_client)

In [23]:

# Fetch the raw transcript data held in Redis and display it; the output is a
# dict keyed by meeting id, each value a list of segment dicts.
transcripts = await TranscriptStore.get_raw_transcript_data(redis_client)
transcripts

{'the-zdjv-byg': [{'content': 'мы там нехило с андреем ну да всякого лишнего но',
   'start_timestamp': '2025-02-13 15:33:08.230000+00:00',
   'end_timestamp': '2025-02-13 15:33:15.310000+00:00',
   'speaker': None,
   'confidence': 0.0,
   'words': [{'word': 'мы',
     'start': 52.23,
     'end': 52.43,
     'confidence': 0.90087890625},
    {'word': 'там', 'start': 52.43, 'end': 52.67, 'confidence': 0.96533203125},
    {'word': 'нехило',
     'start': 52.67,
     'end': 53.29,
     'confidence': 0.51611328125},
    {'word': 'с', 'start': 53.29, 'end': 53.59, 'confidence': 0.96826171875},
    {'word': 'андреем', 'start': 53.59, 'end': 54.15, 'confidence': 0.984375},
    {'word': 'ну', 'start': 54.15, 'end': 55.79, 'confidence': 0.763671875},
    {'word': 'да', 'start': 55.79, 'end': 56.47, 'confidence': 0.9580078125},
    {'word': 'всякого',
     'start': 56.47,
     'end': 58.21,
     'confidence': 0.99609375},
    {'word': 'лишнего',
     'start': 58.21,
     'end': 58.77,
     'con