In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [2]:
import asyncio
import logging

from app.redis_transcribe.connection import get_redis_client
from app.settings import settings
from app.services.transcription.processor import Processor

In [3]:
from app.services.audio.redis_models import (
    Meeting,
    Transcriber,
    TranscriptStore,
    TranscriptPrompt,
    best_covering_connection,
    connection_with_minimal_start_greater_than_target,
    get_timestamps_overlap
)

In [4]:
# Configure logging for this notebook session.
# `logging.basicConfig` silently does nothing when the root logger already has
# handlers (for example ones installed by an imported module, or by an earlier
# run of this cell). `force=True` (Python 3.8+) removes those handlers first,
# so the level and format below are the ones actually in effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
# Notebook-level logger used by the cells below.
logger = logging.getLogger(__name__)

In [5]:
redis_client = await get_redis_client(settings.redis_host, settings.redis_port,settings.redis_password)

processor = Processor(redis_client, logger, max_length=settings.max_audio_length_sec)

2025-02-27 17:37:40,689 - INFO - Attempting Redis connection to redis:6379
2025-02-27 17:37:40,690 - INFO - Redis password is configured
2025-02-27 17:37:40,693 - INFO - Successfully connected to Redis server


In [None]:
await redis_client.flushdb()

True

In [6]:
# Run the processor's read step. The log output recorded for this cell shows it
# resolving a seek timestamp, counting overlapping connections and selecting a
# best connection id.
# NOTE(review): `ok` is not used by any later cell visible here; its meaning
# (presumably a success flag) should be confirmed in Processor.read.
ok = await processor.read()

2025-02-27 17:37:43,257 - INFO - seek_timestamp: 2025-02-13 15:32:16+00:00
2025-02-27 17:37:43,258 - INFO - number of connections: 1
2025-02-27 17:37:43,259 - INFO - Found 1 overlapping connections
2025-02-27 17:37:43,259 - INFO - Best Connection ID: 2490f037-c1f5-4051-96c9-b9217bcf4700
2025-02-27 17:37:43,259 - INFO - seek: 0.0
2025-02-27 17:37:44,193 - INFO - start: None


In [7]:
# Bind the processor to the name `self`; the cells below call its private
# methods and attributes through this alias.
# NOTE(review): presumably done so that method bodies can be pasted from the
# Processor class and run unchanged. A notebook-level `self` is unusual and is
# hidden state for every later cell - consider using `processor` directly.
self = processor

In [8]:
# Call the processor's private transcription step directly, passing None as its
# single argument. The result is indexed with the 'segments' key by a later cell.
# NOTE(review): what the None argument stands for is not visible here - confirm
# against Processor._perform_audio_transcription.
transcription_result = await self._perform_audio_transcription(None)

2025-02-27 17:37:53,825 - INFO - Generated new text for history from raw segments (length: 447 chars)
2025-02-27 17:37:53,827 - INFO - Truncated history to last 400 tokens
2025-02-27 17:37:53,828 - INFO - Successfully updated transcription history with 370 chars


In [9]:
# Pass the 'segments' entry of the transcription result through the processor's
# private segment-processing step. The output recorded for the following cell
# shows the result as a list of TranscriptSegment objects.
matched_segments = await self._process_segments(transcription_result['segments'])

In [10]:
# Display the processed segments.
# NOTE(review): the recorded output for this cell contains a speaker name and
# verbatim transcript text; clear or redact it before sharing the notebook, and
# consider displaying only a short slice instead of the full list.
matched_segments

[TranscriptSegment(content='я думал ты что-то про ну типа как хранить там и так далее но окей да нужно нужно записать', start_timestamp=Timestamp('2025-02-13 15:32:16.430000+0000', tz='tzutc()'), end_timestamp=Timestamp('2025-02-13 15:32:30.750000+0000', tz='tzutc()'), speaker='Sergey Ryabenko', confidence=0.4888268156424581, words=[{'word': 'я', 'start': 0.43, 'end': 1.45, 'confidence': 0.06683349609375}, {'word': 'думал', 'start': 1.45, 'end': 5.25, 'confidence': 0.81005859375}, {'word': 'ты', 'start': 5.25, 'end': 5.39, 'confidence': 0.2144775390625}, {'word': 'что', 'start': 5.39, 'end': 5.53, 'confidence': 0.82421875}, {'word': '-то', 'start': 5.53, 'end': 5.65, 'confidence': 0.881103515625}, {'word': 'про', 'start': 5.65, 'end': 5.91, 'confidence': 0.93603515625}, {'word': 'ну', 'start': 5.91, 'end': 6.81, 'confidence': 0.75732421875}, {'word': 'типа', 'start': 6.81, 'end': 7.07, 'confidence': 0.98681640625}, {'word': 'как', 'start': 7.07, 'end': 7.29, 'confidence': 0.986328125},

In [11]:
from datetime import datetime, timezone

In [12]:
for segment in matched_segments:
    transcription = TranscriptStore(
        self.meeting.meeting_id,
        self.redis_client,
        segment.to_dict()
    )
    await transcription.lpush()



In [13]:
# Manually set the processor's `done` attribute to True.
# NOTE(review): what `done` controls is not visible in this notebook -
# presumably it marks the current audio chunk as fully processed; confirm in
# the Processor class before relying on it.
self.done = True

In [14]:
# Ask the processor to determine its next seek position. The log line recorded
# for this cell reports the resulting seek_timestamp.
await processor.find_next_seek()

2025-02-27 17:37:59,693 - INFO - seek_timestamp: 2025-02-13 15:33:16+00:00


In [15]:
from app.services.api.engine_client import EngineAPIClient

In [17]:
# Invoke TranscriptStore.process_all_transcripts with the Redis client and the
# processor's engine client.
# NOTE(review): the EngineAPIClient import in the preceding cell is not
# referenced here (the client comes from `self.engine_client`), and the
# execution counts jump from 15 to 17 - re-check this section with
# Restart & Run All.
await TranscriptStore.process_all_transcripts(redis_client, self.engine_client)

In [23]:

# Fetch the raw transcript data through TranscriptStore and display it.
# The recorded output is a dict whose key (presumably a meeting identifier -
# confirm) maps to a list of segment dicts with 'content', timestamps,
# 'speaker', 'confidence' and per-word entries.
# NOTE(review): the full dump is large and contains verbatim transcript text;
# show a small sample and clear the output before sharing.
transcripts = await TranscriptStore.get_raw_transcript_data(redis_client)
transcripts

{'the-zdjv-byg': [{'content': 'мы там нехило с андреем ну да всякого лишнего но',
   'start_timestamp': '2025-02-13 15:33:08.230000+00:00',
   'end_timestamp': '2025-02-13 15:33:15.310000+00:00',
   'speaker': None,
   'confidence': 0.0,
   'words': [{'word': 'мы',
     'start': 52.23,
     'end': 52.43,
     'confidence': 0.90087890625},
    {'word': 'там', 'start': 52.43, 'end': 52.67, 'confidence': 0.96533203125},
    {'word': 'нехило',
     'start': 52.67,
     'end': 53.29,
     'confidence': 0.51611328125},
    {'word': 'с', 'start': 53.29, 'end': 53.59, 'confidence': 0.96826171875},
    {'word': 'андреем', 'start': 53.59, 'end': 54.15, 'confidence': 0.984375},
    {'word': 'ну', 'start': 54.15, 'end': 55.79, 'confidence': 0.763671875},
    {'word': 'да', 'start': 55.79, 'end': 56.47, 'confidence': 0.9580078125},
    {'word': 'всякого',
     'start': 56.47,
     'end': 58.21,
     'confidence': 0.99609375},
    {'word': 'лишнего',
     'start': 58.21,
     'end': 58.77,
     'con