In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so imported
# modules are reloaded before each cell runs and edits to project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path

# Make the directory two levels above the current working directory importable
# (presumably the project root that contains the `app` package — confirm).
# The path is resolved to an absolute one so imports keep working even if the
# working directory changes later, and it is added only when missing so that
# re-running this cell does not pile up duplicate sys.path entries.
PROJECT_ROOT = str(Path('../..').resolve())
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
import io
import json
import logging
import aiohttp
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, List, Union
from uuid import uuid4

import pandas as pd
from redis.asyncio.client import Redis

from app.redis_transcribe import keys
from app.services.audio.audio import AudioFileCorruptedError, AudioSlicer
from app.services.audio.redis_models import (
    Meeting,
    Transcriber,
    Transcript,
    best_covering_connection,
    connection_with_minimal_start_greater_than_target,
)
from app.services.transcription.matcher import TranscriptSpeakerMatcher, TranscriptSegment, SpeakerMeta


In [4]:
import asyncio
import logging

from app.redis_transcribe.connection import get_redis_client
from app.settings import settings
from app.services.transcription.processor import Processor




redis_client = await get_redis_client(settings.redis_host, settings.redis_port,settings.redis_password)

processor = Processor(redis_client,max_length=240)



2025-02-13 18:41:25,128 - INFO - Attempting Redis connection to redis:6379
2025-02-13 18:41:25,129 - INFO - Redis password is configured
2025-02-13 18:41:25,133 - INFO - Successfully connected to Redis server


In [5]:
# Bind the notebook-level name `self` to the processor so that expressions
# written as `self.<attr>` can be evaluated directly in a cell.
# NOTE(review): presumably a debugging aid for pasting Processor method bodies — confirm.
self = processor

In [12]:
ok = await processor.read()
ok
self.slice_duration
self.max_length
self.slice_duration/self.max_length
processor.whisper_service_url = 'http://host.docker.internal:8033'
await processor.transcribe()
await processor.find_next_seek()
await processor.do_finally()
from app.services.audio.redis_models import Transcript
transcript = Transcript(processor.meeting.meeting_id, processor.redis_client)
len(await Transcript.get_all_transcripts(processor.redis_client))
df = await Transcript.get_all_transcripts_df(processor.redis_client)
df.reset_index(inplace=True)
len(df)

2025-02-13 18:43:32,894 - INFO - Meeting ID: mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
2025-02-13 18:43:32,896 - INFO - seek_timestamp: 2025-02-13 15:44:07.936000+00:00
2025-02-13 18:43:32,897 - INFO - number of connections: 1
2025-02-13 18:43:32,897 - INFO - Connection ID: 2490f037-c1f5-4051-96c9-b9217bcf4700
2025-02-13 18:43:32,897 - INFO - seek: 711.936


2025-02-13 18:44:42,301 - INFO - start: None
2025-02-13 18:48:30,567 - INFO - pushed
2025-02-13 18:48:30,570 - INFO - seek_timestamp: 2025-02-13 16:39:10.360000+00:00


added to todo


1305

In [17]:
# Display the final 30 rows of the transcript frame (positional slice from the end).
df.iloc[-30:]

Unnamed: 0,index,content,start_timestamp,end_timestamp,speaker,confidence,segment_id,words,meeting_id
1275,1163,"Ну, там, да, такая лицензия,",2025-02-13 16:38:04.326000+00:00,2025-02-13 16:38:06.366000+00:00,,0.0,1164,"[[3236.39, 3236.83, Ну,, 0.96435546875], [323...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1276,1164,"наверное, будет, что если ты сам",2025-02-13 16:38:06.386000+00:00,2025-02-13 16:38:08.426000+00:00,,0.0,1165,"[[3238.45, 3238.67, наверное,, 0.99365234375]...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1277,1165,"для себя разворачиваешь,",2025-02-13 16:38:08.426000+00:00,2025-02-13 16:38:09.746000+00:00,,0.0,1166,"[[3240.49, 3240.67, для, 0.99951171875], [324...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1278,1166,"ну, то есть, например, ты компания,",2025-02-13 16:38:09.906000+00:00,2025-02-13 16:38:12.226000+00:00,,0.0,1167,"[[3241.97, 3242.27, ну,, 0.369873046875], [32...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1279,1167,и внутри компании все это разворачиваешь.,2025-02-13 16:38:12.226000+00:00,2025-02-13 16:38:14.426000+00:00,,0.0,1168,"[[3244.29, 3244.43, и, 0.8486328125], [3244.4...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1280,1168,"Ага. Ты разворачивай,",2025-02-13 16:38:14.846000+00:00,2025-02-13 16:38:16.586000+00:00,,0.0,1169,"[[3246.91, 3247.35, Ага., 0.70263671875], [32...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1281,1169,пожалуйста.,2025-02-13 16:38:16.626000+00:00,2025-02-13 16:38:16.966000+00:00,,0.0,1170,"[[3248.69, 3249.03, пожалуйста., 0.99462890625]]",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1282,1170,Пользуйся.,2025-02-13 16:38:19.466000+00:00,2025-02-13 16:38:19.906000+00:00,,0.0,1171,"[[3251.53, 3251.97, Пользуйся., 0.83605957031...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1283,1171,"Если ты хочешь,",2025-02-13 16:38:20.506000+00:00,2025-02-13 16:38:22.106000+00:00,,0.0,1172,"[[3252.57, 3253.01, Если, 0.98291015625], [32...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
1284,1172,если ты хочешь взять это просто опубликовать,2025-02-13 16:38:22.106000+00:00,2025-02-13 16:38:23.866000+00:00,,0.0,1173,"[[3254.17, 3254.33, если, 0.0090789794921875]...",mock_user_2490f037-c1f5-4051-96c9-b9217bcf4700
