In [1]:

%load_ext autoreload
%autoreload 2

In [2]:
import asyncio
import io
import logging
from datetime import timedelta,datetime,timezone

from faster_whisper import WhisperModel
from app.services.audio.redis import Transcript

from app.database_redis.connection import get_redis_client
from app.services.audio.audio import AudioSlicer
from app.services.audio.redis import Meeting, Transcriber, best_covering_connection
from app.settings import settings


  from .autonotebook import tqdm as notebook_tqdm


check_and_process_connections_interval_sec: 5.0


In [3]:
# Seconds held back when advancing the seek position after a slice
# (used below as `slice_duration - overlap`).
overlap = 2

In [4]:
logger = logging.getLogger(__name__)

In [5]:
redis_client = await get_redis_client(settings.redis_host, settings.redis_port, settings.redis_password)

In [6]:
# Display the imported class to confirm which module it resolves to.
# (The cell previously held an unfinished call expression, which is a
# SyntaxError and stops "Restart & Run All".)
Transcriber

app.services.audio.redis.Transcriber

In [4]:
# Take one meeting id from the transcriber's in-progress collection.
# The id can be falsy when nothing is pending (process() below guards for that).
transcriber = Transcriber(redis_client)
meeting_id = await transcriber.pop_inprogress()

In [5]:
# Build the Meeting wrapper for the popped id and load its stored fields
# (start_timestamp, transcriber_seek_timestamp, ... are read in the cells below).
meeting = Meeting(redis_client, meeting_id)
await meeting.load_from_redis()

In [6]:
meeting.start_timestamp

datetime.datetime(2024, 5, 20, 17, 38, 5, 107331, tzinfo=tzutc())

In [8]:
meeting.transcriber_seek_timestamp

datetime.datetime(2024, 5, 20, 17, 38, 5, 107331, tzinfo=tzutc())

In [10]:
# Offset of the transcriber position from the start of the meeting, as a timedelta.
seek = meeting.transcriber_seek_timestamp - meeting.start_timestamp

In [11]:
seek

datetime.timedelta(0)

In [18]:
current_time = datetime.now(timezone.utc)

In [19]:
connections = await meeting.get_connections()

In [20]:
connection = best_covering_connection(meeting.diarizer_seek_timestamp, current_time, connections)

In [22]:
max_length = 240

In [23]:
# Repeats the connection lookup from the earlier cell.
# NOTE(review): the lookup uses `diarizer_seek_timestamp` while `seek` is derived from
# `transcriber_seek_timestamp` — confirm which position should drive the connection choice.
connection = best_covering_connection(meeting.diarizer_seek_timestamp, current_time, connections)
# Cut the recording for that connection starting at `seek`; `max_length` is passed as the
# third argument (with seek=0 and max_length=240 the slice reported ~240 s).
audio_slicer = await AudioSlicer.from_ffmpeg_slice(f"/audio/{connection.id}.webm", seek, max_length)
# Actual length of the slice in seconds; used later to advance the seek position.
slice_duration = audio_slicer.audio.duration_seconds
# Raw bytes of the slice, wrapped in BytesIO for the model in the transcription cell.
audio_data = await audio_slicer.export_data()

2024-05-20 19:04:23,252 - INFO - app.services.audio.audio - None


In [25]:
# Load the faster-whisper model on the GPU ("cuda") with float16 compute;
# the transcription cell calls model.transcribe() on it.
model_size = "large-v3"
model = WhisperModel(model_size, device="cuda", compute_type="float16")

In [29]:
segments, _ = model.transcribe(io.BytesIO(audio_data), beam_size=5, vad_filter=True, word_timestamps=True)
segments = [s for s in list(segments)]
logger.info("done")
result = [[w._asdict() for w in s.words] for s in segments]
transcription = Transcript(meeting_id, redis_client, result)
await transcription.lpush()

In [37]:
# Advance the transcriber position past the processed slice, holding back `overlap`
# seconds so the next slice re-reads the tail of this one.
# The attribute is spelled `transcriber_seek_timestamp`, matching the name read earlier
# in this notebook; the previous spelling (`transcribe_...`) would only have created a
# new, unrelated attribute on the object. NOTE(review): confirm against the Meeting class.
meeting.transcriber_seek_timestamp = meeting.start_timestamp + seek + timedelta(seconds=slice_duration - overlap)

In [39]:
await transcriber.remove(meeting.meeting_id)

In [40]:
await meeting.update_redis()

In [38]:
slice_duration

240.048

In [30]:
segments

[Segment(id=1, seek=470, start=195.17, end=196.37, text=' Fuck yeah.', tokens=[50365, 10965, 1338, 13, 50486], temperature=0.0, avg_logprob=-0.625, compression_ratio=0.5555555555555556, no_speech_prob=0.164794921875, words=[Word(start=195.17, end=195.97, word=' Fuck', probability=0.70556640625), Word(start=195.97, end=196.37, word=' yeah.', probability=0.70263671875)])]

In [None]:

meeting.transcribe_seek_timestamp = end_of_last_speech + meeting.transcribe_seek_timestamp
await transcriber.remove(meeting.id)
await meeting.update_redis()

In [None]:
logger = logging.getLogger(__name__)

# TODO: feed the previous transcript item back in as the prompt for the next transcription (recurrent prompting)


async def process(redis_client, model, max_length=240) -> None:
    """Transcribe the next audio slice of one in-progress meeting.

    Pops a meeting id from the transcriber, slices that meeting's recording at
    its transcriber seek position, runs ``model.transcribe`` on the slice,
    pushes the word-level result to Redis and advances the seek position.

    Args:
        redis_client: Redis client handed to ``Transcriber``, ``Meeting`` and
            ``Transcript``.
        model: Object exposing ``transcribe(...)`` as called below (a
            faster-whisper ``WhisperModel`` in this notebook).
        max_length: Passed as the third argument of
            ``AudioSlicer.from_ffmpeg_slice`` — presumably the slice length in
            seconds (240 yielded a ~240 s slice in the exploratory cells).

    Returns:
        None. Returns right after the pop when no meeting id is available.
    """
    transcriber = Transcriber(redis_client)
    meeting_id = await transcriber.pop_inprogress()

    if not meeting_id:
        return

    meeting = Meeting(redis_client, meeting_id)
    await meeting.load_from_redis()

    # Attribute name as read successfully in the exploratory cells.
    # NOTE(review): confirm `transcriber_seek_timestamp` against the Meeting class.
    seek_timestamp = meeting.transcriber_seek_timestamp
    seek = seek_timestamp - meeting.start_timestamp  # timedelta from meeting start

    # get_connections() is a coroutine and has to be awaited before use.
    connections = await meeting.get_connections()
    # Same argument shape as the executed exploratory call: (seek timestamp, now, connections).
    connection = best_covering_connection(seek_timestamp, datetime.now(timezone.utc), connections)

    # `seek` is a timedelta, so `seek + max_length` (timedelta + int) raised TypeError;
    # pass `max_length` on its own, as the executed exploratory cell does.
    audio_slicer = await AudioSlicer.from_ffmpeg_slice(f"/audio/{connection.id}.webm", seek, max_length)
    slice_duration = audio_slicer.audio.duration_seconds
    audio_data = await audio_slicer.export_data()

    # NOTE(review): transcription runs synchronously here and blocks the event loop
    # while it decodes; acceptable for a single-purpose worker, revisit otherwise.
    segments, _ = model.transcribe(io.BytesIO(audio_data), beam_size=5, vad_filter=True, word_timestamps=True)
    segments = list(segments)
    logger.info("done")

    # One inner list of word dicts per segment; tolerate a segment without words.
    result = [[w._asdict() for w in (s.words or [])] for s in segments]
    transcription = Transcript(meeting_id, redis_client, result)
    await transcription.lpush()

    # Advance to the end of the last recognised word. When nothing was recognised
    # (empty result) there is no last word, so skip the whole slice instead of
    # failing with IndexError and retrying the same silent audio forever.
    word_ends = [word["end"] for words in result for word in words]
    advance_seconds = word_ends[-1] if word_ends else slice_duration

    meeting.transcriber_seek_timestamp = seek_timestamp + timedelta(seconds=advance_seconds)
    # Use the popped id directly rather than guessing the Meeting attribute name.
    await transcriber.remove(meeting_id)
    await meeting.update_redis()

In [None]:


async def main():
    """Run the transcription worker loop forever.

    Opens the Redis client from ``settings`` once, then repeatedly sleeps 0.1 s
    and calls ``process``. ``model`` is read from module scope, so it must be
    assigned before this coroutine is started.
    """
    client = await get_redis_client(
        settings.redis_host,
        settings.redis_port,
        settings.redis_password,
    )

    poll_delay_sec = 0.1
    while True:
        # Short pause between iterations so the loop yields control.
        await asyncio.sleep(poll_delay_sec)
        await process(client, model)


if __name__ == "__main__":
    # Load the model once up front; main() picks `model` up from module scope.
    model_size = "large-v3"
    model = WhisperModel(model_size, compute_type="float16", device="cuda")
    logger.info("Model loaded")
    # NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
    # running (as in a live Jupyter kernel); there, use `await main()` instead.
    asyncio.run(main())
