In [1]:

%load_ext autoreload
%autoreload 2

In [2]:
import asyncio
import io
import json
import logging
from datetime import datetime, timedelta, timezone
from uuid import uuid4

import pandas as pd
import torch
from pyannote.audio import Pipeline
from qdrant_client import QdrantClient, models

from app.database_redis import keys
from app.database_redis.connection import get_redis_client
from app.services.audio.audio import AudioSlicer
from app.services.audio.redis import Diarisation, Diarizer, Meeting, best_covering_connection
from app.settings import settings


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Qdrant client shared by the helper functions in this notebook.
client = QdrantClient("qdrant", timeout=10)

# The helper functions in this notebook log through a module-level `logger`
# that no cell defined; create it here so they do not raise NameError on a
# fresh kernel. Handlers and levels are left to the default logging config.
logger = logging.getLogger(__name__)

In [4]:

def get_stored_knn(emb: list, user_id):
    """Look up the single closest stored embedding belonging to ``user_id``.

    Queries the "main" Qdrant collection with ``emb`` as the query vector,
    restricted to points whose ``user_id`` payload field equals ``user_id``.

    Returns:
        ``(speaker_id, score)`` taken from the best hit, or ``(None, None)``
        when the search returns no hits.
    """
    owner_filter = models.Filter(
        must=[models.FieldCondition(key="user_id", match=models.MatchValue(value=user_id))]
    )
    hits = client.search(
        collection_name="main",
        query_vector=emb,
        limit=1,
        query_filter=owner_filter,
    )
    if len(hits) == 0:
        return None, None

    best_hit = hits[0]
    return best_hit.payload["speaker_id"], best_hit.score


async def add_new_speaker_emb(emb: list, redis_client, user_id, speaker_id=None):
    """Store an embedding for a speaker in Qdrant and queue it on Redis.

    Args:
        emb: Embedding vector. Either a plain sequence of floats or an
            array-like object exposing ``tolist()``.
        redis_client: Async Redis client; must provide an awaitable ``lpush``.
        user_id: Owner of the embedding, saved in the point payload.
        speaker_id: Existing speaker to attach the embedding to. A new UUID
            string is generated when this is falsy.

    Returns:
        The speaker id the embedding was stored under.
    """
    logger.info("Adding new speaker...")
    speaker_id = speaker_id or str(uuid4())

    # Normalise the vector once, before any write. Previously `emb` went to
    # Qdrant unchanged and `emb.tolist()` was only called afterwards, so a
    # plain list was upserted and then failed with AttributeError before the
    # Redis push, leaving the two stores out of sync.
    vector = emb.tolist() if hasattr(emb, "tolist") else list(emb)

    client.upsert(
        collection_name="main",
        wait=True,
        points=[
            # Each stored embedding gets its own point id; the speaker is
            # identified through the payload.
            models.PointStruct(
                id=str(uuid4()),
                vector=vector,
                payload={"speaker_id": speaker_id, "user_id": user_id},
            )
        ],
    )
    await redis_client.lpush(keys.SPEAKER_EMBEDDINGS, json.dumps((speaker_id, vector, user_id)))
    logger.info(f"Added new speaker {speaker_id}")
    return speaker_id


async def process_speaker_emb(emb: list, redis_client, user_id):
    """Resolve an embedding to a speaker id, storing it when it adds information.

    The nearest stored embedding for ``user_id`` decides the outcome:

    * score above 0.95: reuse the matched speaker, store nothing;
    * score above 0.75: reuse the matched speaker and store this embedding
      under it;
    * otherwise, or when nothing is stored yet: register a new speaker.

    Returns:
        ``(speaker_id as str, score)`` where ``score`` is the nearest-neighbour
        score, or ``None`` when no neighbour was found.
    """
    matched_id, score = get_stored_knn(emb, user_id)
    logger.info(f"score: {score}")

    # No neighbour at all, or one that is too dissimilar: new speaker.
    if not matched_id or not (score > 0.75):
        created_id = await add_new_speaker_emb(emb, redis_client, user_id)
        return str(created_id), score

    # Same speaker but not a near-duplicate: keep this embedding as well.
    if not (score > 0.95):
        await add_new_speaker_emb(emb, redis_client, user_id, speaker_id=matched_id)

    return str(matched_id), score


def parse_segment(segment):
    """Flatten one diarization track into ``(start, end, speaker_index)``.

    The first element of ``segment`` must expose ``start`` and ``end``. The
    last element is a label whose second "_"-separated field is the integer
    index (for example ``"SPEAKER_03"`` gives ``3``).
    """
    span = segment[0]
    label = segment[-1]
    start, end = span.start, span.end
    speaker_index = int(label.split("_")[1])
    return start, end, speaker_index


async def __get_next_chunk_start(diarization_result, length, shift):
    """Pick where the next chunk should start, based on the last speech record.

    Args:
        diarization_result: Sequence of records with "start" and "end" keys.
        length: Length of the diarized chunk, in the same unit as the record
            times.
        shift: Offset added to the returned position.

    Returns:
        ``None`` for an empty result. Otherwise the last record's start plus
        ``shift`` when fewer than 2 units separate its end from ``length``
        (the speech is treated as cut off by the chunk boundary), else the
        last record's end plus ``shift``.
    """
    if len(diarization_result) == 0:
        return None

    final_record = diarization_result[-1]
    trailing_gap = length - final_record["end"]
    logger.info(trailing_gap)

    cut_off = trailing_gap < 2
    logger.info("interrupted" if cut_off else "non-interrupted")

    anchor = final_record["start"] if cut_off else final_record["end"]
    return anchor + shift


In [5]:
# Open the async Redis client using host, port and password from the app settings.
redis_client = await get_redis_client(settings.redis_host, settings.redis_port, settings.redis_password)

In [6]:
# Wrap the Redis client in a Diarizer and pop a meeting id from it.
# NOTE(review): presumably this is the next meeting waiting for diarization,
# and the pop removes it from the queue — confirm in Diarizer.pop_inprogress.
diarizer = Diarizer(redis_client)
meeting_id = await diarizer.pop_inprogress()

In [7]:
diarizer

<app.services.audio.redis.Diarizer at 0x7f8d015260e0>

In [8]:
meeting_id

'meeting1'

In [33]:
# Load the meeting state from Redis.
meeting = Meeting(redis_client, meeting_id)
await meeting.load_from_redis()
# Diarizer position as a float: seconds elapsed between the meeting start
# and the diarizer seek timestamp.
seek = (meeting.diarizer_seek_timestamp - meeting.start_timestamp).total_seconds()

# Timezone-aware "now" in UTC.
current_time = datetime.now(timezone.utc)

In [34]:
seek

0.0

In [11]:
current_time

datetime.datetime(2024, 5, 20, 14, 20, 17, 894252, tzinfo=datetime.timezone.utc)

In [12]:
# Fetch the meeting's connections (the recorded output shows a list of Connection objects).
connections = await meeting.get_connections()

In [13]:
connections

[<app.services.audio.redis.Connection at 0x7f8d01526b00>]

In [14]:
connection = connections[0]

In [15]:
connection.start_timestamp

datetime.datetime(2024, 5, 20, 14, 12, 48, 270419, tzinfo=tzutc())

In [16]:
await connection.load_from_redis()

In [17]:
connection.end_timestamp

datetime.datetime(2024, 5, 20, 14, 17, 45, 766640, tzinfo=tzutc())

In [19]:
seek

datetime.timedelta(0)

In [20]:
meeting.diarizer_seek_timestamp

datetime.datetime(2024, 5, 20, 14, 12, 48, 270419, tzinfo=tzutc())

In [21]:
connections[0].end_timestamp

datetime.datetime(2024, 5, 20, 14, 17, 45, 766640, tzinfo=tzutc())

In [22]:
meeting.diarizer_seek_timestamp, current_time

(datetime.datetime(2024, 5, 20, 14, 12, 48, 270419, tzinfo=tzutc()),
 datetime.datetime(2024, 5, 20, 14, 20, 17, 894252, tzinfo=datetime.timezone.utc))

In [24]:
connections

[<app.services.audio.redis.Connection at 0x7f8d01526b00>]

In [25]:
# NOTE(review): presumably selects the connection whose recording best covers
# the span from the diarizer seek timestamp to `current_time` — confirm in
# best_covering_connection, including what it returns when nothing matches.
connection = best_covering_connection(meeting.diarizer_seek_timestamp, current_time, connections)

In [29]:
# Upper bound for one slice.
# NOTE(review): presumably seconds; no visible cell passes it to the slicing
# call yet — confirm the intended use.
max_length = 240

In [32]:
# `seek` is already a float number of seconds (it is assigned from
# `.total_seconds()`), so display it directly; calling `.total_seconds()` on
# it again raises AttributeError when the notebook runs top to bottom.
seek

0.0

In [38]:
seek

0.0

In [39]:
seek + max_length

240.0

In [41]:
connection.id

'43c4f006-f5c6-499f-978a-5bd1075d1f9d'

In [42]:
f"/audio/{connection.id}.webm"

'/audio/43c4f006-f5c6-499f-978a-5bd1075d1f9d.webm'

In [44]:
# NOTE(review): presumably picks the connection whose recording best covers
# the span from the diarizer seek timestamp to `current_time` — confirm.
connection = best_covering_connection(meeting.diarizer_seek_timestamp, current_time, connections)
# Cut a slice out of that connection's recording.
# NOTE(review): 20 and 100 are hard-coded rather than derived from `seek` /
# `max_length`, and their meaning (offset, duration or end?) is not visible
# here — confirm against AudioSlicer.from_ffmpeg_slice.
# NOTE(review): the recorded run of this cell failed with CouldntDecodeError
# from ffmpeg, so the cells after it have no recorded output.
audio_slicer = await AudioSlicer.from_ffmpeg_slice(f"/audio/{connection.id}.webm", 20, 100)
# Duration of the decoded slice in seconds.
slice_duration = audio_slicer.audio.duration_seconds
audio_data = await audio_slicer.export_data()

CouldntDecodeError: Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
  libavutil      56. 70.100 / 56. 70.100
  libavcodec     58.134.100 / 58.134.100
  libavformat    58. 76.100 / 58. 76.100
  libavdevice    58. 13.100 / 58. 13.100
  libavfilter     7.110.100 /  7.110.100
  libswscale      5.  9.100 /  5.  9.100
  libswresample   3.  9.100 /  3.  9.100
  libpostproc    55.  9.100 / 55.  9.100
[cache @ 0x565390e1a340] Inner protocol failed to seekback end : -38
    Last message repeated 1 times
[mp3 @ 0x565390e19740] Failed to read frame size: Could not seek to 1026.
[cache @ 0x565390e1a340] Statistics, cache hits:0 cache misses:0
cache:pipe:0: Invalid argument


In [None]:

# NOTE(review): `pipeline` is not defined in any visible cell; presumably a
# pyannote `Pipeline` instance (the class is imported but never used) — it
# must be created before this cell can run on a fresh kernel.
# Run it on the in-memory audio, requesting embeddings alongside the output.
output, embeddings = pipeline(io.BytesIO(audio_data), return_embeddings=True)

In [None]:

if len(embeddings) == 0:
    logger.info("No embeddings found, skipping...")
    return

speakers = [await process_speaker_emb(e, redis_client, connection.user_id) for e in embeddings]

segments = [i for i in output.itertracks(yield_label=True)]
df = pd.DataFrame([parse_segment(s) for s in segments], columns=["start", "end", "speaker_id"])
df["speaker"] = df["speaker_id"].replace({i: s[0] for i, s in enumerate(speakers)})
df["score"] = df["speaker_id"].replace({i: s[1] for i, s in enumerate(speakers)})
result = df.drop(columns=["speaker_id"]).to_dict("records")

diarization = Diarisation(meeting_id, redis_client, result)
await diarization.lpush()

start_ = await __get_next_chunk_start(result, slice_duration, current_time - seek)
seek = start_+current_time if start_ else seek + slice_duration

meeting.diarize_seek_timestamp = seek
await diarizer.remove(meeting.meeting_id)
await meeting.update_redis()