In [13]:

# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to project modules are picked up live.
%load_ext autoreload
%autoreload 2

import sys
# Put '/app' first on the import path.
# NOTE(review): presumably this is where the project's `audio` package lives — confirm.
sys.path.insert(0,'/app')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:

import asyncio
import io
import os
from uuid import uuid4

import pandas as pd
import torch
from pyannote.audio import Pipeline
from qdrant_client import QdrantClient, models

from audio.redis import *

# Shared Qdrant client used by the embedding helpers defined further down.
# NOTE(review): "qdrant" is presumably the hostname of the Qdrant service — confirm.
client = QdrantClient("qdrant")

In [15]:
#client.delete_collection(client.get_collections().collections[0].name)

In [16]:
# client.create_collection(
#     collection_name='main',
#     vectors_config=models.VectorParams(size=256, distance=models.Distance.COSINE),
#     hnsw_config=models.HnswConfigDiff(
#         payload_m=16,
#         m=0,
#     ),
# )
# client.create_payload_index(
#     collection_name='main',
#     field_name="client_id",
#     field_schema=models.PayloadSchemaType.KEYWORD,
# )

In [26]:


def get_stored_knn(emb:list, client_id):
    """Look up the single closest stored embedding for one client.

    Searches the 'main' collection for the nearest neighbour of ``emb``,
    considering only points whose ``client_id`` payload field equals
    ``client_id``.

    Args:
        emb: Query vector.
        client_id: Value the ``client_id`` payload field must match.

    Returns:
        ``(speaker_id, score)`` taken from the best hit, or ``(None, None)``
        when the search returns no points.
    """
    same_client_only = models.Filter(
        must=[
            models.FieldCondition(
                key="client_id",
                match=models.MatchValue(value=client_id),
            )
        ]
    )
    hits = client.search(
        collection_name='main',
        query_vector=emb,
        limit=1,
        query_filter=same_client_only,
    )

    # Guard clause: nothing stored for this client yet.
    if not hits:
        return None, None

    best_hit = hits[0]
    return best_hit.payload['speaker_id'], best_hit.score


def add_new_speaker_emb(emb:list,client_id,speaker_id=None):
    """Store one embedding in the 'main' collection and return its speaker id.

    Args:
        emb: Vector to store.
        client_id: Written to the point's ``client_id`` payload field.
        speaker_id: Speaker to attach the embedding to. When falsy, a new
            UUID4 string is generated and used instead.

    Returns:
        The speaker id stored in the point's payload.
    """
    if not speaker_id:
        speaker_id = str(uuid4())

    # Every stored embedding gets its own random point id, independent of the
    # speaker id, so one speaker can own several points.
    point = models.PointStruct(
        id=str(uuid4()),
        vector=emb,
        payload={'speaker_id': speaker_id, 'client_id': client_id},
    )
    # wait=True is passed through unchanged from the original call.
    client.upsert(collection_name='main', wait=True, points=[point])

    return speaker_id



def process_speaker_emb(emb:list,client_id,known_threshold=0.95,same_speaker_threshold=0.75):
    """Resolve an embedding to a speaker id, storing it when it adds information.

    The nearest stored embedding for ``client_id`` is looked up and its score
    decides what happens:

    * score > ``known_threshold``: the existing speaker id is reused and
      nothing is stored.
    * score > ``same_speaker_threshold``: the embedding is stored under the
      existing speaker id.
    * otherwise, or when no neighbour was found: the embedding is stored
      under a newly generated speaker id.

    Args:
        emb: Embedding vector to resolve.
        client_id: Client whose stored embeddings are searched.
        known_threshold: Score above which the embedding is not stored again.
            Defaults to 0.95, the previously hard-coded value.
        same_speaker_threshold: Score above which the embedding is attributed
            to the matched speaker. Defaults to 0.75, the previously
            hard-coded value.

    Returns:
        ``(speaker_id, score)`` where ``speaker_id`` is a string and ``score``
        is the score of the nearest stored neighbour (``None`` when there was
        none). Note that when a new speaker is created the score still refers
        to the neighbour that was rejected.
    """
    speaker_id, score = get_stored_knn(emb, client_id)
    # Kept from the original so the cell output continues to list each score.
    print(score)

    if speaker_id:
        if score > known_threshold:
            # Already well represented; storing it again adds nothing.
            pass
        elif score > same_speaker_threshold:
            # Same speaker, but different enough to be worth keeping.
            add_new_speaker_emb(emb, client_id, speaker_id=speaker_id)
        else:
            # Closest neighbour is too far away: treat as a new speaker.
            speaker_id = add_new_speaker_emb(emb, client_id)
    else:
        # Nothing stored for this client yet.
        speaker_id = add_new_speaker_emb(emb, client_id)

    return str(speaker_id), score


def parse_segment(segment):
    """Flatten one diarization track into ``(start, end, speaker_index)``.

    ``segment`` is read positionally: its first element supplies ``.start``
    and ``.end``, and its last element is a label string whose second
    ``_``-separated field is converted to ``int``.
    """
    span = segment[0]
    start, end = span.start, span.end
    label = segment[-1]
    return start, end, int(label.split('_')[1])


In [19]:

# Load the pretrained diarization pipeline. The Hugging Face access token is
# read from the HF_TOKEN environment variable instead of being committed in the
# notebook; the token that used to be hard-coded here has been exposed and
# should be revoked. A missing variable fails fast with KeyError.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ["HF_TOKEN"])
# Prefer the GPU, but fall back to CPU so the cell still runs without CUDA.
pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

torchvision is not available - cannot save figures


<pyannote.audio.pipelines.speaker_diarization.SpeakerDiarization at 0x7eff4c5341f0>

In [21]:
# Obtain the Redis connection used for the queue and storage calls below.
# NOTE(review): `get_inner_redis` is presumably provided by the star import
# from audio.redis — confirm.
redis_client = await get_inner_redis()

In [22]:
# Pop one entry from 'Audio2DiarizeQueue'; the first element of the returned
# pair is discarded. No timeout is passed to brpop.
_,item = await redis_client.brpop('Audio2DiarizeQueue')
# The entry is split on ':' into exactly two fields.
# NOTE(review): the unpack raises ValueError if the entry contains more than
# one ':' — confirm that neither field can contain a colon.
audio_name,client_id = item.split(':')
audio = Audio(audio_name,redis_client)

In [23]:
# NOTE(review): presumably loads the audio payload so that `audio.data` is
# populated for the pipeline call in the next cell — confirm against audio.redis.
await audio.get()

True

In [24]:
# Run the diarization pipeline on the in-memory audio bytes. With
# return_embeddings=True the result is unpacked into the diarization output
# and the embeddings that are iterated over in the next cell.
output, embeddings = pipeline(io.BytesIO(audio.data), return_embeddings=True)

In [27]:
# Resolve every embedding to a (speaker_id, score) pair for this client; the
# list keeps the iteration order of `embeddings`, which later cells rely on
# when mapping positional indices back to speaker ids.
speakers =[process_speaker_emb(e,client_id) for e in embeddings]

0.99999994
0.79216456


In [29]:
# Materialise every item yielded by the diarization output (labels included).
segments = list(output.itertracks(yield_label=True))

In [31]:
speakers

[('4f31270c-2ba6-482f-a387-c1c16c86f779', 0.99999994),
 ('51c9dd8a-acf0-4519-b458-5d2ca5ac2add', 0.79216456)]

In [36]:

# One row per diarization track: start, end and the integer speaker index
# extracted by parse_segment.
df = pd.DataFrame(
    list(map(parse_segment, segments)),
    columns=['start', 'end', 'speaker_id'],
)

In [37]:

# Translate each row's integer speaker index into the resolved speaker id
# (first element of the pair at that position in `speakers`).
df['speaker'] = df['speaker_id'].replace(
    dict(enumerate(pair[0] for pair in speakers))
)

In [38]:
df

Unnamed: 0,start,end,speaker_id,speaker
0,0.008489,6.290323,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
1,5.152801,5.611205,0,4f31270c-2ba6-482f-a387-c1c16c86f779
2,7.275042,10.16129,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
3,10.976231,13.964346,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
4,15.645161,18.548387,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
5,18.85399,28.718166,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
6,30.297114,32.691002,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
7,33.981324,37.054329,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
8,41.044143,42.758913,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add
9,48.005093,50.33107,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add


In [40]:
# Attach the score (second element of the pair at that position in `speakers`)
# to every row by its integer speaker index.
df['score'] = df['speaker_id'].replace(
    dict(enumerate(pair[1] for pair in speakers))
)

In [41]:
# Display the frame without the positional 'speaker_id' column.
# NOTE(review): the result is not assigned, so `df` itself still contains
# 'speaker_id' and that column is included in the records saved in the next
# cell — confirm whether the drop was meant to be persisted.
df.drop(columns=['speaker_id'])

Unnamed: 0,start,end,speaker_id,speaker,score
0,0.008489,6.290323,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
1,5.152801,5.611205,0,4f31270c-2ba6-482f-a387-c1c16c86f779,1.0
2,7.275042,10.16129,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
3,10.976231,13.964346,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
4,15.645161,18.548387,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
5,18.85399,28.718166,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
6,30.297114,32.691002,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
7,33.981324,37.054329,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
8,41.044143,42.758913,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165
9,48.005093,50.33107,1,51c9dd8a-acf0-4519-b458-5d2ca5ac2add,0.792165


In [42]:

# Convert the frame to a list of per-row dicts (one record per segment).
diarization_data = df.to_dict('records')
# Persist the records through the project's Diarisation helper.
await Diarisation(audio_name,redis_client,diarization_data).save()
# Push 'Done' onto the per-audio 'DiarizeReady:<audio_name>' list.
# NOTE(review): presumably a consumer blocks on this key to learn the result
# is ready — confirm.
await redis_client.lpush(f'DiarizeReady:{audio_name}', 'Done')

1