In [93]:

# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to the `app` package are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [94]:
import logging
from datetime import datetime, timezone

from app.database_redis.connection import get_redis_client
from app.services.apis.streamqueue_service.client import StreamQueueServiceAPI
from app.services.audio.redis import Connection, Diarizer, Meeting, Transcriber
from app.settings import settings

# Logger used by `Processor`; named after the current module.
logger = logging.getLogger(__name__)

In [96]:
# Display the configured `check_and_process_connections_interval_sec` value.
settings.check_and_process_connections_interval_sec

5.0

In [89]:


class Processor:
    """Drain audio chunks from the stream-queue service and schedule follow-up work.

    For every connection reported by the stream-queue service the processor
    appends the fetched chunks to a per-connection ``.webm`` file, records the
    covered time range on the ``Connection``, attaches the connection to its
    ``Meeting`` and enqueues the meeting for the ``Diarizer`` / ``Transcriber``
    once more than the configured number of seconds has elapsed since the
    respective last update.
    """

    # Directory that receives one ``<connection_id>.webm`` file per connection.
    # Kept as a class attribute so it can be overridden (e.g. in tests).
    audio_dir = "/audio"

    def __init__(self):
        self.__running_tasks = set()
        self.stream_queue_service_api = StreamQueueServiceAPI()

    async def process_connections(self):
        """Fetch the current connections and process them one after another."""
        logger.info("Process connections...")
        connections = await self.stream_queue_service_api.get_connections()
        # The connection id is the first element of every returned entry.
        connection_ids = [c[0] for c in connections]

        for connection_id in connection_ids:
            await self._process_connection_task(connection_id)

    async def _process_connection_task(self, connection_id, diarizer_step=60, transcriber_step=5):
        """Persist new audio for one connection and enqueue its meeting if due.

        Args:
            connection_id: Identifier of the connection to drain.
            diarizer_step: Seconds that must have elapsed since the meeting's
                last diarizer update before it is enqueued again.
            transcriber_step: Same threshold for the transcriber.

        Returns:
            None. Returns early, without touching Redis state, when the
            stream-queue service had no chunks for the connection.
        """
        redis_client = await get_redis_client(settings.redis_host, settings.redis_port, settings.redis_password)
        segment = await self.writestream2file(connection_id)
        if segment is None:
            # Nothing was fetched for this connection, so there is no new time
            # range to record and nothing to schedule.
            return
        meeting_id, segment_start_timestamp, segment_end_timestamp, user_id = segment

        # `writestream2file` already yields aware UTC datetimes; normalising
        # here also tolerates ISO-8601 strings without raising.
        segment_start_timestamp = self._to_utc(segment_start_timestamp)
        segment_end_timestamp = self._to_utc(segment_end_timestamp)
        current_time = datetime.now(timezone.utc)

        connection = Connection(redis_client, connection_id, user_id)
        await connection.update_timestamps(segment_start_timestamp, segment_end_timestamp)

        meeting = Meeting(redis_client, meeting_id)
        await meeting.load_from_redis()
        await meeting.add_connection(connection.id)
        # A meeting without recorded updates starts counting from this segment.
        meeting.diarizer_last_updated_timestamp = meeting.diarizer_last_updated_timestamp or segment_start_timestamp
        meeting.transcriber_last_updated_timestamp = meeting.transcriber_last_updated_timestamp or segment_start_timestamp

        if self._is_due(current_time, meeting.diarizer_last_updated_timestamp, diarizer_step):
            diarizer = Diarizer(redis_client)
            await diarizer.add_todo(meeting.meeting_id)
            await meeting.update_diarizer_timestamp(
                segment_start_timestamp, diarizer_last_updated_timestamp=current_time
            )

        if self._is_due(current_time, meeting.transcriber_last_updated_timestamp, transcriber_step):
            transcriber = Transcriber(redis_client)
            await transcriber.add_todo(meeting.meeting_id)
            await meeting.update_transcriber_timestamp(
                segment_start_timestamp, transcriber_last_updated_timestamp=current_time
            )

    async def writestream2file(self, connection_id):
        """Append the next batch of chunks to the connection's ``.webm`` file.

        Fetches up to 100 chunks for ``connection_id`` and appends their
        hex-decoded payloads, in order, to ``<audio_dir>/<connection_id>.webm``.

        Args:
            connection_id: Identifier of the connection whose chunks are fetched.

        Returns:
            ``(meeting_id, first_timestamp, last_timestamp, user_id)`` where the
            timestamps are timezone-aware UTC datetimes of the first and last
            chunk and ``meeting_id`` / ``user_id`` come from the last chunk
            (``meeting_id`` falls back to ``connection_id`` when a chunk carries
            none). ``None`` when no chunks were fetched.
        """
        path = f"{self.audio_dir}/{connection_id}.webm"
        items = await self.stream_queue_service_api.fetch_chunks(connection_id, num_chunks=100)
        if not items:
            return None

        chunks = items["chunks"]
        if not chunks:
            return None

        # Used if there is no meeting_id in the chunk meta-data.
        meeting_id = connection_id
        user_id = None
        first_timestamp = None
        last_timestamp = None

        # Open the file once in append mode instead of once per chunk.
        with open(path, "ab") as file:
            for item in chunks:
                file.write(bytes.fromhex(item["chunk"]))

                last_timestamp = self._to_utc(item["timestamp"])
                if first_timestamp is None:
                    first_timestamp = last_timestamp

                meeting_id = item.get("meeting_id") or meeting_id
                user_id = item["user_id"]

        return meeting_id, first_timestamp, last_timestamp, user_id

    @staticmethod
    def _to_utc(value):
        """Return ``value`` as a timezone-aware UTC datetime, or ``None`` if empty.

        Accepts a ``datetime`` or an ISO-8601 string. A trailing ``Z`` is read
        as UTC, and values without an offset are taken to be UTC already rather
        than being reinterpreted in the machine's local timezone.
        """
        if not value:
            return None
        if isinstance(value, datetime):
            parsed = value
        else:
            text = str(value).strip()
            if text[-1:] in ("Z", "z"):
                # `fromisoformat` only accepts the "Z" suffix from Python 3.11 on.
                text = text[:-1] + "+00:00"
            parsed = datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)

    @staticmethod
    def _is_due(now, last_updated, step_seconds):
        """Tell whether more than ``step_seconds`` elapsed between ``last_updated`` and ``now``.

        Uses ``total_seconds()`` so that whole days count and a ``last_updated``
        in the future is never reported as due. Returns ``False`` when no
        ``last_updated`` value is known.
        """
        if last_updated is None:
            return False
        return (now - last_updated).total_seconds() > step_seconds


In [90]:
# Thresholds in seconds, mirroring the defaults of
# `Processor._process_connection_task`; the scheduling cell further down
# compares them with the time elapsed since the last update.
diarizer_step, transcriber_step = 60, 5

In [91]:
# Bind a Processor to the name `self` so the method bodies of the class can be
# replayed statement by statement in the cells below.
self = Processor()

In [74]:
connections = await self.stream_queue_service_api.get_connections()
connection_ids = [c[0] for c in connections]

In [75]:
# Inspect the ids returned by the service.
connection_ids

['02a0f16b-4a17-4315-b267-3f6b147b3465']

In [76]:
# Work with the first connection only; raises IndexError if none are listed.
connection_id = connection_ids[0]

In [77]:
# Inspect the selected connection id.
connection_id

'02a0f16b-4a17-4315-b267-3f6b147b3465'

In [78]:
# Append the fetched chunks for this connection to its .webm file and unpack
# the meeting id, the first/last chunk timestamps and the user id.
# NOTE(review): `writestream2file` returns None when nothing is fetched, in
# which case this unpacking raises TypeError.
meeting_id, segment_start_timestamp, segment_end_timestamp, user_id = await self.writestream2file(connection_id)

In [79]:
# Inspect the time range covered by the chunks that were just written.
segment_start_timestamp,segment_end_timestamp

(datetime.datetime(2024, 5, 20, 17, 43, 25, 964953, tzinfo=datetime.timezone.utc),
 datetime.datetime(2024, 5, 20, 17, 47, 16, 989690, tzinfo=datetime.timezone.utc))

In [80]:
# Inspect the meeting id taken from the chunk meta-data.
meeting_id

'meeting1'

In [81]:
# Obtain a Redis client using the host, port and password from `settings`.
redis_client = await get_redis_client(settings.redis_host, settings.redis_port, settings.redis_password)

In [82]:
# Destructive: uncomment to delete every key in the currently selected Redis database.
#await redis_client.flushdb()

In [83]:
# Reference "now" (aware, UTC) used by the scheduling checks further down.
current_time = datetime.now(timezone.utc)

# Record the time range of the newly written audio on the connection.
connection = Connection(redis_client, connection_id, user_id)
await connection.update_timestamps(segment_start_timestamp, segment_end_timestamp)

In [84]:
#transcriber_step

In [85]:
# Inspect the reference time captured above.
current_time

datetime.datetime(2024, 5, 20, 17, 47, 22, 70677, tzinfo=datetime.timezone.utc)

In [86]:


# Build the meeting, sync it with Redis and attach the current connection.
meeting = Meeting(redis_client, meeting_id)

# NOTE(review): `update_redis()` runs before `load_from_redis()` here, whereas
# `Processor._process_connection_task` only loads — confirm that writing a
# freshly constructed Meeting first cannot overwrite stored state.
await meeting.update_redis()
await meeting.load_from_redis()
await meeting.add_connection(connection.id)
# Fall back to the segment start when the meeting has no recorded update yet.
meeting.diarizer_last_updated_timestamp = meeting.diarizer_last_updated_timestamp or segment_start_timestamp
meeting.transcriber_last_updated_timestamp = meeting.transcriber_last_updated_timestamp or segment_start_timestamp

In [87]:
# Inspect the diarizer timestamp that the scheduling check will compare against.
meeting.diarizer_last_updated_timestamp

datetime.datetime(2024, 5, 20, 17, 44, 24, 248253, tzinfo=tzutc())

In [88]:

if (current_time - meeting.diarizer_last_updated_timestamp).seconds > diarizer_step:
    print("diarizer added")
    diarizer = Diarizer(redis_client)
    await diarizer.add_todo(meeting.meeting_id)
    await meeting.update_diarizer_timestamp(
        segment_start_timestamp, diarizer_last_updated_timestamp=current_time
    )

if (current_time - meeting.transcriber_last_updated_timestamp).seconds > transcriber_step:
    print("transcriber added")
    transcriber = Transcriber(redis_client)
    await transcriber.add_todo(meeting.meeting_id)
    await meeting.update_transcriber_timestamp(
        segment_start_timestamp, transcriber_last_updated_timestamp=current_time
            )

diarizer added
transcriber added


In [92]:
# Dump the whole settings object.
# NOTE(review): the repr includes `service_token`, `redis_password` and
# `stream_queue_service_auth_token`; clear this cell's output before sharing or
# committing the notebook, or display only the fields of interest.
settings

Settings(service_version='0.0.1_example', service_name='Audio API', service_api_host='0.0.0.0', service_api_port=8009, service_token='service_token', check_and_process_connections_interval_sec=5.0, stream_queue_service_list_connections='http://host.docker.internal:8000/api/v1/connections/list', stream_queue_service_flush_cache='http://host.docker.internal:8000/api/v1/tools/flush-cache', stream_queue_service_get_next_chunks='http://host.docker.internal:8000/api/v1/tools/get-next-chunks', stream_queue_service_health='http://host.docker.internal:8000/api/v1/health', stream_queue_service_health_check='http://host.docker.internal:8000/api/v1/hc', stream_queue_service_request_timeout=5, stream_queue_service_auth_token='LKJBn98wefgh', redis_host='redis', redis_port=6379, redis_password='', volume_data_path='/home/dima/ssd/0', redis_image_port='6382')

In [48]:
# Seconds component of the time since the last transcriber update.
# NOTE: `timedelta.seconds` excludes whole days and microseconds (and wraps for
# negative deltas); `total_seconds()` gives the full elapsed time.
(current_time - meeting.transcriber_last_updated_timestamp).seconds

0

In [49]:
# Inspect the transcriber timestamp stored on the meeting.
meeting.transcriber_last_updated_timestamp

datetime.datetime(2024, 5, 20, 17, 40, 20, 332570, tzinfo=datetime.timezone.utc)

In [50]:
# Same inspection as the previous cell (re-run of the same expression).
meeting.transcriber_last_updated_timestamp

datetime.datetime(2024, 5, 20, 17, 40, 20, 332570, tzinfo=datetime.timezone.utc)

In [51]:
# Inspect the start of the segment for comparison with the stored timestamps.
segment_start_timestamp

datetime.datetime(2024, 5, 20, 17, 38, 5, 107331, tzinfo=datetime.timezone.utc)

In [52]:
# Inspect the start timestamp held by the Connection object.
connection.start_timestamp

datetime.datetime(2024, 5, 20, 17, 38, 5, 107331, tzinfo=tzutc())

In [53]:
# Re-check the transcriber timestamp after inspecting the segment/connection values.
meeting.transcriber_last_updated_timestamp

datetime.datetime(2024, 5, 20, 17, 40, 20, 332570, tzinfo=datetime.timezone.utc)