# Training Status Process
> Process to handle training data stream

In [None]:
# | default_exp training_status_process

In [None]:
# | export

import asyncio
import random
import traceback
from contextlib import contextmanager
from datetime import datetime, timedelta
from os import environ
from time import sleep
from typing import *

import numpy as np
import pandas as pd
from airt.logger import get_logger
from airt.patching import patch
from asyncer import asyncify, create_task_group
from fast_kafka_api.application import FastKafkaAPI
from fastapi import FastAPI
from fastcore.meta import delegates
from sqlalchemy import create_engine as sqlalchemy_create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.exc import NoResultFound
from sqlmodel import Session, func, select

import airt_service
from airt_service.data.clickhouse import get_count_for_account_ids
from airt_service.db.models import (
    TrainingStreamStatus,
    User,
    create_connection_string,
    get_db_params_from_env_vars,
    get_engine,
    get_session_with_context,
)
from airt_service.users import User

23-03-06 07:58:29.403 [INFO] airt.executor.subcommand: Module loaded.


In [None]:
import json
import threading
from datetime import datetime
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, call

import pytest
import uvicorn
from _pytest.monkeypatch import MonkeyPatch
from confluent_kafka import Consumer, Producer

from airt_service.confluent import confluent_kafka_config, create_topics_for_user
from airt_service.db.models import create_user_for_testing
from airt_service.helpers import set_env_variable_context
from airt_service.sanitizer import sanitized_print
from airt_service.server import ModelTrainingRequest, create_ws_server
from airt_service.uvicorn_helpers import run_uvicorn

In [None]:
test_username = create_user_for_testing()
display(test_username)

'ezskeqqeos'

In [None]:
# | exporti

logger = get_logger(__name__)

In [None]:
def create_test_update_table() -> Tuple[pd.DataFrame, User]:
    """Build a two-row update table owned by a freshly created test user

    Returns:
        A tuple of the update table and the user object. The table is indexed
        by ``account_id`` and holds one "upload" row (account 666) and one
        "end" row (account 999); both rows carry the id of the returned user
        in the ``user_id`` column.
    """
    new_username = create_user_for_testing()
    user_query = select(User).where(User.username == new_username)

    with get_session_with_context() as session:
        user = session.exec(user_query).one()

    columns = {
        "account_id": [666, 999],
        "application_id": [None, "23"],
        "model_id": ["ChurnModelForDrivers", "Whatever"],
        "total": [1000, 1000],
        "user_id": [user.id] * 2,
        "model_type": ["churn", "churn"],
        "count": [10, 670],
        "event": ["upload", "end"],
    }
    update_frame = pd.DataFrame(columns).set_index("account_id")

    return update_frame, user


update_table, user = create_test_update_table()
update_table

Unnamed: 0_level_0,application_id,model_id,total,user_id,model_type,count,event
account_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
666,,ChurnModelForDrivers,1000,145,churn,10,upload
999,23.0,Whatever,1000,145,churn,670,end


In [None]:
# | export


def update_mysql(
    update_table: pd.DataFrame,
) -> None:
    """
    Store one training stream status event per row of the update table

    The index of the table is turned back into a regular column and every row
    is passed as keyword arguments to ``TrainingStreamStatus``. All events are
    added to a single session and committed together.

    Args:
        update_table: table with one row per event to store. Index name and
            column names are used as the ``TrainingStreamStatus`` keyword
            arguments (the table built by ``get_new_update_table`` is indexed
            by ``account_id`` and has the columns ``application_id``,
            ``model_id``, ``total``, ``user_id``, ``model_type``, ``count``
            and ``event``). A table without rows is a no-op.
    """
    records = update_table.reset_index().to_dict(orient="records")

    # Nothing to store - do not open a session just to commit nothing
    if not records:
        return

    training_events = [
        TrainingStreamStatus(**kwargs) for kwargs in records  # type: ignore
    ]

    with get_session_with_context() as session:
        session.add_all(training_events)
        session.commit()

In [None]:
# Store a fresh two-row update table and read it back for the same user
update_table, user = create_test_update_table()

update_mysql(update_table=update_table)

with get_session_with_context() as session:
    # Newest events first
    most_recent_events = session.exec(
        select(TrainingStreamStatus)
        .where(TrainingStreamStatus.user == user)
        .order_by(TrainingStreamStatus.id.desc())
    ).all()

display(most_recent_events)

# Bring both frames to the same row and column order before comparing
expected = update_table.sort_index().reindex(sorted(update_table.columns), axis=1)

actual = (
    pd.DataFrame([e.dict() for e in most_recent_events])
    .set_index("account_id")
    # id, uuid and created are not part of the update table
    .drop(columns=["id", "uuid", "created"])
    .sort_index()
    .reindex(sorted(update_table.columns), axis=1)
)
pd.testing.assert_frame_equal(actual, expected)
# A missing application_id must come back as None
np.testing.assert_array_equal(actual["application_id"], (None, "23"))

[TrainingStreamStatus(event=<TrainingEvent.end: 'end'>, account_id=999, model_id='Whatever', model_type='churn', total=1000, user_id=146, uuid=UUID('a50cdf21-8087-41b7-94a3-466f3b03ed51'), id=18, application_id='23', count=670, created=datetime.datetime(2023, 3, 6, 7, 58, 31)),
 TrainingStreamStatus(event=<TrainingEvent.upload: 'upload'>, account_id=666, model_id='ChurnModelForDrivers', model_type='churn', total=1000, user_id=146, uuid=UUID('301a9f11-c08c-480b-8a28-5c405fa54443'), id=17, application_id=None, count=10, created=datetime.datetime(2023, 3, 6, 7, 58, 31))]

In [None]:
def get_mysql_test_table() -> pd.DataFrame:
    """Build a fixture shaped like the output of get_recent_events_for_user

    Returns:
        A dataframe indexed by ``AccountId`` with three accounts: 666 (created
        a second ago, no rows counted yet), 999 (created a minute ago) and
        1000 (created a second ago, prev_count equal to total)
    """
    account_ids = (666, 999, 1000)

    def per_account(*values: Any) -> Dict[int, Any]:
        # Map the values, given in account order, to their account ids
        return dict(zip(account_ids, values))

    columns = {
        "application_id": per_account(np.nan, "23", "some app"),
        "model_id": per_account("ChurnModelForDrivers", "Whatever", "CoolModel"),
        "event": per_account("start", "upload", "upload"),
        "id": per_account(33, 66, 1000),
        "uuid": per_account(
            "b465060fa1da4af8b9d597ec3c8f8e07",
            "9999990fa1da4af8b9d597ec3c999999",
            "0" * 16,
        ),
        "prev_count": per_account(0, 670, 1_000_000),
        "total": per_account(1000, 1000, 1_000_000),
        "created": per_account(
            datetime.utcnow() - timedelta(seconds=1),
            datetime.utcnow() - timedelta(seconds=60),
            datetime.utcnow() - timedelta(seconds=1),
        ),
        "user_id": per_account(18, 18, 18),
        "model_type": per_account("churn", "churn", "churn"),
    }

    # The account ids used as inner dict keys become the index
    return pd.DataFrame(columns).rename_axis("AccountId")


get_mysql_test_table()

Unnamed: 0_level_0,application_id,model_id,event,id,uuid,prev_count,total,created,user_id,model_type
AccountId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
666,,ChurnModelForDrivers,start,33,b465060fa1da4af8b9d597ec3c8f8e07,0,1000,2023-03-06 07:58:29.602754,18,churn
999,23,Whatever,upload,66,9999990fa1da4af8b9d597ec3c999999,670,1000,2023-03-06 07:57:30.602757,18,churn
1000,some app,CoolModel,upload,1000,0000000000000000,1000000,1000000,2023-03-06 07:58:29.602757,18,churn


In [None]:
def get_clickhouse_test_table() -> pd.DataFrame:
    """Build a fixture shaped like the ClickHouse row count table

    Returns:
        A dataframe indexed by ``AccountId`` with the columns ``curr_count``
        and ``curr_check_on``; all three accounts share one check timestamp
    """
    checked_at = datetime.utcnow()
    counts_by_account = {666: 10, 999: 670, 1000: 1_000_000}

    frame = pd.DataFrame(
        {
            "curr_count": list(counts_by_account.values()),
            "AccountId": list(counts_by_account.keys()),
            "curr_check_on": [checked_at] * len(counts_by_account),
        }
    )

    return frame.set_index("AccountId")


get_clickhouse_test_table()

Unnamed: 0_level_0,curr_count,curr_check_on
AccountId,Unnamed: 1_level_1,Unnamed: 2_level_1
666,10,2023-03-06 07:58:30.613410
999,670,2023-03-06 07:58:30.613410
1000,1000000,2023-03-06 07:58:30.613410


In [None]:
# | export


@contextmanager
def create_sqlalchemy_engine(
    url: str, **kwargs: Dict[str, Any]
) -> Generator[Engine, None, None]:
    """Context manager yielding a sqlalchemy engine which is disposed on exit

    Args:
        url: database url passed to sqlalchemy's create_engine
        **kwargs: additional keyword arguments forwarded to create_engine

    Yields:
        The created engine. ``dispose()`` is called on it when the ``with``
        block is left, also when the block raised an exception.
    """
    engine = sqlalchemy_create_engine(url, **kwargs)  # type: ignore
    try:
        yield engine
    finally:
        engine.dispose()


def get_recent_events_for_user(user: User) -> pd.DataFrame:
    """
    Get the latest event of every training stream of the user that has not ended

    Args:
        user: user object to get recent events

    Returns:
        A dataframe indexed by AccountId and sorted by it in ascending order.
        It has one row per account_id, application_id and model_id combination
        of the given user, taken from the events ordered by descending id.
        Combinations whose latest event is "end" are left out and the count
        column is renamed to prev_count.
    """
    db_url = create_connection_string(**get_db_params_from_env_vars())  # type: ignore

    with create_sqlalchemy_engine(db_url) as engine:
        # The whole table is loaded, filtering is done in pandas below
        all_events = pd.read_sql_table(table_name="trainingstreamstatus", con=engine)

    # Newest events of the given user first
    owned_by_user = all_events["user_id"] == user.id
    newest_first = all_events.loc[owned_by_user].sort_values("id", ascending=False)

    # Missing application_id / model_id values form groups of their own.
    # NOTE: first() picks the first non-null value of each column in a group
    stream_key = ["account_id", "application_id", "model_id"]
    latest_per_stream = newest_first.groupby(
        by=stream_key,
        as_index=False,
        dropna=False,
    ).first()

    latest_per_stream = latest_per_stream.rename(
        columns={"count": "prev_count", "account_id": "AccountId"}
    ).set_index("AccountId")

    # Streams whose latest event is 'end' are finished and therefore dropped
    still_open = latest_per_stream["event"] != "end"

    return latest_per_stream.loc[still_open].sort_values("AccountId", ascending=True)

In [None]:
# NOTE(review): end_count is not used anywhere in this cell
end_count = 1_000_000

with get_session_with_context() as session:
    # A freshly created user has no events yet
    update_table, user = create_test_update_table()
    display(update_table)
    recent_event_for_user = get_recent_events_for_user(user=user)
    assert recent_event_for_user.empty, recent_event_for_user

    update_mysql(update_table=update_table)

    # Of the two stored rows only the "upload" one (account 666) is expected
    # back, the row for account 999 is an "end" event
    actual = get_recent_events_for_user(user=user)
    display(actual)
    assert len(actual) == 1
    assert (actual["event"] == "upload").all()
    assert (actual["user_id"] == user.id).all()
    assert (actual.index == 666).all()

Unnamed: 0_level_0,application_id,model_id,total,user_id,model_type,count,event
account_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
666,,ChurnModelForDrivers,1000,147,churn,10,upload
999,23.0,Whatever,1000,147,churn,670,end


Unnamed: 0_level_0,application_id,model_id,event,id,uuid,prev_count,total,created,user_id,model_type
AccountId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
666,,ChurnModelForDrivers,upload,19,9faff1bc03144457bc07cbc7cbe2e244,10,1000,2023-03-06 07:58:31,147,churn


In [None]:
# | export


def get_count_from_training_data_ch_table(
    account_ids: List[Union[int, str]]
) -> pd.DataFrame:
    """
    Get the row counts of the given account ids from the clickhouse table

    The clickhouse connection settings are read from the KAFKA_CH_* environment
    variables on every call; a missing variable raises KeyError.

    Args:
        account_ids: List of account_ids to get count

    Returns:
        The dataframe returned by get_count_for_account_ids for the given
        account ids
    """
    clickhouse_settings = dict(
        username=environ["KAFKA_CH_USERNAME"],
        password=environ["KAFKA_CH_PASSWORD"],
        host=environ["KAFKA_CH_HOST"],
        port=int(environ["KAFKA_CH_PORT"]),
        database=environ["KAFKA_CH_DATABASE"],
        table=environ["KAFKA_CH_TABLE"],
        protocol=environ["KAFKA_CH_PROTOCOL"],
    )

    return get_count_for_account_ids(account_ids=account_ids, **clickhouse_settings)

In [None]:
@contextmanager
def patch_get_count_from_training_data_ch_table():
    """Replace get_count_from_training_data_ch_table in __main__ with a fake

    Inside the ``with`` block the fake reports a count of 999 for every
    requested account id, stamped with the time of the call. The original
    function is restored when the block is left.
    """

    def _fake_count(account_ids):
        requested = len(account_ids)
        counts = pd.DataFrame(
            {
                "curr_count": [999] * requested,
                "AccountId": account_ids,
                "curr_check_on": [datetime.utcnow()] * requested,
            }
        )
        return counts.set_index("AccountId")

    with MonkeyPatch.context() as monkeypatch:
        monkeypatch.setattr(
            "__main__.get_count_from_training_data_ch_table", _fake_count
        )
        yield


# While patched, every requested account id reports a count of 999
with patch_get_count_from_training_data_ch_table():
    actual = get_count_from_training_data_ch_table(account_ids=[500])
    display(actual)
    assert actual.iloc[0]["curr_count"] == 999, actual

Unnamed: 0_level_0,curr_count,curr_check_on
AccountId,Unnamed: 1_level_1,Unnamed: 2_level_1
500,999,2023-03-06 07:58:30.906595


In [None]:
# | export


def get_user(username: str) -> User:
    """Look up a user by username

    Args:
        username: Username as a string

    Returns:
        The user object

    Raises:
        An exception from ``.one()`` unless exactly one user has the username
    """
    user_query = select(User).where(User.username == username)

    with get_session_with_context() as session:
        found: User = session.exec(user_query).one()
        return found

In [None]:
actual = get_user(username=test_username)
assert actual.username == test_username

In [None]:
# | export


def get_new_update_table(
    recent_events_df: pd.DataFrame, ch_df: pd.DataFrame, end_timedelta: int
) -> pd.DataFrame:
    """Derive the new status events from the latest events and current counts

    A stream gets an "upload" event when its current count is greater than the
    previously stored one. It gets an "end" event when the count did not grow
    and more than ``end_timedelta`` seconds passed between the creation of its
    latest event and the current check. All other streams are left out.

    Args:
        recent_events_df: latest event per stream, joined on ``AccountId``;
            the columns ``event``, ``id``, ``uuid``, ``prev_count`` and
            ``created`` are consumed and not part of the result
        ch_df: current counts, joined on ``AccountId``, with the columns
            ``curr_count`` and ``curr_check_on``
        end_timedelta: seconds without new rows after which a stream is ended

    Returns:
        A dataframe indexed by ``account_id`` holding the remaining columns of
        ``recent_events_df`` plus ``count`` (the current count) and ``event``
        ("upload" or "end"); NaN values are replaced by None
    """
    # Left join: streams without a count row have NaN / NaT and match nothing
    joined = recent_events_df.merge(right=ch_df, how="left", on="AccountId")

    has_new_rows = joined["curr_count"] > joined["prev_count"]
    idle_time = joined["curr_check_on"] - joined["created"]
    timed_out = idle_time > timedelta(seconds=end_timedelta)

    # Everything selected ends, unless its count grew
    changed = joined[has_new_rows | timed_out].assign(action="end")
    grew = changed["curr_count"] > changed["prev_count"]
    changed.loc[grew, "action"] = "upload"

    # The old "event" column has to go before "action" takes over its name
    obsolete = ["event", "id", "uuid", "prev_count", "created", "curr_check_on"]
    result = changed.drop(columns=obsolete)
    result = result.rename(columns={"curr_count": "count", "action": "event"})
    result.index = result.index.rename("account_id")

    return result.replace({np.nan: None})

In [None]:
# Fixtures: account 666 got new rows, 999 is idle for a minute and 1000 is
# unchanged but was created only a second ago
recent_events_df = get_mysql_test_table()
ch_df = get_clickhouse_test_table()
display(recent_events_df)
display(ch_df)

update_table = get_new_update_table(recent_events_df, ch_df, end_timedelta=30)
display(update_table)
# Account 1000 neither grew nor timed out, so only two rows are expected
assert update_table.shape == (2, 7), update_table.shape
np.testing.assert_array_equal(update_table.index, (666, 999))
assert update_table.index.name == "account_id"
np.testing.assert_array_equal(update_table["event"], ("upload", "end"))
np.testing.assert_array_equal(update_table["count"], (10, 670))
np.testing.assert_array_equal(
    update_table["application_id"].fillna("nan"), ("nan", "23")
)
# The missing application_id must be None and not NaN
np.testing.assert_array_equal(update_table["application_id"], (None, "23"))

Unnamed: 0_level_0,application_id,model_id,event,id,uuid,prev_count,total,created,user_id,model_type
AccountId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
666,,ChurnModelForDrivers,start,33,b465060fa1da4af8b9d597ec3c8f8e07,0,1000,2023-03-06 07:58:29.934852,18,churn
999,23,Whatever,upload,66,9999990fa1da4af8b9d597ec3c999999,670,1000,2023-03-06 07:57:30.934855,18,churn
1000,some app,CoolModel,upload,1000,0000000000000000,1000000,1000000,2023-03-06 07:58:29.934855,18,churn


Unnamed: 0_level_0,curr_count,curr_check_on
AccountId,Unnamed: 1_level_1,Unnamed: 2_level_1
666,10,2023-03-06 07:58:30.936723
999,670,2023-03-06 07:58:30.936723
1000,1000000,2023-03-06 07:58:30.936723


Unnamed: 0_level_0,application_id,model_id,total,user_id,model_type,count,event
account_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
666,,ChurnModelForDrivers,1000,18,churn,10,upload
999,23.0,Whatever,1000,18,churn,670,end


In [None]:
# | export


async def update_kafka(update_table: pd.DataFrame, kafka_app: FastKafkaAPI) -> None:
    """Send one training data status message per row of the update table

    For every row ``kafka_app.to_infobip_training_data_status`` is started on a
    task group with the keyword arguments ``account_id`` (the table index),
    ``application_id``, ``model_id``, ``no_of_records`` (the ``count`` column)
    and ``total_no_of_records`` (the ``total`` column).

    Args:
        update_table: table of new events, indexed by ``account_id``
        kafka_app: object providing the ``to_infobip_training_data_status``
            coroutine function
    """
    async with create_task_group() as task_group:
        send_status = task_group.soonify(kafka_app.to_infobip_training_data_status)

        # model_type, user_id and event are not part of the status message
        payload = update_table.drop(columns=["model_type", "user_id", "event"])
        payload = payload.rename(
            columns={"count": "no_of_records", "total": "total_no_of_records"}
        )

        for message in payload.reset_index().to_dict(orient="records"):
            send_status(**message)  # type: ignore

In [None]:
update_table, _ = create_test_update_table()

# Stand-in for the kafka application, only the producer method is needed
kafka_app = MagicMock()
kafka_app.to_infobip_training_data_status = AsyncMock()

# One message per row: count / total renamed, model_type, user_id and event
# left out
expected = [
    call(
        account_id=666,
        application_id=None,
        model_id="ChurnModelForDrivers",
        total_no_of_records=1000,
        no_of_records=10,
    ),
    call(
        account_id=999,
        application_id="23",
        model_id="Whatever",
        total_no_of_records=1000,
        no_of_records=670,
    ),
]

await update_kafka(update_table, kafka_app=kafka_app)
assert kafka_app.to_infobip_training_data_status.call_count == 2
assert kafka_app.to_infobip_training_data_status.call_args_list == expected

In [None]:
def exit_after(timeout: int):
    """Create a predicate which turns True once the timeout has passed

    Args:
        timeout: number of seconds, counted from the call of this function

    Returns:
        A callable without required arguments returning True when more than
        ``timeout`` seconds passed since ``exit_after`` was called
    """
    started_at = datetime.now()

    # Start time and timeout are bound as defaults at definition time
    def _has_expired(t0: datetime = started_at, timeout: int = timeout) -> bool:
        elapsed = datetime.now() - t0
        limit = timedelta(seconds=timeout)
        return elapsed > limit

    return _has_expired


# The predicate is False right after creation and True once the timeout passed
should_exit_f = exit_after(1)
assert not should_exit_f()
sleep(2)
assert should_exit_f()

In [None]:
# | export


async def process_training_status(
    username: str,
    fast_kafka_api_app: FastKafkaAPI,
    *,
    should_exit_f: Optional[Callable[[], bool]] = None,
    sleep_min: int = 5,
    sleep_max: int = 20,
    end_timedelta: int = 120,
) -> None:
    """
    A loop to keep track of training_data uploads from user

    Every iteration loads the latest unfinished events of the user, gets the
    current row counts of their accounts from clickhouse and derives the new
    "upload" / "end" events. New events are sent to kafka and stored in the
    database concurrently. Errors of an iteration are logged and the loop
    continues after the usual pause.

    Args:
        username: username of user to track training data uploads
        fast_kafka_api_app: application object providing the
            ``to_infobip_training_data_status`` producer
        should_exit_f: optional callable checked before every iteration, the
            loop stops once it returns True; the loop is infinite if None
        sleep_min: lower bound (inclusive) of the random pause in seconds
            between two iterations
        sleep_max: upper bound (inclusive) of the random pause in seconds
            between two iterations
        end_timedelta: seconds without new rows after which a training data
            stream is considered ended
    """
    # The blocking database helpers are wrapped so that they can be awaited
    async_get_user = asyncify(get_user)
    async_get_recent_events_for_user = asyncify(get_recent_events_for_user)
    async_get_count_from_training_data_ch_table = asyncify(
        get_count_from_training_data_ch_table
    )
    async_update_mysql = asyncify(update_mysql)

    while should_exit_f is None or not should_exit_f():
        try:
            user = await async_get_user(username)
            recent_events_df = await async_get_recent_events_for_user(user=user)
            if not recent_events_df.empty:
                ch_df = await async_get_count_from_training_data_ch_table(
                    account_ids=recent_events_df.index.tolist()
                )
                update_table = get_new_update_table(
                    recent_events_df=recent_events_df,
                    ch_df=ch_df,
                    end_timedelta=end_timedelta,
                )
                # Without new events there is nothing to send or to store
                if not update_table.empty:
                    async with create_task_group() as tg:
                        tg.soonify(update_kafka)(
                            update_table=update_table, kafka_app=fast_kafka_api_app
                        )
                        tg.soonify(async_update_mysql)(update_table=update_table)

        except Exception as e:
            # Keep the loop alive, but report the failure at error level so it
            # is not lost when informational messages are filtered out
            logger.error(
                f"Error in process_training_status - {e}, {traceback.format_exc()}"
            )

        await asyncio.sleep(random.randint(sleep_min, sleep_max))  # nosec B311

In [None]:
username = create_user_for_testing()
# Stand-in for the kafka application, only the producer method is needed
kafka_app = MagicMock()
kafka_app.to_infobip_training_data_status = AsyncMock()

msg_count = 1000
account_id = 9000

# Seed the database with a "start" event for the new user
with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == username)).one()
    test_start_event = TrainingStreamStatus(
        account_id=account_id,
        model_id="ChurnModelForDrivers",
        model_type="churn",
        event="start",
        count=0,
        total=msg_count,
        user=user,
    )
    session.add(test_start_event)
    session.commit()


# Run the loop for about ten seconds with the count patched to 999 and a
# short end timeout, so that an "upload" and an "end" event can be produced
with patch_get_count_from_training_data_ch_table():
    await process_training_status(
        username=username,
        fast_kafka_api_app=kafka_app,
        should_exit_f=exit_after(10),
        sleep_min=1,
        sleep_max=2,
        end_timedelta=5,
    )

with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == username)).one()

    display(f"All events for account id {account_id}")
    all_events = session.exec(
        select(TrainingStreamStatus)
        .where(TrainingStreamStatus.user == user)
        .where(TrainingStreamStatus.account_id == account_id)
        .order_by(TrainingStreamStatus.id.asc())
    ).all()
    display(all_events)

    # The stream must have been closed with the patched count
    assert all_events[-1].event == "end", all_events[-1]
    assert all_events[-1].count == 999, all_events[-1]


# One message for the "upload" event and one for the "end" event
assert kafka_app.to_infobip_training_data_status.call_count == 2

expected = [
    call(
        account_id=9000,
        application_id=None,
        model_id="ChurnModelForDrivers",
        total_no_of_records=1000,
        no_of_records=999,
    ),
    call(
        account_id=9000,
        application_id=None,
        model_id="ChurnModelForDrivers",
        total_no_of_records=1000,
        no_of_records=999,
    ),
]

assert kafka_app.to_infobip_training_data_status.call_args_list == expected

'All events for account id 9000'

[TrainingStreamStatus(event=<TrainingEvent.start: 'start'>, account_id=9000, model_id='ChurnModelForDrivers', model_type='churn', total=1000, user_id=149, uuid=UUID('bb651e59-c93a-4666-a0c7-1c636cd43f13'), id=21, application_id=None, count=0, created=datetime.datetime(2023, 3, 6, 7, 58, 33)),
 TrainingStreamStatus(event=<TrainingEvent.upload: 'upload'>, account_id=9000, model_id='ChurnModelForDrivers', model_type='churn', total=1000, user_id=149, uuid=UUID('40e990e6-0724-4642-8202-4e2694c75401'), id=22, application_id=None, count=999, created=datetime.datetime(2023, 3, 6, 7, 58, 34)),
 TrainingStreamStatus(event=<TrainingEvent.end: 'end'>, account_id=9000, model_id='ChurnModelForDrivers', model_type='churn', total=1000, user_id=149, uuid=UUID('7a303825-8cb3-4e86-91cf-b56bfb148fcf'), id=23, application_id=None, count=999, created=datetime.datetime(2023, 3, 6, 7, 58, 40))]

In [None]:
# Integration tests

# Event definitions the generated rows are drawn from
definitions = [
    "appLaunch",
    "sign_in",
    "sign_out",
    "add_to_cart",
    "purchase",
    "custom_event_1",
    "custom_event_2",
    "custom_event_3",
]


# Application ids the generated rows are drawn from
applications = ["DriverApp", "PUBG", "COD"]


def generate_n_rows_for_training_data(n: int, seed: int = 42) -> List[Dict[str, Any]]:
    """Generate pseudo-random training data rows for account 6000

    Args:
        n: number of rows to generate
        seed: seed of the random number generator; the same seed gives the
            same rows

    Returns:
        A list of n json compatible dicts with the keys AccountId,
        ApplicationId, ModelId, DefinitionId, OccurredTimeTicks (milliseconds
        since epoch), OccurredTime (formatted timestamp) and PersonId
    """
    rng = np.random.default_rng(seed=seed)
    account_id = 6000
    definition_id = rng.choice(definitions, size=n)
    application_id = rng.choice(applications, size=n)
    model_id = rng.choice(["ChurnModelForDrivers", None], size=n)

    # Integer millisecond bounds; timestamp() returns a float
    first_tick = int(datetime(year=2022, month=1, day=1).timestamp() * 1000)
    last_tick = int(datetime(year=2022, month=11, day=1).timestamp() * 1000)
    occurred_time_ticks = rng.integers(first_tick, last_tick, size=n)
    occurred_time = pd.to_datetime(occurred_time_ticks, unit="ms").strftime(
        "%Y-%m-%dT%H:%M:%S.%f"
    )

    # About ten rows per person; at least one person, otherwise the upper
    # bound would be 0 for n < 10 and drawing would fail
    person_id = rng.integers(max(n // 10, 1), size=n)

    df = pd.DataFrame(
        {
            "AccountId": account_id,
            "ApplicationId": application_id,
            "ModelId": model_id,
            "DefinitionId": definition_id,
            "OccurredTimeTicks": occurred_time_ticks,
            "OccurredTime": occurred_time,
            "PersonId": person_id,
        }
    )
    return json.loads(df.to_json(orient="records"))  # type: ignore


generate_n_rows_for_training_data(100)[-1]

{'AccountId': 6000,
 'ApplicationId': 'COD',
 'ModelId': 'ChurnModelForDrivers',
 'DefinitionId': 'sign_in',
 'OccurredTimeTicks': 1652181248527,
 'OccurredTime': '2022-05-10T11:14:08.527000',
 'PersonId': 6}

In [None]:
def delivery_report(err, msg):
    """Delivery callback, called once for each produced message.

    Triggered by poll() or flush(). Only failed deliveries are reported,
    successful ones are deliberately silent.

    Args:
        err: delivery error, None when the message was delivered
        msg: the produced message (unused)
    """
    if err is None:
        return

    sanitized_print("Message delivery failed: {}".format(err))

In [None]:
def test_process_training_status():
    """Produce training data for the test user and wait for the "end" event

    Stores a "start" event for account 6000, produces the training data rows
    to the training data topic of the test user and then polls the database
    every five seconds until the latest event of the account is "end".

    Raises:
        AssertionError: if no "end" event shows up within ten minutes
    """
    with get_session_with_context() as session:
        user = session.exec(select(User).where(User.username == test_username)).one()

        p = Producer(confluent_kafka_config)
        msg_count = 1000
        account_id = 6000

        test_start_event = TrainingStreamStatus(
            account_id=account_id,
            model_id="ChurnModelForDrivers",
            model_type="churn",
            event="start",
            count=0,
            total=msg_count,
            user=user,
        )
        session.add(test_start_event)
        session.commit()

        training_data = generate_n_rows_for_training_data(msg_count, seed=999)
        for row in training_data:
            p.produce(
                f"{test_username}_training_data",
                json.dumps(row).encode("utf-8"),
                on_delivery=delivery_report,
            )
        p.flush()

    start = datetime.utcnow()
    while True:
        # Raised explicitly: an assert statement is removed when python runs
        # with -O, which would leave this loop without an upper time limit
        if datetime.utcnow() - start > timedelta(seconds=10 * 60):
            raise AssertionError(
                "Taking too long to finish while loop. Probably loop is stuck."
            )
        sleep(5)
        with get_session_with_context() as session:
            user = session.exec(
                select(User).where(User.username == test_username)
            ).one()
            # Latest event of the account
            event = session.exec(
                select(TrainingStreamStatus)
                .where(TrainingStreamStatus.user == user)
                .where(TrainingStreamStatus.account_id == account_id)
                .order_by(TrainingStreamStatus.id.desc())
                .limit(1)
            ).one()
            logger.info(f"event in test is {event}")
            if event.event == "end":
                display(f"All events for account id {account_id}")
                all_events = session.exec(
                    select(TrainingStreamStatus)
                    .where(TrainingStreamStatus.user == user)
                    .where(TrainingStreamStatus.account_id == account_id)
                )
                display(list(all_events))
                break


# Integration test driver: start the server with the status loop running in
# the background and run the test against it
display(f"{test_username=}")
create_topics_for_user(username=test_username)
with set_env_variable_context(variable="JOB_EXECUTOR", value="fastapi"):
    with MonkeyPatch.context() as monkeypatch:
        # Report a fixed count of 999 rows for account 6000 instead of
        # querying clickhouse
        monkeypatch.setattr(
            "__main__.get_count_from_training_data_ch_table",
            lambda account_ids: pd.DataFrame(
                {
                    "curr_count": [999],
                    "AccountId": 6000,
                    "curr_check_on": [datetime.utcnow()],
                }
            ).set_index("AccountId"),
        )
        app, fast_kafka_api_app = create_ws_server(
            assets_path=Path("../assets"), start_process_for_username=None
        )

        # The status loop for the test user is registered explicitly here
        @fast_kafka_api_app.run_in_background()
        async def startup_event():
            await process_training_status(
                username=test_username,
                fast_kafka_api_app=fast_kafka_api_app,
                end_timedelta=30,
            )

        config = uvicorn.Config(app, host="0.0.0.0", port=6010, log_level="debug")

        with run_uvicorn(config):
            # Server started.
            sanitized_print("server started")
            test_process_training_status()

        sanitized_print("server stopped")
        # Server stopped.
# sem.release()
# sem.close()

"test_username='ezskeqqeos'"

23-03-06 07:58:45.103 [INFO] airt_service.confluent: Topic ezskeqqeos_start_training_data created
23-03-06 07:58:45.103 [INFO] airt_service.confluent: Topic ezskeqqeos_training_data created
23-03-06 07:58:45.104 [INFO] airt_service.confluent: Topic ezskeqqeos_realtime_data created
23-03-06 07:58:45.105 [INFO] airt_service.confluent: Topic ezskeqqeos_training_data_status created
23-03-06 07:58:45.106 [INFO] airt_service.confluent: Topic ezskeqqeos_training_model_status created
23-03-06 07:58:45.106 [INFO] airt_service.confluent: Topic ezskeqqeos_model_metrics created
23-03-06 07:58:45.107 [INFO] airt_service.confluent: Topic ezskeqqeos_prediction created


%4|1678089524.988|CONFWARN|rdkafka#producer-1| [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance
%4|1678089524.988|CONFWARN|rdkafka#producer-1| [thrd:app]: Configuration property auto.offset.reset is a consumer property and will be ignored by this producer instance


23-03-06 07:58:45.279 [INFO] airt_service.server: kafka_config={'bootstrap_servers': 'kumaran-airt-service-kafka-1:9092', 'group_id': 'kumaran-airt-service-kafka-1:9092_group', 'auto_offset_reset': 'earliest'}


INFO:     Started server process [65438]
INFO:     Waiting for application startup.


23-03-06 07:58:45.417 [INFO] fast_kafka_api._components.asyncapi: New async specifications generated at: 'asyncapi/spec/asyncapi.yml'
server started


%4|1678089540.307|CONFWARN|rdkafka#producer-2| [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance
%4|1678089540.307|CONFWARN|rdkafka#producer-2| [thrd:app]: Configuration property auto.offset.reset is a consumer property and will be ignored by this producer instance


23-03-06 07:59:06.347 [INFO] __main__: event in test is event=<TrainingEvent.start: 'start'> account_id=6000 model_id='ChurnModelForDrivers' model_type='churn' total=1000 user_id=144 uuid=UUID('26b2af78-d198-4502-b311-cb3f5a82633e') id=24 application_id=None count=0 created=datetime.datetime(2023, 3, 6, 7, 59)
23-03-06 07:59:06.642 [INFO] fast_kafka_api._components.asyncapi: Async docs generated at 'asyncapi/docs'
23-03-06 07:59:06.644 [INFO] fast_kafka_api._components.asyncapi: Output of '$ npx -y -p @asyncapi/generator ag asyncapi/spec/asyncapi.yml @asyncapi/html-template -o asyncapi/docs --force-write'[32m

Done! ✨[0m
[33mCheck out your shiny new generated files at [0m[35m/work/airt-service/notebooks/asyncapi/docs[0m[33m.[0m


23-03-06 07:59:06.645 [INFO] fast_kafka_api.application: _create_producer() : created producer using the config: '{'bootstrap_servers': 'kumaran-airt-service-kafka-1:9092'}'
23-03-06 07:59:06.658 [INFO] fast_kafka_api.application: _create_producer() : 

INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:6010 (Press CTRL+C to quit)


23-03-06 07:59:06.727 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer started.
23-03-06 07:59:06.728 [INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'None_start_training_data'})
23-03-06 07:59:06.729 [INFO] aiokafka.consumer.consumer: Subscribed to topic(s): {'None_start_training_data'}
23-03-06 07:59:06.731 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer subscribed.
23-03-06 07:59:06.739 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer started.
23-03-06 07:59:06.740 [INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'None_training_data'})
23-03-06 07:59:06.741 [INFO] aiokafka.consumer.consumer: Subscribed to topic(s): {'None_training_data'}
23-03-06 07:59:06.742 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer subscribed.
23-03-06 07:59

23-03-06 07:59:46.599 [INFO] __main__: event in test is event=<TrainingEvent.upload: 'upload'> account_id=6000 model_id='ChurnModelForDrivers' model_type='churn' total=1000 user_id=144 uuid=UUID('9ea065ce-641a-4d38-ad04-bf3814a93bc4') id=25 application_id=None count=999 created=datetime.datetime(2023, 3, 6, 7, 59, 7)
23-03-06 07:59:51.634 [INFO] __main__: event in test is event=<TrainingEvent.upload: 'upload'> account_id=6000 model_id='ChurnModelForDrivers' model_type='churn' total=1000 user_id=144 uuid=UUID('9ea065ce-641a-4d38-ad04-bf3814a93bc4') id=25 application_id=None count=999 created=datetime.datetime(2023, 3, 6, 7, 59, 7)
23-03-06 07:59:56.670 [INFO] __main__: event in test is event=<TrainingEvent.end: 'end'> account_id=6000 model_id='ChurnModelForDrivers' model_type='churn' total=1000 user_id=144 uuid=UUID('b3eb3f0e-152c-46f4-beab-f66b3faf8111') id=26 application_id=None count=999 created=datetime.datetime(2023, 3, 6, 7, 59, 53)


'All events for account id 6000'

[TrainingStreamStatus(event=<TrainingEvent.start: 'start'>, account_id=6000, model_id='ChurnModelForDrivers', model_type='churn', total=1000, user_id=144, uuid=UUID('26b2af78-d198-4502-b311-cb3f5a82633e'), id=24, application_id=None, count=0, created=datetime.datetime(2023, 3, 6, 7, 59)),
 TrainingStreamStatus(event=<TrainingEvent.upload: 'upload'>, account_id=6000, model_id='ChurnModelForDrivers', model_type='churn', total=1000, user_id=144, uuid=UUID('9ea065ce-641a-4d38-ad04-bf3814a93bc4'), id=25, application_id=None, count=999, created=datetime.datetime(2023, 3, 6, 7, 59, 7)),
 TrainingStreamStatus(event=<TrainingEvent.end: 'end'>, account_id=6000, model_id='ChurnModelForDrivers', model_type='churn', total=1000, user_id=144, uuid=UUID('b3eb3f0e-152c-46f4-beab-f66b3faf8111'), id=26, application_id=None, count=999, created=datetime.datetime(2023, 3, 6, 7, 59, 53))]

INFO:     Shutting down
INFO:     Waiting for application shutdown.


23-03-06 07:59:56.885 [INFO] aiokafka.consumer.group_coordinator: LeaveGroup request succeeded
23-03-06 07:59:56.886 [INFO] aiokafka.consumer.group_coordinator: LeaveGroup request succeeded
23-03-06 07:59:56.886 [INFO] aiokafka.consumer.group_coordinator: LeaveGroup request succeeded
23-03-06 07:59:56.887 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer stopped.
23-03-06 07:59:56.888 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop() finished.
23-03-06 07:59:56.889 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer stopped.
23-03-06 07:59:56.890 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop() finished.
23-03-06 07:59:56.891 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer stopped.
23-03-06 07:59:56.892 [INFO] fast_kafka_api._components.aiokafka_consumer_loop: aiokafka_consumer_loop() finished

INFO:     Application shutdown complete.
INFO:     Finished server process [65438]


server stopped
