In [None]:
# | default_exp server

In [None]:
# | export


import asyncio
from datetime import datetime
from enum import Enum
from os import environ
from pathlib import Path
from typing import *

import numpy as np
import pandas as pd
import yaml
from aiokafka.helpers import create_ssl_context
from airt.logger import get_logger
from asyncer import asyncify, create_task_group
from fastapi import FastAPI, Request, Response
from fastapi.openapi.docs import get_redoc_html, get_swagger_ui_html
from fastapi.openapi.utils import get_openapi
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles
from fastkafka import FastKafka
from pydantic import (BaseModel, EmailStr, Field, HttpUrl, NonNegativeInt,
                      validator)
from sqlmodel import select

import airt_service
from airt_service.auth import auth_router
from airt_service.confluent import aio_kafka_config
from airt_service.data.clickhouse import get_all_person_ids_for_account_ids
from airt_service.data.datablob import datablob_router
from airt_service.data.datasource import datasource_router
from airt_service.db.models import User, get_session_with_context
from airt_service.model.prediction import model_prediction_router
from airt_service.model.train import model_train_router
from airt_service.sanitizer import sanitized_print
from airt_service.training_status_process import (TrainingStreamStatus,
                                                  process_training_status)
from airt_service.users import user_router

23-04-13 11:10:44.271 [INFO] airt.executor.subcommand: Module loaded.


In [None]:
import contextlib
import json
import threading
import time
from datetime import timedelta

import nest_asyncio
import pytest
import uvicorn
from _pytest.monkeypatch import MonkeyPatch
from confluent_kafka import Consumer, Producer
from fastapi.testclient import TestClient
from fastkafka.testing import Tester
from starlette.datastructures import Headers

from airt_service.confluent import (confluent_kafka_config,
                                    create_topics_for_user)
from airt_service.db.models import create_user_for_testing
from airt_service.helpers import set_env_variable_context
from airt_service.uvicorn_helpers import run_uvicorn

In [None]:
# | exporti

# Module-level logger named after this module, created with airt's logging helper
logger = get_logger(__name__)

In [None]:
# | export

# Markdown text describing how to use the REST API.
# It is a raw string on purpose: the curl examples end their lines with a
# backslash, and in a regular string literal a backslash followed by a newline
# is a line continuation that would silently join the lines of each example.
description = r"""
# airt service to import, train and predict events data

## Python client

To use python library please visit: <a href="https://docs.airt.ai" target="_blank">https://docs.airt.ai</a>

## How to use

To access the airt service, you must create a developer account. Please fill out the signup form below to get one:

[https://bit.ly/3hbXQLY](https://bit.ly/3hbXQLY)

Upon successful verification, you will receive the username and password for the developer account to your email.

### 0. Authenticate

Once you receive the username and password, please authenticate the same by calling the `/token` API. The API
will return a bearer token if the authentication is successful.

```console
curl -X 'POST' \
  'https://api.airt.ai/token' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/x-www-form-urlencoded' \
  -d 'grant_type=&username=<username>&password=<password>&scope=&client_id=&client_secret='
```

You can either use the above bearer token or create additional apikey's for accessing the rest of the API's.

To create additional apikey's, please call the `/apikey` API by passing the bearer token along with the
details of the new apikey in the request. e.g:

```console
curl -X 'POST' \
  'https://api.airt.ai/apikey' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>' \
  -H 'Content-Type: application/json' \
  -d '{
  "name": "<apikey_name>",
  "expiry": "<datetime_in_ISO_8601_format>"
}'
```

### 1. Connect data

Establishing the connection with the data source is a two-step process. The first step allows
you to pull the data into airt servers and the second step allows you to perform necessary data
pre-processing that is required for model training.

Currently, we support importing data from:

- files stored in the AWS S3 bucket,
- databases like MySql and ClickHouse, and
- local CSV/Parquet files.

We plan to support other databases and storage medium in the future.

To pull the data from an S3 bucket, please call the `/from_s3` API

```console
curl -X 'POST' \
  'https://api.airt.ai/datablob/from_s3' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>' \
  -H 'Content-Type: application/json' \
  -d '{
  "uri": "s3://bucket/folder",
  "access_key": "<access_key>",
  "secret_key": "<secret_key>",
  "tag": "<tag_name>"
}'
```

Calling the above API will start importing the data in the background. This may take a while to complete depending on the size of the data.

You can also check the data importing progress by calling the `/datablob/<datablob_id>` API

```console
curl -X 'GET' \
  'https://api.airt.ai/datablob/<datablob_id>' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>'
```

Once the data import is completed, you can either call `/from_csv` or `/from_parquet` API for data pre-processing. Below is an
example to pre-process an imported CSV data.

```console
curl -X 'POST' \
  'https://api.airt.ai/datablob/<datablob_id>/from_csv' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>' \
  -H 'Content-Type: application/json' \
  -d '{
  "deduplicate_data": <deduplicate_data>,
  "index_column": "<index_column>",
  "sort_by": "<sort_by>",
  "blocksize": "<block_size>",
  "kwargs": {}
}'
```

### 2. Train

For model training, we assume the input data includes the following:

- a column identifying a client client_column (person, car, business, etc.),
- a column specifying a type of event we will try to predict target_column (buy, checkout, click on form submit, etc.), and
- a timestamp column specifying the time of an occurred event.

The input data can have additional features of any type and will be used to make predictions more accurate. Finally, we need to
know how much ahead we wish to make predictions. Please use the parameter predict_after to specify the period based on your needs.

In the following example, we will train a model to predict which users will perform a checkout event (*checkout) 3 hours before they actually do it:

```console
curl -X 'POST' \
  'https://api.airt.ai/model/train' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>' \
  -H 'Content-Type: application/json' \
  -d '{
  "data_id": <datasource_id>,
  "client_column": "<client_column>",
  "target_column": "<target_column>",
  "target": "*checkout",
  "predict_after": 10800
}'
```

Calling the above API will start the model training in the background. This may take a while to complete and you can check the
training progress by calling the `/model/<model_id>` API.

```console
curl -X 'GET' \
  'https://api.airt.ai/model/<model_id>' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>'
```

After training is complete, you can check the quality of the model by calling the `/model/<model_id>/evaluate` API. This API
will return model validation metrics like model accuracy, precision and recall.

```console
curl -X 'GET' \
  'https://api.airt.ai/model/<model_id>/evaluate' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>'
```

### 3. Predict

Finally, you can run the predictions by calling the `/model/<model_id>/predict` API:

```console
curl -X 'POST' \
  'https://api.airt.ai/model/<model_id>/predict' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>' \
  -H 'Content-Type: application/json' \
  -d '{
  "data_id": <datasource_id>
}'
```

Calling the above API will start running the model prediction in the background. This may take a while to complete and you can check the prediction progress by calling the `/prediction/<prediction_id>` API.

```console
curl -X 'GET' \
  'https://api.airt.ai/prediction/<prediction_id>' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>'
```

If the dataset is small, then you can call `/prediction/<prediction_id>/pandas` to get prediction results as a pandas dataframe convertible json format:

```console
curl -X 'GET' \
  'https://api.airt.ai/prediction/<prediction_id>/pandas' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>'
```

In many cases, it's much better to push the prediction results to remote destinations. Currently, we support pushing the prediction results to an AWS S3 bucket, MySql database and download to the local machine.

To push the predictions to an S3 bucket, please call the `/prediction/<prediction_id>/to_s3` API

```console
curl -X 'POST' \
  'https://api.airt.ai/prediction/<prediction_id>/to_s3' \
  -H 'accept: application/json' \
  -H 'Authorization: Bearer <bearer_token>' \
  -H 'Content-Type: application/json' \
  -d '{
  "uri": "s3://bucket/folder",
  "access_key": "<access_key>",
  "secret_key": "<secret_key>"
}'
```

"""

In [None]:
# | export

def json_datetime_sec_encoder(dt: datetime) -> str:
    """Serialize a datetime with whole-second precision

    Args:
        dt: the datetime to serialize

    Returns:
        The datetime formatted as ``YYYY-MM-DDTHH:MM:SS``; the microsecond
        component is not part of the output
    """
    # a non-empty format spec is delegated to strftime
    return f"{dt:%Y-%m-%dT%H:%M:%S}"

In [None]:
# The fractional part (.789012) must be dropped, not rounded up to :57
dt = datetime(2023, 1, 1, 12, 34, 56, 789012)
expected = "2023-01-01T12:34:56"
actual = json_datetime_sec_encoder(dt)
assert actual == expected

In [None]:
# | export


# Closed set of model kinds used by the request, metrics and prediction messages.
# Mixing in `str` makes each member compare equal to its plain string value.
class ModelType(str, Enum):
    churn = "churn"
    propensity_to_buy = "propensity_to_buy"


# Message consumed from the `<username>_start_training_data` topic: it announces
# how many records are about to be ingested for a given account / model.
class ModelTrainingRequest(BaseModel):
    # --- account / application / model identification ---
    AccountId: NonNegativeInt = Field(
        ...,
        description="ID of an account",
        example=202020,
    )
    ApplicationId: Optional[str] = Field(
        default=None,
        description="Id of the application in case there is more than one for the AccountId",
        example="TestApplicationId",
    )
    ModelId: Optional[str] = Field(
        default=None,
        description="User supplied ID of the model trained",
        example="ChurnModelForDrivers",
    )

    # --- request details ---
    model_type: ModelType = Field(
        ...,
        description="Model type, only 'churn' is supported right now",
    )
    total_no_of_records: NonNegativeInt = Field(
        ...,
        description="approximate total number of records (rows) to be ingested",
        example=1_000_000,
    )


class EventData(BaseModel):
    """
    A sequence of events for a fixed account_id
    """

    # --- account / application / model identification ---
    AccountId: NonNegativeInt = Field(
        ...,
        description="ID of an account",
        example=202020,
    )
    ApplicationId: Optional[str] = Field(
        default=None,
        description="Id of the application in case there is more than one for the AccountId",
        example="TestApplicationId",
    )
    ModelId: Optional[str] = Field(
        default=None,
        description="User supplied ID of the model trained",
        example="ChurnModelForDrivers",
    )

    # --- the event itself ---
    DefinitionId: str = Field(
        ...,
        description="name of the event",
        example="appLaunch",
        min_length=1,  # empty event names are rejected
    )
    OccurredTime: datetime = Field(
        ...,
        description="local time of the event",
        example="2021-03-28T00:34:08",
    )
    OccurredTimeTicks: NonNegativeInt = Field(
        ...,
        description="local time of the event as the number of ticks",
        example=1616891648496,
    )
    PersonId: NonNegativeInt = Field(
        ...,
        description="ID of a person",
        example=12345678,
    )


# Payload type of the `<username>_realtime_data` consumer; it currently adds
# nothing to the fields inherited from EventData.
class RealtimeData(EventData):
    pass


# Progress message produced to the `<username>_training_data_status` topic:
# how many of the announced records have been ingested so far.
class TrainingDataStatus(BaseModel):
    # --- account / application / model identification ---
    AccountId: NonNegativeInt = Field(
        ...,
        description="ID of an account",
        example=202020,
    )
    ApplicationId: Optional[str] = Field(
        default=None,
        description="Id of the application in case there is more than one for the AccountId",
        example="TestApplicationId",
    )
    ModelId: Optional[str] = Field(
        default=None,
        description="User supplied ID of the model trained",
        example="ChurnModelForDrivers",
    )

    # --- ingestion progress ---
    no_of_records: NonNegativeInt = Field(
        ...,
        description="number of records (rows) ingested",
        example=12_345,
    )
    total_no_of_records: NonNegativeInt = Field(
        ...,
        description="total number of records (rows) to be ingested",
        example=1_000_000,
    )

# Message exchanged on the `<username>_start_training` topic: it triggers model
# training once the training data is in place.
class TrainingModelStart(BaseModel):
    # --- account / application / model identification ---
    AccountId: NonNegativeInt = Field(
        ...,
        description="ID of an account",
        example=202020,
    )
    ApplicationId: Optional[str] = Field(
        default=None,
        description="Id of the application in case there is more than one for the AccountId",
        example="TestApplicationId",
    )
    ModelId: Optional[str] = Field(
        default=None,
        description="User supplied ID of the model trained",
        example="ChurnModelForDrivers",
    )

    # --- training input size ---
    no_of_records: NonNegativeInt = Field(
        ...,
        description="number of records (rows) in the DB used for training",
        example=1_000_000,
    )

# Progress message produced to the `<username>_training_model_status` topic while
# a model is being trained.
class TrainingModelStatus(BaseModel):
    # --- account / application / model identification ---
    AccountId: NonNegativeInt = Field(
        ..., example=202020, description="ID of an account"
    )
    ApplicationId: Optional[str] = Field(
        default=None,
        example="TestApplicationId",
        description="Id of the application in case there is more than one for the AccountId",
    )
    ModelId: Optional[str] = Field(
        default=None,
        example="ChurnModelForDrivers",
        description="User supplied ID of the model trained",
    )

    # --- training progress ---
    # The description used to be a copy of the "records ingested" text from
    # TrainingDataStatus; this field counts training steps, not records.
    current_step: NonNegativeInt = Field(
        ...,
        example=0,
        description="the training step currently being executed",
    )
    current_step_percentage: float = Field(
        ...,
        example=0.21,
        description="the percentage of the current step completed",
    )
    total_no_of_steps: NonNegativeInt = Field(
        ...,
        example=1_000_000,
        description="total number of steps for training the model",
    )


class ModelMetrics(BaseModel):
    """The standard metrics for classification models.

    The most important metric is AUC for unbalanced classes such as churn. Metrics such as
    accuracy are not very useful since they are easily maximized by outputting the most common
    class all the time.
    """

    # --- account / application / model identification ---
    AccountId: NonNegativeInt = Field(
        ..., example=202020, description="ID of an account"
    )
    ApplicationId: Optional[str] = Field(
        default=None,
        example="TestApplicationId",
        description="Id of the application in case there is more than one for the AccountId",
    )
    ModelId: Optional[str] = Field(
        default=None,
        example="ChurnModelForDrivers",
        description="User supplied ID of the model trained",
    )

    timestamp: datetime = Field(
        ...,
        example="2021-03-28T00:34:08",
        description="UTC time when the model was trained",
    )
    model_type: ModelType = Field(
        ...,
        example="churn",
        description="Name of the model used (churn, propensity to buy)",
    )

    # --- quality metrics, each constrained to the [0, 1] interval ---
    auc: float = Field(
        ..., example=0.91, description="Area under ROC curve", ge=0.0, le=1.0
    )
    f1: float = Field(..., example=0.89, description="F-1 score", ge=0.0, le=1.0)
    # NOTE: the field name is misspelled, but it is part of the serialized
    # message, so it is kept as is; only the human-readable text is corrected.
    precission: float = Field(
        ..., example=0.84, description="precision", ge=0.0, le=1.0
    )
    recall: float = Field(..., example=0.82, description="recall", ge=0.0, le=1.0)
    accuracy: float = Field(..., example=0.82, description="accuracy", ge=0.0, le=1.0)

    class Config:
        # datetimes are serialized with whole-second precision
        json_encoders = {
            datetime: json_datetime_sec_encoder,
        }


# A single per-person score produced to the `<username>_prediction` topic.
class Prediction(BaseModel):
    # --- account / application / model identification ---
    AccountId: NonNegativeInt = Field(
        ...,
        description="ID of an account",
        example=202020,
    )
    ApplicationId: Optional[str] = Field(
        default=None,
        description="Id of the application in case there is more than one for the AccountId",
        example="TestApplicationId",
    )
    ModelId: Optional[str] = Field(
        default=None,
        description="User supplied ID of the model trained",
        example="ChurnModelForDrivers",
    )

    # --- the prediction itself ---
    PersonId: NonNegativeInt = Field(
        ...,
        description="ID of a person",
        example=12345678,
    )
    prediction_time: datetime = Field(
        ...,
        description="UTC time of prediction",
        example="2021-03-28T00:34:08",
    )
    model_type: ModelType = Field(
        ...,
        description="Name of the model used (churn, propensity to buy)",
        example="churn",
    )
    score: float = Field(
        ...,
        description="Prediction score (e.g. the probability of churn in the next 28 days)",
        example=0.4321,
        ge=0.0,
        le=1.0,
    )

    class Config:
        # datetimes are serialized with whole-second precision
        json_encoders = {datetime: json_datetime_sec_encoder}
        

In [None]:
# Optional identifiers that are not supplied must be serialized as null
request = ModelTrainingRequest(
    AccountId=1234,
    model_type="churn",
    total_no_of_records=10_000,
)

expected = '{"AccountId": 1234, "ApplicationId": null, "ModelId": null, "model_type": "churn", "total_no_of_records": 10000}'
actual = request.json()
assert actual == expected

In [None]:
# The fractional seconds are separated with "." as in the documented ISO format;
# a ":" in that position is not accepted by every datetime parser implementation.
dt = datetime.fromisoformat("2023-01-01T13:45:59.998877")
model_metrics = ModelMetrics(
    AccountId=1234,
    timestamp=dt,
    model_type="churn",
    auc=0.99,
    f1=0.98,
    precission=0.97,
    recall=0.96,
    accuracy=0.95,
)
# the custom JSON encoder must drop the microseconds from the timestamp
expected = '{"AccountId": 1234, "ApplicationId": null, "ModelId": null, "timestamp": "2023-01-01T13:45:59", "model_type": "churn", "auc": 0.99, "f1": 0.98, "precission": 0.97, "recall": 0.96, "accuracy": 0.95}'
actual = model_metrics.json()
assert actual == expected

In [None]:
# The fractional seconds are separated with "." as in the documented ISO format;
# a ":" in that position is not accepted by every datetime parser implementation.
dt = datetime.fromisoformat("2023-02-03T04:56:07.890123")
prediction = Prediction(
    AccountId=1234,
    PersonId=123456789,
    prediction_time=dt,
    model_type="churn",
    score=0.73,
)
# the custom JSON encoder must drop the microseconds from prediction_time
expected = '{"AccountId": 1234, "ApplicationId": null, "ModelId": null, "PersonId": 123456789, "prediction_time": "2023-02-03T04:56:07", "model_type": "churn", "score": 0.73}'
actual = prediction.json()
assert actual == expected

In [None]:
# | export

# Module-level record counters.
# NOTE(review): neither name is referenced in the visible part of this module.
_total_no_of_records = 1_000_000
_no_of_records_received = 0

In [None]:
#| export

def _construct_kafka_brokers() -> Dict[str, Dict[str, Any]]:
    """Build the Kafka broker descriptions passed to FastKafka

    The staging and production brokers are fixed. If the broker configured in
    ``aio_kafka_config["bootstrap_servers"]`` is neither of them, it is added
    as an additional ``dev`` broker.

    ``bootstrap_servers`` may be a single ``host[:port]`` string, a
    comma-separated string of such entries or a list of them; only the first
    entry is inspected. An entry without a port falls back to port 9092.

    Returns:
        A dictionary mapping the broker name to its parameters
    """
    bootstrap_servers = aio_kafka_config["bootstrap_servers"]
    if isinstance(bootstrap_servers, str):
        bootstrap_servers = bootstrap_servers.split(",")
    first_server = bootstrap_servers[0].strip()

    # Split on the last ":" only, so a plain `url, port = s.split(":")` unpacking
    # error cannot occur. The port is kept as a string, as before.
    url, separator, port = first_server.rpartition(":")
    if not separator:
        url, port = first_server, "9092"

    kafka_brokers: Dict[str, Dict[str, Any]] = {
        "staging": {
            "url": "pkc-1wvvj.westeurope.azure.confluent.cloud",
            "description": "Staging Kafka broker",
            "port": 9092,
            "protocol": "kafka-secure",
            "security": {"type": "plain"},
        },
        "production": {
            "url": "pkc-1wvvj.westeurope.azure.confluent.cloud",
            "description": "Production Kafka broker",
            "port": 9092,
            "protocol": "kafka-secure",
            "security": {"type": "plain"},
        },
    }

    # Any broker other than the well-known ones is treated as a development broker
    known_urls = {broker["url"] for broker in kafka_brokers.values()}
    if url not in known_urls:
        kafka_brokers["dev"] = {
            "url": url,
            "description": "Development Kafka broker",
            "port": port,
        }

    return kafka_brokers

In [None]:
# Display the broker mapping produced for the currently configured bootstrap server
_construct_kafka_brokers()

{'staging': {'url': 'pkc-1wvvj.westeurope.azure.confluent.cloud',
  'description': 'Staging Kafka broker',
  'port': 9092,
  'protocol': 'kafka-secure',
  'security': {'type': 'plain'}},
 'production': {'url': 'pkc-1wvvj.westeurope.azure.confluent.cloud',
  'description': 'Production Kafka broker',
  'port': 9092,
  'protocol': 'kafka-secure',
  'security': {'type': 'plain'}},
 'dev': {'url': 'davor-redpanda',
  'description': 'Development Kafka broker',
  'port': '9092'}}

In [None]:
# | export


def create_fastkafka_application(
    start_process_for_username: Optional[str] = "infobip",
    *,
    sleep_min: int = 5,
    sleep_max: int = 20,
) -> FastKafka:
    """Create a FastKafka service

    Args:
        start_process_for_username: prefix for topics used. It is also the username
            for which the training status process is started in the background;
            if None, no background process is started.
        sleep_min: passed on to `process_training_status`
        sleep_max: passed on to `process_training_status`

    Returns:
        A FastKafka application
    """

    kafka_brokers = _construct_kafka_brokers()

    # The brokers are passed separately, so the bootstrap servers must not be
    # passed a second time among the remaining Kafka client settings.
    kafka_config = {
        k: v for k, v in aio_kafka_config.items() if k != "bootstrap_servers"
    }

    # global description
    version = airt_service.__version__
    contact = dict(name="airt.ai", url="https://airt.ai", email="info@airt.ai")

    fastkafka_app = FastKafka(
        title="airt service kafka api",
        description="kafka api for airt service",
        kafka_brokers=kafka_brokers,
        version=version,
        contact=contact,
        #         group_id="airt-service-kafka-group",
        #         auto_offset_reset="earliest",
        **kafka_config,
    )

    @fastkafka_app.consumes(  # type: ignore
        topic=f"{start_process_for_username}_start_training_data"
    )
    async def on_infobip_start_training_data(msg: ModelTrainingRequest):
        # Persist a "start" event for the user; it is stored in the database
        # and nothing is produced to Kafka here.
        logger.info(f"start training msg={msg}")
        print(f"on_infobip_start_training_data({msg=})")
        with get_session_with_context() as session:
            user = session.exec(
                select(User).where(User.username == start_process_for_username)
            ).one()
            start_event = TrainingStreamStatus(
                event="start",
                account_id=msg.AccountId,
                application_id=msg.ApplicationId,
                model_id=msg.ModelId,
                model_type=msg.model_type,
                count=0,
                total=msg.total_no_of_records,
                user=user,
            )
            session.add(start_event)
            session.commit()

    @fastkafka_app.consumes(topic=f"{start_process_for_username}_training_data")  # type: ignore
    async def on_infobip_training_data(msg: EventData):
        pass

    @fastkafka_app.consumes(topic=f"{start_process_for_username}_realtime_data")  # type: ignore
    async def on_infobip_realtime_data(msg: RealtimeData):
        pass

    @fastkafka_app.produces(  # type: ignore
        topic=f"{start_process_for_username}_training_data_status"
    )
    async def to_infobip_training_data_status(
        account_id: int,
        *,
        application_id: Optional[str] = None,
        model_id: str,
        no_of_records: int,
        total_no_of_records: int,
    ) -> TrainingDataStatus:
        call_repr = f"{start_process_for_username}_training_data_status({account_id=}, {no_of_records=}, {total_no_of_records=})"
        logger.debug(call_repr)
        print(call_repr)
        msg = TrainingDataStatus(
            AccountId=account_id,
            ApplicationId=application_id,
            ModelId=model_id,
            no_of_records=no_of_records,
            total_no_of_records=total_no_of_records,
        )
        print(f"to_infobip_training_data_status({msg})")
        return msg

    @fastkafka_app.produces(  # type: ignore
        topic=f"{start_process_for_username}_start_training"
    )
    async def to_infobip_start_training(
        account_id: int,
        *,
        application_id: Optional[str] = None,
        model_id: str,
        no_of_records: int,
    ) -> TrainingModelStart:
        print(
            f"{start_process_for_username}_start_training({account_id=}, {application_id=}, {model_id=}, {no_of_records=})"
        )
        msg = TrainingModelStart(
            AccountId=account_id,
            ApplicationId=application_id,
            ModelId=model_id,
            no_of_records=no_of_records,
        )
        print(f"to_infobip_start_training({msg})")
        return msg

    @fastkafka_app.consumes(topic=f"{start_process_for_username}_start_training")  # type: ignore
    async def on_infobip_start_training(msg: TrainingModelStart):
        # The values reported below are generated here (fixed metrics, random
        # scores); no model is actually trained in this handler.

        # update progress
        total_no_of_steps = 5
        for i in range(total_no_of_steps + 1):
            for j in range(3):
                # three updates per step: 0.0, a random fraction, then 1.0
                if j != 1:
                    step_percentage = 0.5 * j
                else:
                    step_percentage = round(np.random.uniform(), ndigits=3)
                await to_infobip_training_model_status(
                    TrainingModelStatus(
                        AccountId=msg.AccountId,
                        ApplicationId=msg.ApplicationId,
                        ModelId=msg.ModelId,
                        current_step=i,
                        current_step_percentage=step_percentage,
                        total_no_of_steps=total_no_of_steps,
                    )
                )
            await asyncio.sleep(1)

        # send metrics
        await to_infobip_model_metrics(
            ModelMetrics(
                AccountId=msg.AccountId,
                ApplicationId=msg.ApplicationId,
                ModelId=msg.ModelId,
                # the field is documented as UTC time, so local time must not be used
                timestamp=datetime.utcnow(),
                model_type="churn",
                auc=0.946,
                f1=0.934,
                precission=0.976,
                recall=0.987,
                accuracy=0.992,
            )
        )

        # send predictions
        df = get_all_person_ids_for_account_ids(msg.AccountId).reset_index()
        df["score"] = np.random.uniform(size=df.shape[0])
        # documented as UTC time as well
        prediction_time = datetime.utcnow()

        async with create_task_group() as tg:
            s_to_infobip_prediction = tg.soonify(to_infobip_prediction)

            # Iterate over the two columns instead of using a row-wise
            # `df.apply`: a row is materialised as a single-dtype Series, which
            # turns the integer person ids into floats next to the float scores.
            # `tolist()` yields plain Python ints / floats.
            for person_id, score in zip(
                df["PersonId"].tolist(), df["score"].tolist()
            ):
                s_to_infobip_prediction(
                    Prediction(
                        AccountId=msg.AccountId,
                        ApplicationId=msg.ApplicationId,
                        ModelId=msg.ModelId,
                        PersonId=person_id,
                        prediction_time=prediction_time,
                        model_type="churn",
                        score=score,
                    )
                )

    @fastkafka_app.produces(  # type: ignore
        topic=f"{start_process_for_username}_training_model_status"
    )
    async def to_infobip_training_model_status(
        msg: TrainingModelStatus,
    ) -> TrainingModelStatus:
        # the log label used to say "on_..." although this is the "to_..." producer
        logger.debug(f"to_infobip_training_model_status(msg={msg})")
        print(f"to_infobip_training_model_status(msg={msg})")
        return msg

    @fastkafka_app.produces(topic=f"{start_process_for_username}_model_metrics")  # type: ignore
    async def to_infobip_model_metrics(msg: ModelMetrics) -> ModelMetrics:
        print(f"to_infobip_model_metrics(msg={msg})")
        return msg

    #     @fastkafka_app.consumes(topic=f"{start_process_for_username}_model_metrics")  # type: ignore
    #     async def on_infobip_model_metrics(msg: ModelMetrics):
    #         pass

    @fastkafka_app.produces(topic=f"{start_process_for_username}_prediction")  # type: ignore
    async def to_infobip_prediction(msg: Prediction) -> Prediction:
        return msg

    # todo: move to fastkafka lib
    # Expose the producers as attributes of the application object
    fastkafka_app.to_infobip_training_data_status = to_infobip_training_data_status
    fastkafka_app.to_infobip_start_training = to_infobip_start_training
    fastkafka_app.to_infobip_training_model_status = to_infobip_training_model_status
    fastkafka_app.to_infobip_model_metrics = to_infobip_model_metrics
    fastkafka_app.to_infobip_prediction = to_infobip_prediction

    if start_process_for_username is not None:

        @fastkafka_app.run_in_background()  # type: ignore
        async def startup_event(fastkafka_app: FastKafka = fastkafka_app) -> None:
            await process_training_status(
                username=start_process_for_username,  # type: ignore
                fast_kafka_api_app=fastkafka_app,
                sleep_min=sleep_min,
                sleep_max=sleep_max,
            )

    return fastkafka_app

In [None]:
# Scratch check of the score generator: draw 100 uniform random samples
np.random.uniform(size=100)

array([0.06682875, 0.6566096 , 0.83024381, 0.60531587, 0.41578702,
       0.71396381, 0.07483185, 0.75978693, 0.31286511, 0.0759263 ,
       0.64647533, 0.6492741 , 0.95678914, 0.8575229 , 0.17131994,
       0.57637587, 0.98631331, 0.99012724, 0.96797731, 0.89817135,
       0.2141635 , 0.00679284, 0.21084249, 0.00435225, 0.58553915,
       0.43504667, 0.69980787, 0.66715982, 0.62977462, 0.89016808,
       0.60413991, 0.58602526, 0.01842006, 0.37696817, 0.95529772,
       0.20125867, 0.67990276, 0.56679117, 0.73815627, 0.18634905,
       0.23420411, 0.1432295 , 0.95675916, 0.56012229, 0.68251637,
       0.16303655, 0.0097613 , 0.18701492, 0.19200206, 0.44830324,
       0.99778598, 0.25695353, 0.33751267, 0.10797319, 0.67510498,
       0.04061822, 0.88462107, 0.45869103, 0.07943771, 0.51067569,
       0.08779485, 0.34974984, 0.72946598, 0.76951132, 0.61689797,
       0.7649754 , 0.2966759 , 0.52540731, 0.64466427, 0.81744993,
       0.81146329, 0.45626006, 0.85056916, 0.67833829, 0.32113

In [None]:
# The start-training-data handler looks up the user named "infobip" by default,
# so a test user with that name is set up before the application is exercised.
create_user_for_testing(username="infobip")

'infobip'

In [None]:
@contextlib.contextmanager
def monkeypatch_clickhouse(
    account_id: int, curr_count: int, curr_check_on: Optional[datetime] = None
) -> Iterator[None]:
    """Temporarily replace the ClickHouse helper functions with in-memory fakes

    Args:
        account_id: account id reported by the fake record count query
        curr_count: record count reported by the fake record count query
        curr_check_on: timestamp reported by the fake record count query; if None,
            the current UTC time at the moment of each call is used

    Yields:
        None; the patches are active only inside the ``with`` block
    """

    def _fake_count(account_ids: Any) -> pd.DataFrame:
        # `curr_check_on` used to be ignored; it is honoured now when given
        check_on = curr_check_on if curr_check_on is not None else datetime.utcnow()
        return pd.DataFrame(
            {
                "curr_count": [curr_count],
                "AccountId": [account_id],
                "curr_check_on": [check_on],
            }
        ).set_index("AccountId")

    def _fake_person_ids(account_id: Any) -> pd.DataFrame:
        # 1000 consecutive person ids, all belonging to the requested account
        return pd.DataFrame(
            {
                "PersonId": np.arange(1000),
                "AccountId": [account_id] * 1000,
            }
        ).set_index("AccountId")

    with MonkeyPatch.context() as monkeypatch:
        monkeypatch.setattr(
            "airt_service.training_status_process.get_count_from_training_data_ch_table",
            _fake_count,
        )
        # patched in `__main__` because that is where the notebook-defined
        # handlers resolve the name
        monkeypatch.setattr(
            "__main__.get_all_person_ids_for_account_ids",
            _fake_person_ids,
        )
        yield
 
# Smoke-check the patch: both lookups are made while the context is active, so they
# are served by the replacements installed by monkeypatch_clickhouse.
with monkeypatch_clickhouse(account_id=12345, curr_count=10):
    df = airt_service.training_status_process.get_count_from_training_data_ch_table([12345])
    person_ids = get_all_person_ids_for_account_ids(12345)
    
# Show both results: the count frame explicitly, the person ids as the cell's last expression.
display(df)
person_ids

Unnamed: 0_level_0,curr_count,curr_check_on
AccountId,Unnamed: 1_level_1,Unnamed: 2_level_1
12345,10,2023-04-13 11:10:44.896663


Unnamed: 0_level_0,PersonId
AccountId,Unnamed: 1_level_1
12345,0
12345,1
12345,2
12345,3
12345,4
...,...
12345,995
12345,996
12345,997
12345,998


In [None]:
fastkafka_app = create_fastkafka_application(sleep_min=1, sleep_max=1)
fastkafka_app

i = 13

with monkeypatch_clickhouse(account_id=12345 + i, curr_count=0):
    #     !mysql -u root -pSuperSecretPassword -h davor-mysql -e 'select * from airt_service.trainingstreamstatus order by id desc limit 5'

    async with Tester(fastkafka_app).using_local_kafka() as tester:
        training_req = ModelTrainingRequest(
            AccountId=12345 + i,
            ModelId="drivers",
            model_type="churn",
            total_no_of_records=10_000,
        )
        await tester.to_infobip_start_training_data(training_req)

        #         !mysql -u root -pSuperSecretPassword -h davor-mysql -e 'select * from airt_service.trainingstreamstatus order by id desc limit 5'

        # send 10 messages
        for _ in range(10):
            training_data = EventData(
                AccountId=12345 + i,
                ModelId="drivers",
                DefinitionId="event_name",
                PersonId=12,
                OccurredTime="2023-03-28T13:41:22.628141",
                OccurredTimeTicks=1680010882628,
            )
            await tester.to_infobip_training_data(training_data)

        #         !mysql -u root -pSuperSecretPassword -h davor-mysql -e 'select * from airt_service.trainingstreamstatus order by id desc limit 5'

        # patch clickhouse fetch function to show 2_000 records were in the database
        with monkeypatch_clickhouse(account_id=12345 + i, curr_count=2_000):
            await tester.awaited_mocks.on_infobip_training_data_status.assert_called_with(
                TrainingDataStatus(
                    AccountId=12345 + i,
                    ApplicationId=None,
                    ModelId="drivers",
                    no_of_records=2000,
                    total_no_of_records=10000,
                ),
                timeout=5,
            )
            print(f"{tester.mocks.on_infobip_training_data_status.await_args_list=}")

        #         # patch clickhouse fetch function to show 10_000 records were in the database
        with monkeypatch_clickhouse(account_id=12345 + i, curr_count=10_000):
            await tester.awaited_mocks.on_infobip_training_data_status.assert_called_with(
                TrainingDataStatus(
                    AccountId=12345 + i,
                    ApplicationId=None,
                    ModelId="drivers",
                    no_of_records=10_000,
                    total_no_of_records=10_000,
                ),
            )

            await tester.awaited_mocks.on_infobip_start_training.assert_called_with(
                TrainingModelStart(
                    AccountId=12345 + i,
                    ApplicationId=None,
                    ModelId="drivers",
                    no_of_records=10_000,
                ),
                timeout=2,
            )

            for k in range(6):
                await tester.awaited_mocks.on_infobip_training_model_status.assert_called_with(
                    TrainingModelStatus(
                        AccountId=12345 + i,
                        ApplicationId=None,
                        ModelId="drivers",
                        current_step=k,
                        current_step_percentage=1.0,
                        total_no_of_steps=5,
                    ),
                    timeout=10,
                )
                
            await tester.awaited_mocks.on_infobip_model_metrics.assert_called(timeout=5)
            
            await tester.awaited_mocks.on_infobip_prediction.assert_called()
            
            for _ in range(3):
                if len(tester.mocks.on_infobip_prediction.await_args_list) < 1000:
                    asyncio.sleep(3)
                    
            assert len(tester.mocks.on_infobip_prediction.await_args_list) == 1000

print("ok")

23-04-13 11:10:44.915 [INFO] fastkafka._application.app: run_in_background() : Adding function 'startup_event' as background task
23-04-13 11:10:44.917 [INFO] fastkafka._components.test_dependencies: Java is already installed.
23-04-13 11:10:44.918 [INFO] fastkafka._components.test_dependencies: But not exported to PATH, exporting...
23-04-13 11:10:44.918 [INFO] fastkafka._components.test_dependencies: Kafka is installed.
23-04-13 11:10:44.919 [INFO] fastkafka._components.test_dependencies: But not exported to PATH, exporting...
23-04-13 11:10:44.920 [INFO] fastkafka._testing.local_broker: Starting zookeeper...
23-04-13 11:10:45.669 [INFO] fastkafka._testing.local_broker: Starting kafka...
23-04-13 11:10:47.610 [INFO] fastkafka._testing.local_broker: Local Kafka broker up and running on 127.0.0.1:9092
23-04-13 11:10:49.682 [INFO] fastkafka._application.app: _create_producer() : created producer using the config: '{'bootstrap_servers': '127.0.0.1:9092'}'
23-04-13 11:10:49.696 [INFO] fas

infobip_start_training(account_id=12358, application_id=None, model_id='drivers', no_of_records=0)
to_infobip_start_training(AccountId=12358 ApplicationId=None ModelId='drivers' no_of_records=0)
infobip_training_data_status(account_id=12358, no_of_records=0, total_no_of_records=10000)
to_infobip_training_data_status(AccountId=12358 ApplicationId=None ModelId='drivers' no_of_records=0 total_no_of_records=10000)
23-04-13 11:10:49.858 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer started.
23-04-13 11:10:49.859 [INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'infobip_training_model_status'})
23-04-13 11:10:49.860 [INFO] aiokafka.consumer.consumer: Subscribed to topic(s): {'infobip_training_model_status'}
23-04-13 11:10:49.860 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer subscribed.
23-04-13 11:10:49.861 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_con

23-04-13 11:10:50.650 [INFO] aiokafka.consumer.group_coordinator: Discovered coordinator 0 for group davor-redpanda:9092_group
23-04-13 11:10:50.651 [INFO] aiokafka.consumer.group_coordinator: Revoking previously assigned partitions set() for group davor-redpanda:9092_group
23-04-13 11:10:50.652 [INFO] aiokafka.consumer.group_coordinator: (Re-)joining group davor-redpanda:9092_group
23-04-13 11:10:50.653 [INFO] aiokafka.consumer.group_coordinator: Discovered coordinator 0 for group davor-redpanda:9092_group
23-04-13 11:10:50.653 [INFO] aiokafka.consumer.group_coordinator: Revoking previously assigned partitions set() for group davor-redpanda:9092_group
23-04-13 11:10:50.654 [INFO] aiokafka.consumer.group_coordinator: (Re-)joining group davor-redpanda:9092_group
23-04-13 11:10:50.672 [INFO] aiokafka.consumer.group_coordinator: Joined group 'davor-redpanda:9092_group' (generation 1) with member_id aiokafka-0.8.0-2c3f091b-7c4a-49d8-b9dc-691f93881337
23-04-13 11:10:50.672 [INFO] aiokafka.c

on_infobip_training_model_status(msg=AccountId=12358 ApplicationId=None ModelId='drivers' current_step=2 current_step_percentage=0.0 total_no_of_steps=5)
on_infobip_training_model_status(msg=AccountId=12358 ApplicationId=None ModelId='drivers' current_step=2 current_step_percentage=0.998 total_no_of_steps=5)
on_infobip_training_model_status(msg=AccountId=12358 ApplicationId=None ModelId='drivers' current_step=2 current_step_percentage=1.0 total_no_of_steps=5)
on_infobip_training_model_status(msg=AccountId=12358 ApplicationId=None ModelId='drivers' current_step=3 current_step_percentage=0.0 total_no_of_steps=5)
on_infobip_training_model_status(msg=AccountId=12358 ApplicationId=None ModelId='drivers' current_step=3 current_step_percentage=0.816 total_no_of_steps=5)
on_infobip_training_model_status(msg=AccountId=12358 ApplicationId=None ModelId='drivers' current_step=3 current_step_percentage=1.0 total_no_of_steps=5)
on_infobip_training_model_status(msg=AccountId=12358 ApplicationId=None 

In [None]:
# | export


def create_ws_server(
    assets_path: Path = Path("./assets"),
    start_process_for_username: Optional[str] = "infobip",
) -> Tuple[FastAPI, FastKafka]:
    """Create the FastAPI web service together with its FastKafka application

    Args:
        assets_path: Path to assets (should include images/favicon.ico)
        start_process_for_username: Passed through unchanged to
            create_fastkafka_application as start_process_for_username

    Returns:
        A tuple of the configured FastAPI application and the FastKafka
        application
    """
    title = "airt service"
    version = airt_service.__version__
    openapi_url = "/openapi.json"
    favicon_url = "/assets/images/favicon.ico"
    assets_path = assets_path.resolve()
    favicon_path = assets_path / "images/favicon.ico"

    # The built-in /docs and /redoc pages are disabled here and re-registered
    # further down so that they can be served with the project favicon.
    app = FastAPI(
        title=title,
        description=description,
        version=version,
        docs_url=None,
        redoc_url=None,
    )
    app.mount("/assets", StaticFiles(directory=assets_path), name="assets")

    # The AsyncAPI docs are only served when the directory is present
    asyncapi_docs_dir = Path("./asyncapi/docs").resolve()
    if asyncapi_docs_dir.exists():
        app.mount(
            "/asyncapi",
            StaticFiles(directory=asyncapi_docs_dir, html=True),
            name="asyncapi",
        )

    routers = (
        auth_router,  # attaches /token to routes
        datablob_router,  # attaches /datablob/* to routes
        datasource_router,  # attaches /datasource/* to routes
        model_train_router,  # attaches /model/* to routes
        model_prediction_router,  # attaches /prediction/* to routes
        user_router,  # attaches /user/* to routes
    )
    for router in routers:
        app.include_router(router)

    @app.middleware("http")
    async def add_nosniff_x_content_type_options_header(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        # Add the security headers to every outgoing response
        response = await call_next(request)
        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["Strict-Transport-Security"] = "max-age=31536000"
        return response

    @app.get("/version")
    def get_versions() -> Dict[str, str]:
        return {"airt_service": airt_service.__version__}

    @app.get("/", include_in_schema=False)
    def redirect_root() -> RedirectResponse:
        return RedirectResponse("/docs")

    @app.get("/docs", include_in_schema=False)
    def overridden_swagger() -> HTMLResponse:
        return get_swagger_ui_html(
            openapi_url=openapi_url,
            title=title,
            swagger_favicon_url=favicon_url,
        )

    @app.get("/redoc", include_in_schema=False)
    def overridden_redoc() -> HTMLResponse:
        return get_redoc_html(
            openapi_url=openapi_url,
            title=title,
            redoc_favicon_url=favicon_url,
        )

    @app.get("/favicon.ico", include_in_schema=False)
    async def serve_favicon() -> FileResponse:
        return FileResponse(favicon_path)

    def custom_openapi() -> Dict[str, Any]:
        # Build the schema once; later calls reuse the cached copy on the app
        if not app.openapi_schema:
            schema = get_openapi(
                title=title,
                description=description,
                version=version,
                routes=app.routes,
            )

            # Local and "airt-service" domains are advertised as plain http on
            # port 6006; any other domain is advertised as https on that domain.
            domain = environ["DOMAIN"]
            if domain == "localhost" or "airt-service" in domain:
                server_url = "http://0.0.0.0:6006"
            else:
                server_url = f"https://{domain}"

            # ToDo: Figure out recursive dict merge
            schema["servers"] = [{"url": server_url, "description": "Server"}]
            app.openapi_schema = schema

        return app.openapi_schema

    app.openapi = custom_openapi  # type: ignore

    fastkafka_app = create_fastkafka_application(
        start_process_for_username=start_process_for_username
    )

    return app, fastkafka_app

In [None]:
def create_fastapi_app(
    assets_path: Path = Path("../assets"),
) -> Tuple[FastAPI, FastKafka]:
    """Create the web service using an assets directory given relative to the notebook

    Args:
        assets_path: Path to assets; it is resolved to an absolute path
            before being handed to create_ws_server

    Returns:
        The (FastAPI, FastKafka) pair produced by create_ws_server
    """
    resolved_assets = assets_path.resolve()
    web_app, kafka_app = create_ws_server(assets_path=resolved_assets)
    return web_app, kafka_app

In [None]:
# Build the applications with the default assets path and wrap the FastAPI app
# in a TestClient so requests can be issued against it from the notebook.
app, fastkafka_app = create_fastapi_app()
client = TestClient(app)

23-04-13 11:11:06.479 [INFO] fastkafka._application.app: run_in_background() : Adding function 'startup_event' as background task


In [None]:
# test_username = "johndoe"
# oauth_data = dict(
#     username=test_username, password=environ["AIRT_SERVICE_SUPER_USER_PASSWORD"]
# )

# response = client.post("/token", data=oauth_data)
# actual = response.json()
# display(actual)
# assert "access_token" in actual
# assert actual["token_type"] == "bearer"

In [None]:
import asyncio

import httpx


async def test_function() -> str:
    """Poll the locally served /docs page once per second until cancelled.

    Progress is printed for each attempt: "docs retrieved" on success, "-"
    when the connection is refused and "." on a timeout. Any other error is
    printed after a "?" marker and re-raised.

    Cancellation is treated as the normal way to stop the loop. It is handled
    around the whole loop body, so a cancel that arrives while the HTTP
    request is in flight ends the task the same way as one that arrives
    during the one-second pause.

    Returns:
        "ok" once the task has been cancelled.
    """
    async with httpx.AsyncClient() as client:
        try:
            while True:
                try:
                    await client.get("http://0.0.0.0:6006/docs")
                    sanitized_print("docs retrieved")
                except httpx.ConnectError:
                    sanitized_print("-", end="")
                except httpx.TimeoutException:
                    sanitized_print(".", end="")
                except Exception as e:
                    sanitized_print("?", end="")
                    sanitized_print(e)
                    # Bare raise keeps the original traceback intact.
                    raise
                await asyncio.sleep(1)
        except asyncio.CancelledError:
            sanitized_print("\n*** task canceled ***")
            return "ok"


# task = asyncio.create_task(test_function())
# await asyncio.sleep(3)
# task.cancel()
# await asyncio.wait_for(task, timeout=2)
# task.result()

In [None]:
# Event definition ids that generated rows are drawn from.
definitions = [
    "appLaunch",
    "sign_in",
    "sign_out",
    "add_to_cart",
    "purchase",
    "custom_event_1",
    "custom_event_2",
    "custom_event_3",
]


# Application ids that generated rows are drawn from.
# applications = ["DriverApp", "PUBG", "COD"]
applications = ["DriverApp"]


def generate_n_rows_for_training_data(n: int, seed: int = 42) -> List[Dict[str, Any]]:
    """Generate ``n`` pseudo-random training-event rows as plain dictionaries.

    Every row has the keys ``AccountId`` (always 1000), ``ApplicationId``,
    ``ModelId`` ("ChurnModelForDrivers" or None), ``DefinitionId``,
    ``OccurredTimeTicks`` (epoch milliseconds), ``OccurredTime`` (the same
    instant formatted as ``%Y-%m-%dT%H:%M:%S.%f``) and ``PersonId``.

    Args:
        n: Number of rows to generate.
        seed: Seed for the random generator; the same seed yields the same rows.

    Returns:
        A list with one dictionary per row, round-tripped through JSON so it
        only contains plain Python values.
    """
    rng = np.random.default_rng(seed=seed)
    account_id = 1000
    definition_id = rng.choice(definitions, size=n)
    application_id = rng.choice(applications, size=n)
    model_id = rng.choice(["ChurnModelForDrivers", None], size=n)

    # The bounds are naive datetimes, so timestamp() interprets them in the
    # local timezone. They are converted to int explicitly instead of relying
    # on numpy to coerce float bounds.
    first_tick = int(datetime(year=2022, month=1, day=1).timestamp() * 1000)
    last_tick = int(datetime(year=2022, month=11, day=1).timestamp() * 1000)
    occurred_time_ticks = rng.integers(first_tick, last_tick, size=n)
    occurred_time = pd.to_datetime(occurred_time_ticks, unit="ms").strftime(
        "%Y-%m-%dT%H:%M:%S.%f"
    )

    # Roughly ten events per person. The upper bound is clamped to 1 because
    # n // 10 is 0 for n < 10 and rng.integers rejects an empty range.
    person_id = rng.integers(max(n // 10, 1), size=n)

    df = pd.DataFrame(
        {
            "AccountId": account_id,
            "ApplicationId": application_id,
            "ModelId": model_id,
            "DefinitionId": definition_id,
            "OccurredTimeTicks": occurred_time_ticks,
            "OccurredTime": occurred_time,
            "PersonId": person_id,
        }
    )
    return json.loads(df.to_json(orient="records"))


generate_n_rows_for_training_data(100)[-1]

{'AccountId': 1000,
 'ApplicationId': 'DriverApp',
 'ModelId': None,
 'DefinitionId': 'sign_in',
 'OccurredTimeTicks': 1649146037462,
 'OccurredTime': '2022-04-05T08:07:17.462000',
 'PersonId': 4}

In [None]:
async def test_kafka_integration(tester) -> None:
    """Drive one training-data upload through ``tester`` and show the stored events.

    Sends a training request for account 1000, produces 1000 generated events,
    waits (up to five minutes) for a training-data status message reporting
    999 of 1000 records, and finally displays every TrainingStreamStatus row
    stored for the "infobip" user and that account.

    Args:
        tester: Tester object exposing the ``to_*`` producers and the
            ``awaited_mocks`` used below.
    """
    account_id = 1000
    application_id = "DriverApp"
    model_id = "ChurnModelForDrivers"
    msg_count = 1000
    seed = 42

    training_request = ModelTrainingRequest(
        AccountId=account_id,
        ApplicationId=application_id,
        model_type="churn",
        ModelId=model_id,
        total_no_of_records=msg_count,
    )
    await tester.to_infobip_start_training_data(training_request)

    # Produce exactly msg_count events, one per generated row
    rows = generate_n_rows_for_training_data(msg_count, seed=seed)
    sanitized_print("Starting test production")
    for row in rows:
        await tester.to_infobip_training_data(EventData(**row))
    sanitized_print("Stopping test production")

    sanitized_print("Starting test consumption")
    expected_status = TrainingDataStatus(
        AccountId=account_id,
        ApplicationId=application_id,
        ModelId=model_id,
        no_of_records=999,
        total_no_of_records=msg_count,
    )
    await tester.awaited_mocks.on_infobip_training_data_status.assert_awaited_with(
        expected_status,
        timeout=5 * 60,
    )

    with get_session_with_context() as session:
        user = session.exec(select(User).where(User.username == "infobip")).one()

        display(f"All events for account id {account_id}")
        events_query = (
            select(TrainingStreamStatus)
            .where(TrainingStreamStatus.user == user)
            .where(TrainingStreamStatus.account_id == account_id)
        )
        display(list(session.exec(events_query)))


# Run test_kafka_integration against a freshly created web service / FastKafka app.
create_user_for_testing(username="infobip")
create_topics_for_user(username="infobip")
with set_env_variable_context(variable="JOB_EXECUTOR", value="fastapi"):
    with MonkeyPatch.context() as monkeypatch:
        # Replace the training-data count lookup with a fixed answer:
        # account 1000 has 999 records, checked "now".
        monkeypatch.setattr(
            "airt_service.training_status_process.get_count_from_training_data_ch_table",
            lambda account_ids: pd.DataFrame(
                {
                    "curr_count": [999],
                    "AccountId": [1000],
                    "curr_check_on": [datetime.utcnow()],
                }
            ).set_index("AccountId"),
        )
        app, fastkafka_app = create_ws_server(assets_path=Path("../assets"))
        # NOTE(review): config is built but never handed to a uvicorn server in this cell,
        # and it uses port 6009 while the rest of the notebook uses 6006 - confirm whether it is still needed.
        config = uvicorn.Config(app, host="0.0.0.0", port=6009, log_level="debug")

        # Only the FastKafka app is exercised here; it is active for the duration of the Tester context.
        async with Tester(fastkafka_app) as tester:
            # Server started.
            sanitized_print("server started")

            await test_kafka_integration(tester)

        sanitized_print("server stopped")
        # Server stopped.
        # Server stopped.

23-04-13 11:11:06.855 [INFO] fastkafka._application.app: run_in_background() : Adding function 'startup_event' as background task
23-04-13 11:11:06.858 [INFO] fastkafka._components.test_dependencies: Java is already installed.
23-04-13 11:11:06.859 [INFO] fastkafka._components.test_dependencies: Kafka is installed.
23-04-13 11:11:06.860 [INFO] fastkafka._testing.local_broker: Starting zookeeper...


%4|1681384266.720|CONFWARN|rdkafka#producer-1| [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance
%4|1681384266.720|CONFWARN|rdkafka#producer-1| [thrd:app]: Configuration property auto.offset.reset is a consumer property and will be ignored by this producer instance


23-04-13 11:11:07.622 [INFO] fastkafka._testing.local_broker: Starting kafka...
23-04-13 11:11:09.519 [INFO] fastkafka._testing.local_broker: Local Kafka broker up and running on 127.0.0.1:9092
23-04-13 11:11:11.454 [INFO] fastkafka._application.app: _create_producer() : created producer using the config: '{'bootstrap_servers': '127.0.0.1:9092'}'
23-04-13 11:11:11.468 [INFO] fastkafka._application.app: _create_producer() : created producer using the config: '{'bootstrap_servers': '127.0.0.1:9092'}'
23-04-13 11:11:11.476 [INFO] fastkafka._application.app: _create_producer() : created producer using the config: '{'bootstrap_servers': '127.0.0.1:9092'}'
23-04-13 11:11:11.482 [INFO] fastkafka._application.app: _create_producer() : created producer using the config: '{'bootstrap_servers': '127.0.0.1:9092'}'
23-04-13 11:11:11.488 [INFO] fastkafka._application.app: _create_producer() : created producer using the config: '{'bootstrap_servers': '127.0.0.1:9092'}'
23-04-13 11:11:11.495 [INFO] fa

23-04-13 11:11:11.633 [INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'infobip_prediction'})
23-04-13 11:11:11.634 [INFO] aiokafka.consumer.consumer: Subscribed to topic(s): {'infobip_prediction'}
23-04-13 11:11:11.635 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer subscribed.
23-04-13 11:11:11.636 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer started.
23-04-13 11:11:11.637 [INFO] aiokafka.consumer.subscription_state: Updating subscribed topics to: frozenset({'infobip_model_metrics'})
23-04-13 11:11:11.638 [INFO] aiokafka.consumer.consumer: Subscribed to topic(s): {'infobip_model_metrics'}
23-04-13 11:11:11.638 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer subscribed.
23-04-13 11:11:11.640 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer started.
23-04-13 11:11:11.641 [INFO] aiokafka.co

23-04-13 11:11:12.409 [INFO] aiokafka.consumer.group_coordinator: Discovered coordinator 0 for group davor-redpanda:9092_group
23-04-13 11:11:12.409 [INFO] aiokafka.consumer.group_coordinator: Revoking previously assigned partitions set() for group davor-redpanda:9092_group
23-04-13 11:11:12.410 [INFO] aiokafka.consumer.group_coordinator: (Re-)joining group davor-redpanda:9092_group
23-04-13 11:11:12.412 [INFO] aiokafka.consumer.group_coordinator: Discovered coordinator 0 for group davor-redpanda:9092_group
23-04-13 11:11:12.413 [INFO] aiokafka.consumer.group_coordinator: Revoking previously assigned partitions set() for group davor-redpanda:9092_group
23-04-13 11:11:12.413 [INFO] aiokafka.consumer.group_coordinator: (Re-)joining group davor-redpanda:9092_group
server started
Starting test production
23-04-13 11:11:15.383 [INFO] aiokafka.consumer.group_coordinator: Revoking previously assigned partitions frozenset({TopicPartition(topic='infobip_training_data', partition=0)}) for group 

'All events for account id 1000'

[TrainingStreamStatus(account_id=1000, event=<TrainingEvent.start: 'start'>, model_id='ChurnModelForDrivers', count=0, created=datetime.datetime(2023, 4, 11, 9, 56, 4), uuid=UUID('379dce71-c04b-4fe5-a858-e2daa44722a2'), id=4, application_id='DriverApp', model_type='churn', total=1000, user_id=4),
 TrainingStreamStatus(account_id=1000, event=<TrainingEvent.upload: 'upload'>, model_id='ChurnModelForDrivers', count=999, created=datetime.datetime(2023, 4, 11, 9, 56, 11), uuid=UUID('2c661a76-55f3-4c5d-83c2-9a93a52aaa30'), id=5, application_id='DriverApp', model_type='churn', total=1000, user_id=4),
 TrainingStreamStatus(account_id=1000, event=<TrainingEvent.end: 'end'>, model_id='ChurnModelForDrivers', count=999, created=datetime.datetime(2023, 4, 13, 11, 11, 12), uuid=UUID('5a202562-9b14-4830-9f39-6402d8b45ff0'), id=10, application_id='DriverApp', model_type='churn', total=1000, user_id=4),
 TrainingStreamStatus(account_id=1000, event=<TrainingEvent.start: 'start'>, model_id='ChurnModelFor

23-04-13 11:11:15.642 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer stopped.
23-04-13 11:11:15.644 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop() finished.
23-04-13 11:11:15.672 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer stopped.
23-04-13 11:11:15.673 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop() finished.
23-04-13 11:11:15.674 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer stopped.
23-04-13 11:11:15.674 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop() finished.
23-04-13 11:11:15.676 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop(): Consumer stopped.
23-04-13 11:11:15.677 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_consumer_loop() finished.
23-04-13 11:11:15.678 [INFO] fastkafka._components.aiokafka_consumer_loop: aiokafka_

In [None]:
# | eval: false
# Patch asyncio so that asyncio.run can be used to run FastAPI within the notebook
# (Jupyter has already started its own event loop).

nest_asyncio.apply()

In [None]:
# Handle of the background test task; set on application startup and
# cancelled/awaited on shutdown by the hooks registered below.
task = None


def start_fastapi_server(
    assets_path: Path = Path("../assets"),
    host: str = "0.0.0.0",
    port: int = 6006,
    test_function: Optional[Callable[[], Any]] = None,
) -> None:
    """Serve the FastAPI application with uvicorn, optionally running a test task alongside it.

    Args:
        assets_path: Path to assets, forwarded to create_fastapi_app
        host: Host interface passed to uvicorn
        port: Port passed to uvicorn
        test_function: Optional coroutine function. When given, it is started
            as an asyncio task on application startup; on shutdown the task
            is cancelled, awaited for up to 3 seconds and its result displayed.
    """
    app, _ = create_fastapi_app(assets_path=assets_path)

    if test_function is not None:

        async def launch_test_task():
            global task
            task = asyncio.create_task(test_function())

        async def collect_test_task():
            global task
            task.cancel()
            # wait_for hands back the task's result once it has finished
            result = await asyncio.wait_for(task, timeout=3)
            display(f"{result=}")

        app.on_event("startup")(launch_test_task)
        app.on_event("shutdown")(collect_test_task)

    uvicorn.run(app, host=host, port=port)

In [None]:
# | notest
# | eval: false

# Manual run: serve the app with uvicorn while test_function polls /docs in the background.
with MonkeyPatch.context() as monkeypatch:
    # Replace the training-data count lookup with a fixed answer:
    # account 1000 has 999 records, checked "now".
    monkeypatch.setattr(
        "airt_service.training_status_process.get_count_from_training_data_ch_table",
        lambda account_ids: pd.DataFrame(
            {
                "curr_count": [999],
                "AccountId": [1000],
                "curr_check_on": [datetime.utcnow()],
            }
        ).set_index("AccountId"),
    )
    # uvicorn.run blocks until the server is shut down, so the patch stays active for the whole run.
    start_fastapi_server(test_function=test_function)

23-04-13 11:11:24.730 [INFO] fastkafka._application.app: run_in_background() : Adding function 'startup_event' as background task


INFO:     Started server process [13366]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:6006 (Press CTRL+C to quit)


INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO: 

docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs r

docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs r

docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs r

docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs retrieved
INFO:     127.0.0.1:37740 - "GET /docs HTTP/1.1" 200 OK
docs r

INFO:     Shutting down
INFO:     Waiting for application shutdown.



*** task canceled ***


"result='ok'"

INFO:     Application shutdown complete.
INFO:     Finished server process [13366]
