In [1]:
import json
import logging
import re
import warnings
from pathlib import Path
from pprint import pprint
from typing import Annotated, Any, Generator, Literal, Type, TypeVar

# Third-party imports
import numpy as np
import numpy.typing as npt
import pandas as pd
import polars as pl

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings
np.set_printoptions(precision=4)

# Pandas settings
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)
pl.Config.set_tbl_rows(n=200)

warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black

# auto reload imports
%load_ext autoreload
%autoreload 2

In [2]:
from rich.console import Console
from rich.theme import Theme

# Named styles (hex colours) that can be referenced by name when printing.
custom_theme = Theme(
    dict(
        white="#FFFFFF",  # Bright white
        info="#00FF00",  # Bright green
        warning="#FFD700",  # Bright gold
        error="#FF1493",  # Deep pink
        success="#00FFFF",  # Cyan
        highlight="#FF4500",  # Orange-red
    )
)

# Shared console used for all rich output in this notebook.
console = Console(theme=custom_theme)


def create_path(path: str | Path) -> None:
    """
    Create parent directories for the given path if they don't exist.

    Parameters
    ----------
    path : str | Path
        The file path for which to create parent directories.

    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)


def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """
    Put an ancestor of the current working directory at the front of ``sys.path``.

    Parameters
    ----------
    go_up : int, default=1
        Number of directory levels to climb from the current working
        directory. Values less than or equal to zero resolve to the current
        working directory itself.

    Returns
    -------
    None
        The resolved absolute path is inserted at index 0 of ``sys.path``
        and printed.

    """
    import os
    import sys

    # Anchor explicitly on the working directory. The previous
    # `os.path.dirname(__name__)` always evaluated to "" and therefore only
    # worked relative to the working directory by accident.
    levels = [os.pardir] * go_up
    target = os.path.abspath(os.path.join(os.getcwd(), *levels))

    # Drop earlier copies first so that re-running the cell keeps the path at
    # the front of `sys.path` without accumulating duplicates.
    sys.path[:] = [entry for entry in sys.path if entry != target]
    sys.path.insert(0, target)
    print(target)

In [3]:
# Put the parent of the working directory at the front of sys.path; the helper
# also prints the resolved path.
# NOTE(review): presumably this is what lets the `config` and `schemas` imports
# in the next cell resolve — confirm against the project layout.
go_up_from_current_directory(go_up=1)

/Users/neidu/Desktop/Projects/Personal/batch-process


In [4]:
from uuid import uuid4, UUID

from config import app_config
from schemas.db_models import TaskResult, add_record_to_db, get_db_session, init_db

# Run the project's database initialisation before any records are created below.
# NOTE(review): what `init_db` actually sets up is defined in
# `schemas.db_models` and is not visible from this notebook.
init_db()

In [5]:
from datetime import datetime

from pydantic import BaseModel, Field


class TaskSchema(BaseModel):
    """
    Data schema for task results.

    Attributes
    ----------
    task_id : str
        Identifier of the task; defaults to a freshly generated UUID4 hex string.
    task_name : str
        Name of the task (required).
    status : {"pending", "completed"}
        Task status; defaults to ``"pending"``.
    result : dict[str, Any]
        Task result payload; defaults to an empty dict.
    error_message : str
        Error message; defaults to an empty string.
    created_at : str
        Creation time as an ISO 8601 string; defaults to the current local time.
    completed_at : str | None
        Completion time, or ``None`` when not set.

    """

    task_id: str = Field(default_factory=lambda: uuid4().hex, description="Task id")
    task_name: str = Field(description="Task name")
    status: Literal["pending", "completed"] = Field(
        default="pending", description="Task status"
    )
    result: dict[str, Any] = Field(default_factory=dict, description="Task result")
    error_message: str = Field(default="", description="Error message")
    # The factory has to return a string to honour the `str` annotation:
    # default values are not validated unless `validate_default` is enabled, so
    # a raw `datetime` would be stored unchanged and contradict the declared type.
    created_at: str = Field(
        default_factory=lambda: datetime.now().isoformat(),
        description="Creation time",
    )
    completed_at: str | None = Field(default=None, description="Completion time")

    def to_data_model_dict(self) -> dict[str, Any]:
        """
        Return the schema as a dict containing only JSON-compatible values.

        The model is serialised to JSON and parsed back, so every value in the
        returned dict is a plain JSON type.

        Returns
        -------
        dict[str, Any]
            Field names mapped to their JSON-round-tripped values.

        """
        return json.loads(self.model_dump_json())

In [6]:
# Build a schema instance; fields that are not passed fall back to their defaults.
_data = TaskSchema(task_name="ner", status="completed", result={}, error_message="")

# Feed the JSON-compatible dict into TaskResult and show the resulting object.
res = TaskResult(**_data.to_data_model_dict())
console.print(res)

In [7]:
input_data: dict[str, Any] = dict(task_name="ner", status="completed", result={})

# Step-by-step insert through the session.
# NOTE(review): `add_record_to_db(input_data, schema=TaskSchema, data_model=TaskResult)`
# presumably wraps these same steps — confirm in `schemas.db_models`.
with get_db_session() as db:
    record = TaskResult(**TaskSchema(**input_data).to_data_model_dict())
    db.add(record)
    db.flush()

    # Snapshot the record's attributes into a plain dict while the session is open.
    out = {
        attr: getattr(record, attr)
        for attr in (
            "id",
            "task_id",
            "task_name",
            "status",
            "result",
            "error_message",
            "created_at",
            "completed_at",
        )
    }
    console.print(out)

In [8]:
input_data: dict[str, Any] = dict(task_name="ner_2", status="completed", result={})

# Same kind of payload as before, this time stored through the project helper.
out = add_record_to_db(input_data, schema=TaskSchema, data_model=TaskResult)
console.print(out)