In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import logging
import pathlib
import pprint
from logging import Formatter
from logging.handlers import RotatingFileHandler

from hydra import compose, initialize_config_dir
from rich.console import Console
from rich.logging import RichHandler

from topollm.config_classes.constants import HYDRA_CONFIGS_BASE_PATH
from topollm.config_classes.main_config import MainConfig
from topollm.logging.initialize_configuration_and_log import initialize_configuration

# Format applied to the log file: timestamp, level, logger name, message, and source location.
LOGFORMAT_FILE = "[%(asctime)s][%(levelname)8s][%(name)s] %(message)s (%(filename)s:%(lineno)s)"
# The rich console handler is given only the bare message.
LOGFORMAT_RICH = "%(message)s"

# Console output is sent to stderr.
error_console = Console(stderr=True)

rich_handler = RichHandler(console=error_console)
rich_handler.setFormatter(Formatter(LOGFORMAT_RICH))

# The log file path is relative to the current working directory.
rotating_file_path = pathlib.Path(
    "logs",
    "load_tokenizer_and_model.log",
)
rotating_file_path.parent.mkdir(exist_ok=True, parents=True)
rotating_file_handler = RotatingFileHandler(
    rotating_file_path,
    maxBytes=1024 * 1024 * 10,  # 10 MiB
    backupCount=10,
    # Pin the encoding so the log file does not depend on the platform default.
    encoding="utf-8",
)

# `format` is only assigned to handlers that have no formatter yet, so it
# applies to the file handler while the rich handler keeps LOGFORMAT_RICH.
# `force=True` removes and closes handlers already attached to the root logger;
# without it, `basicConfig` silently does nothing when the root logger is
# already configured (e.g. when this cell is re-run), and the freshly created
# file handler would be left open but unused.
logging.basicConfig(
    level=logging.INFO,
    format=LOGFORMAT_FILE,
    handlers=[
        rich_handler,
        rotating_file_handler,
    ],
    force=True,
)

logger = logging.getLogger("load_tokenizer_and_model")

In [None]:
# Compose the hydra configuration directly inside the notebook, following:
# https://github.com/facebookresearch/hydra/blob/main/examples/jupyter_notebooks/compose_configs_in_notebook.ipynb
abs_config_dir = pathlib.Path(HYDRA_CONFIGS_BASE_PATH)

# Overrides applied on top of "main_config" for this notebook session.
hydra_overrides = [
    "feature_flags.finetuning.use_wandb=false",
    "finetuning=finetuning_for_token_classification",
]

with initialize_config_dir(config_dir=str(abs_config_dir), version_base=None):
    config = compose(
        config_name="main_config",
        overrides=hydra_overrides,
        return_hydra_config=True,
    )

    # Log the composed configuration before handing it to the project initializer.
    formatted_config = pprint.pformat(config, indent=4)
    logger.info(formatted_config)

    main_config: MainConfig = initialize_configuration(config=config, logger=logger)