# Import

In [14]:
import copy

import lightgbm as lgb
import mlflow
import numpy as np
import pandas as pd
from causalml.inference.meta import BaseTClassifier
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

from cate.dataset import Dataset, to_rank
from cate.mlflow import initialize
from cate.utils import PathLinker, Timer, get_logger

In [15]:
# Notebook-wide handles shared by the cells below.
# `pathlinker.base` is the location the dataset is loaded from.
pathlinker = PathLinker().data.test
# Started/stopped around each fold's `fit` call in the training cell.
timer = Timer()
logger = get_logger("causalml")
# `experiment.experiment_id` is passed to `mlflow.start_run`.
# NOTE(review): presumably this creates/selects the MLflow experiment "test" —
# confirm in `cate.mlflow.initialize`.
experiment = initialize("test")

In [9]:
# Re-executing this cell while a run is still open would make
# `mlflow.start_run` raise ("already active"). Close any active run first so
# the cell can be re-run safely.
if mlflow.active_run() is not None:
    mlflow.end_run()
run = mlflow.start_run(experiment_id=experiment.experiment_id)

2024/10/26 09:28:57 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


# Functions

# Model

# Read Data

In [16]:
# Load the dataset from `pathlinker.base`. The training cell reads `ds.X`,
# `ds.y`, `ds.w` and the `x_columns` / `y_columns` / `w_columns` attributes.
ds = Dataset.load(pathlinker.base)

# Model

In [17]:
# Both LightGBM base learners are built from one shared configuration so the
# settings cannot drift apart.
_lgbm_kwargs = dict(
    importance_type="gain",
    random_state=42,
    force_col_wise=True,
    n_jobs=-1,
)
base_classifier = lgb.LGBMClassifier(**_lgbm_kwargs)
base_regressor = lgb.LGBMRegressor(**_lgbm_kwargs)

# Meta-learners to train, keyed by the name used for logging and timing.
base_models = {"tlearner": BaseTClassifier(base_classifier)}

In [18]:
# Compatibility shim: re-create the `np.int` alias, which was removed in
# NumPy 1.24. NOTE(review): presumably still referenced by a dependency
# (causalml?) — confirm, and drop this once the dependency is fixed.
setattr(np, "int", int)

In [None]:
# 5-fold cross-validation, stratified on the outcome `ds.y`; shuffling uses a
# fixed seed so fold membership is reproducible.
skf = StratifiedKFold(5, shuffle=True, random_state=42)
for name, model in base_models.items():
    logger.info(f"start {name}")
    models = []  # one independent fitted snapshot per fold
    indices = []  # validation row positions per fold
    propencity_scores = []  # constant propensity estimate per fold
    # Wrap the split generator itself (enumerate() has no length) and pass
    # `total` so tqdm can show real progress instead of a bare counter.
    fold_splits = tqdm(
        skf.split(np.zeros(len(ds)), ds.y),
        total=skf.get_n_splits(),
    )
    for i, (train_idx, valid_idx) in enumerate(fold_splits):
        train_X = ds.X.iloc[train_idx]
        train_y = ds.y.iloc[train_idx].to_numpy().reshape(-1)
        train_w = ds.w.iloc[train_idx].to_numpy().reshape(-1)
        # The validation slices are not consumed in this cell; they are kept so
        # the names stay available exactly as before.
        valid_X = ds.X.iloc[valid_idx]
        valid_y = ds.y.iloc[valid_idx].to_numpy().reshape(-1)
        valid_w = ds.w.iloc[valid_idx].to_numpy().reshape(-1)

        # Constant propensity: the mean of the treatment column in the training
        # fold, used both as `p` for fitting and as the recorded score.
        propencity_score = train_w.mean()

        timer.start(name, "train", i)
        try:
            model.fit(
                train_X,
                train_w,
                train_y,
                p=np.full(train_w.shape, propencity_score),
            )
        finally:
            # Always stop the timer, even when fitting fails, so it is not
            # left running for the next execution of this cell.
            timer.stop(name, "train", i)

        # `model` is refit in place on every fold, so store a deep copy;
        # appending `model` itself would leave every entry pointing at the
        # same object (i.e. the last fold's fit).
        models.append(copy.deepcopy(model))
        indices.append(valid_idx)
        propencity_scores.append(propencity_score)

    # Logs the estimator as fitted on the last fold.
    model_info = mlflow.sklearn.log_model(model, "model")
    # NOTE(review): `targets` receives a list of column names and no
    # `model_type` is given — confirm this is what `mlflow.evaluate` expects.
    result = mlflow.evaluate(
        model_info.model_uri,
        ds.to_pandas(),
        targets=ds.w_columns + ds.y_columns,
        feature_names=ds.x_columns,
        extra_metrics=[],
        custom_artifacts=[],
    )

In [7]:
from typing import Literal


def hoge(i: int) -> tuple[int, int]:
    """Return ``i`` paired with its square.

    Args:
        i: The number to square.

    Returns:
        The tuple ``(i, i**2)``. Because it is a two-item pair, an iterable of
        these results can be passed directly to ``dict`` as key/value items.
    """
    return i, i**2

# Build {i: i**2} for i in 0..9 from the (key, value) pairs returned by hoge.
dict(map(hoge, range(10)))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}