# Import Modules

In [1]:
import importlib
import os
import sys

import joblib
import numpy as np
import pandas as pd
import polars as pl

In [2]:
# Make the project root the working directory and importable.
# Assumes the kernel starts one level below the project root — TODO confirm.
# The root is captured once in PROJECT_ROOT so that re-running this cell does
# not climb another directory level each time (a bare `os.chdir("../")` is
# not idempotent).
if "PROJECT_ROOT" not in globals():
    os.chdir("../")
    PROJECT_ROOT = os.getcwd()
# Return to the root even if a later cell changed the working directory.
os.chdir(PROJECT_ROOT)
# Keep the root first on the import path without stacking duplicate entries
# on every re-run.
if sys.path[:1] != [PROJECT_ROOT]:
    sys.path.insert(0, PROJECT_ROOT)

In [3]:
from morai.experience import tables
from morai.forecast import models, preprocessors
from morai.utils import custom_logger, helpers

In [4]:
# Logger used by this notebook, obtained from morai's logging helper and named
# after the current module (`__main__` when the notebook is run interactively).
logger = custom_logger.setup_logging(__name__)

In [5]:
# Log level for the session. Change "INFO" here if more logging is wanted
# (presumably to a more verbose level such as "DEBUG" — confirm against
# custom_logger).
custom_logger.set_log_level("INFO")

In [6]:
# Show floats in pandas output with thousands separators and two decimals.
pd.set_option("display.float_format", "{:,.2f}".format)

# Load Data

In [16]:
# Parquet file holding the modelling dataset, relative to the working directory.
pl_parquet_path = "files/dataset/model_data.parquet"

In [17]:
# Enable the string cache before reading: it helps with categorical type
# values.
pl.enable_string_cache()

# Lazily scan the dataset; later cells call `.collect()` to materialise it.
lzdf = pl.scan_parquet(pl_parquet_path)

In [18]:
# Headline figures for the dataset: number of rows and total exposed amount.
# Each figure is produced by its own small lazy query.
initial_row_count = lzdf.select(pl.len()).collect().item()
total_exposure = lzdf.select(pl.col("amount_exposed").sum()).collect().item()

summary_text = f"row count: {initial_row_count:,} \n" f"exposures: {total_exposure:,}"
print(summary_text)

row count: 1,091,302 
exposures: 4,332,301,371,494.32


In [19]:
# Materialise the lazy query and convert it to pandas in a single statement.
# `model_data` is therefore only ever bound to the pandas frame, which keeps
# this cell safe to re-run: previously the name first held the polars frame
# and a second run of the conversion cell called `.to_pandas()` on a frame
# that had already been converted.
model_data = lzdf.collect().to_pandas()

## Preprocess

In [21]:
# Role of each dataset column in the model, keyed by how the preprocessor
# should treat it. Key order is kept exactly as before.
feature_dict = dict(
    # response variable and its weight
    target=["qx_raw"],
    weight=["amount_exposed"],
    # columns listed under "passthrough"
    passthrough=["attained_age", "duration", "observation_year"],
    # columns listed under "ordinal"
    ordinal=["sex", "smoker_status"],
    # columns listed under "ohe"
    ohe=["binned_face", "insurance_plan", "class_enh"],
    # no columns are assigned to "nominal"
    nominal=[],
)

In [22]:
# Run morai's preprocessing on the pandas frame using the column roles in
# `feature_dict`. Standardisation is switched off and a constant column is
# requested. Later cells read several entries out of the returned dictionary.
preprocess_dict = preprocessors.preprocess_data(
    model_data, feature_dict=feature_dict, standardize=False, add_constant=True
)

[37m 2024-07-07 15:42:06 [0m|[37m morai.forecast.preprocessors [0m|[32m INFO     [0m|[32m model target: ['qx_raw'] [0m
[37m 2024-07-07 15:42:06 [0m|[37m morai.forecast.preprocessors [0m|[32m INFO     [0m|[32m model weights: ['amount_exposed'] [0m
[37m 2024-07-07 15:42:06 [0m|[37m morai.forecast.preprocessors [0m|[32m INFO     [0m|[32m adding a constant column to the data [0m
[37m 2024-07-07 15:42:06 [0m|[37m morai.forecast.preprocessors [0m|[32m INFO     [0m|[32m passthrough - (generally numeric): ['attained_age', 'duration', 'observation_year', 'constant'] [0m
[37m 2024-07-07 15:42:06 [0m|[37m morai.forecast.preprocessors [0m|[32m INFO     [0m|[32m ordinal - ordinal encoded: ['smoker_status', 'sex'] [0m
[37m 2024-07-07 15:42:07 [0m|[37m morai.forecast.preprocessors [0m|[32m INFO     [0m|[32m nominal - one hot encoded (dropping first col): ['binned_face', 'class_enh', 'insurance_plan'] [0m


In [23]:
# Pull the pieces used later out of the preprocessing result in one go.
# The names on the left line up one-to-one with the keys on the right.
X, y, weights, mapping, md_encoded, model_features = (
    preprocess_dict[name]
    for name in ("X", "y", "weights", "mapping", "md_encoded", "model_features")
)

# Load Model

In [24]:
# Wrap a previously saved model in morai's GLM class.
# NOTE(review): joblib files are pickle-based, so only load model artifacts
# that come from a trusted source.
model_name = "glm"
model_path = f"files/models/{model_name}.joblib"

GLM = models.GLM()
GLM.model = joblib.load(model_path)
logger.info(f"loaded model '{model_name}'")

[37m 2024-07-07 15:42:11 [0m|[37m __main__ [0m|[32m INFO     [0m|[32m loaded model 'glm' [0m


# Create Table

## Predict

In [25]:
# Override the mapping entries for attained_age and duration before the table
# is generated.
# NOTE(review): the two-element tuples presumably describe (low, high) value
# ranges — confirm against preprocessors.update_mapping.
for feature, bounds in (("attained_age", (0, 121)), ("duration", (1, 122))):
    mapping = preprocessors.update_mapping(
        mapping=mapping, key=feature, values=bounds
    )

In [30]:
# Generate the rate table and the multiplier table from the loaded GLM.
#
# `preprocess_mapping` is given the `mapping` variable, i.e. the object
# returned by `preprocessors.update_mapping`, rather than
# `preprocess_dict["mapping"]`. This way the attained_age / duration overrides
# are used whether that helper mutates its argument in place or returns a new
# object; if it mutates in place the two are the same object and nothing
# changes.
rate_table, mult_table = tables.generate_table(
    model=GLM.model,
    preprocess_mapping=mapping,
    preprocess_feature_dict=preprocess_dict["feature_dict"],
    preprocess_params=preprocess_dict["params"],
    grid=None,
    # NOTE(review): presumably these features end up in `mult_table` instead
    # of `rate_table` — confirm against tables.generate_table.
    mult_features=["observation_year", "insurance_plan", "binned_face", "class_enh"],
)
# Remove rows where attained_age, issue_age, or duration is invalid.
rate_table = tables.check_aa_ia_dur_cols(rate_table)

[37m 2024-07-07 15:48:35 [0m|[37m morai.experience.tables [0m|[32m INFO     [0m|[32m generating table for model GLMResultsWrapper [0m
[37m 2024-07-07 15:48:36 [0m|[37m morai.experience.tables [0m|[32m INFO     [0m|[32m mult_table rows: 30 [0m
[37m 2024-07-07 15:48:36 [0m|[37m morai.experience.tables [0m|[32m INFO     [0m|[32m rate_table shape: (30496, 6) [0m
[37m 2024-07-07 15:48:36 [0m|[37m morai.experience.tables [0m|[32m INFO     [0m|[32m Removed '488' rows where attained_age, issue_age, or duration was invalid. [0m


## Output

In [28]:
# Save the rate table, together with the multiplier table, to "glm.xlsx".
tables.output_table(
    rate_table=rate_table,
    filename="glm.xlsx",
    mult_table=mult_table,
)

[37m 2024-07-07 15:44:41 [0m|[37m morai.experience.tables [0m|[32m INFO     [0m|[32m saving table to C:\Users\johnk\Desktop\github\morai\files\dataset\tables\glm.xlsx [0m
