In [31]:
from pathlib import Path

import click
import pandas as pd
import seaborn as sns
from pymer4.models.Lm import Lm
from pymer4.models.Lmer import Lmer
from rra_tools import jobmon
from sklearn.model_selection import train_test_split

from spatial_temp_cgf import binning, scaling
from spatial_temp_cgf import cli_options as clio
from spatial_temp_cgf.data import DEFAULT_ROOT, ClimateMalnutritionData
from spatial_temp_cgf.model_specification import (
    ModelSpecification,
)
from spatial_temp_cgf.training.run_training import prepare_model_data

In [32]:
def prepare_model_version(model_specification_path):
    """Register a new model version for the specification at the given path.

    The specification is read from YAML, a new model version is requested
    from the ``ClimateMalnutritionData`` interface rooted at
    ``DEFAULT_ROOT / <measure>``, that version is written onto the
    specification, and the specification is saved under the new version.

    Parameters
    ----------
    model_specification_path
        Path to the YAML file passed to ``ModelSpecification.from_yaml``.

    Returns
    -------
    The model version returned by ``ClimateMalnutritionData.new_model_version``.
    """
    spec = ModelSpecification.from_yaml(model_specification_path)

    # Data for each measure lives in its own directory under the default root.
    data_interface = ClimateMalnutritionData(Path(DEFAULT_ROOT) / spec.measure)

    # Stamp the spec with the new version before persisting it.
    version = data_interface.new_model_version()
    spec.version.model = version
    data_interface.save_model_specification(spec, version)

    return version

# Prepare a test model version. This only needs to be run once before testing.
# model_spec_path = "../specifications/stunting_test.yaml"
# prepare_model_version(model_spec_path)

In [33]:
# Notebook configuration: which saved model version to load and which
# sex / age-group subset of the training data to fit on.
output_root = Path(DEFAULT_ROOT)  # joined with `measure` below to locate the data
measure = 'stunting'
model_version = '2024_06_28.04'  # presumably created earlier via prepare_model_version — TODO confirm
age_group_id = 5  # compared against the `age_group_id` column when subsetting
sex_id = 1  # compared against the `sex_id` column when subsetting

In [34]:
# Data interface for the configured measure, and the specification that was
# saved alongside the configured model version.
cm_data = ClimateMalnutritionData(output_root / measure)
model_spec = cm_data.load_model_specification(model_version)

# Load the training data named by the spec, replace its index with a fresh
# positional one, and add a constant intercept column.
# FIXME: Prep leaves a bad index
full_training_data = (
    cm_data.load_training_data(model_spec.version.training_data)
    .reset_index(drop=True)
    .assign(intercept=1.)
)

# Rows belonging to the configured sex / age group. Built on the full frame
# so that it can be applied after the model data has been prepared.
subset_mask = (
    full_training_data.sex_id.eq(sex_id)
    & full_training_data.age_group_id.eq(age_group_id)
)

# Keep only the raw variables the spec asks for; none of them may be missing.
raw_df = full_training_data.loc[:, model_spec.raw_variables]
null_mask = raw_df.isna().any(axis=1)
assert not null_mask.any()

# Prepare model data on ALL rows first, then restrict to the subset.
# NOTE(review): this relies on `prepare_model_data` returning a frame whose
# index still lines up with `full_training_data` — TODO confirm.
df, var_info = prepare_model_data(raw_df, model_spec)
df = df.loc[subset_mask].reset_index(drop=True)

In [35]:
# import matplotlib.pyplot as plt
# import seaborn as sns
# fig, axes = plt.subplots(figsize=(12, 30), nrows=5)

# vmin = 0
# vmax = 5000

# sns.heatmap(df.groupby(['grid_cell', 'ihme_loc_id']).stunting.count().unstack(), ax=axes[0], vmin=vmin, vmax=vmax)
# sns.heatmap(df.groupby(['temp', 'ihme_loc_id']).stunting.count().unstack(), ax=axes[1], vmin=vmin, vmax=vmax)
# sns.heatmap(df.groupby(['ldi_pc_pd', 'ihme_loc_id']).stunting.count().unstack(), ax=axes[2], vmin=vmin, vmax=vmax)
# sns.heatmap(df.groupby(['ldi_pc_pd', 'temp']).stunting.count().unstack(), ax=axes[3])
# sns.heatmap(df.groupby(['ldi_pc_pd', 'temp']).stunting.mean().unstack().sort_index(ascending=False), ax=axes[4], cmap='RdYlBu_r')
# fig.tight_layout()

In [None]:
# Exploratory fit with a hand-written formula: binomial mixed model of `stunting`
# with a random intercept per `location` and fixed effects for `ldi_pc_pd`, `temp`
# and their interaction.
# NOTE(review): `test_df` is not defined in any cell visible in this notebook, so
# this cell depends on hidden kernel state and will not survive Restart & Run All.
# Define `test_df` explicitly, or switch to `df` after confirming it has a
# `location` column — TODO confirm which frame was intended.
model = Lmer("stunting ~ (1 | location) + ldi_pc_pd + temp + temp*ldi_pc_pd", data=test_df, family='binomial')
model.fit(verbose=True)

In [None]:
# Display the 'Estimate' column of the fitted model's coefficient table.
# (The original line ended in a dangling `.`, which is a SyntaxError.)
model.coefs['Estimate']


In [None]:
# Show the formula taken from the model specification, then fit a binomial
# mixed model with it on the prepared, subsetted data.
print(model_spec.lmer_formula)
model = Lmer(
    model_spec.lmer_formula,
    data=df,
    family='binomial',
)
model.fit(verbose=True)

# Stop here if the model object does not report itself as fitted.
if not model.fitted:
    raise ValueError("Model failed to converge.")

In [None]:
# Observed mean of `stunting` for every (temp, ldi_pc_pd) combination in the
# model data. After `unstack()` the rows are `temp` values and the columns are
# `ldi_pc_pd` values.
mean_stunting_grid = (
    df.groupby(['temp', 'ldi_pc_pd'])
    .stunting.mean()
    .sort_index()
    .unstack()
)

# `sns` comes from the notebook's import cell.
# Pass vmin=0 / vmax=0.5 here to pin the colour scale when comparing figures.
ax = sns.heatmap(mean_stunting_grid, cmap='hot_r')
# Title the figure so it stands alone; the trailing `;` hides the text repr.
ax.set_title('Mean stunting by temp (rows) and ldi_pc_pd (columns)');

In [None]:
# Exploratory: print the built-in help for the `design_matrix` attribute of `model`.
# NOTE(review): leftover inspection call — consider removing it before sharing.
help(model.design_matrix)