In [1]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
import click
from rra_tools import jobmon
from pymer4.models.Lmer import Lmer

from spatial_temp_cgf import cli_options as clio
from spatial_temp_cgf import binning, scaling
from spatial_temp_cgf.data import DEFAULT_ROOT, ClimateMalnutritionData
from spatial_temp_cgf.model_specification import (
    ModelSpecification,
)

from spatial_temp_cgf.training.run_training import prepare_model_data

In [2]:
def prepare_model_version(model_specification_path):
    output_root = DEFAULT_ROOT

    # Build version and save spec
    model_spec = ModelSpecification.from_yaml(model_specification_path)
    measure = model_spec.measure
    measure_root = Path(output_root) / measure
    cm_data = ClimateMalnutritionData(measure_root)
    model_version = cm_data.new_model_version()
    model_spec.version.model = model_version
    cm_data.save_model_specification(model_spec, model_version)
    return model_version

# Prepare a test version.  Only need to run once to do testing
model_spec_path = "../specifications/stunting_test.yaml"
prepare_model_version(model_spec_path)

'2024_06_28.07'

In [3]:
output_root = Path(DEFAULT_ROOT)
measure = 'stunting'
model_version = '2024_06_28.06'
age_group_id = 4
sex_id = 1

In [4]:
cm_data = ClimateMalnutritionData(output_root / measure)
model_spec = cm_data.load_model_specification(model_version)

# Load training data
full_training_data = cm_data.load_training_data(model_spec.version.training_data)
# FIXME: Prep leaves a bad index
full_training_data = full_training_data.reset_index(drop=True)    
full_training_data['intercept'] = 1.

subset_mask = (
    (full_training_data.sex_id == sex_id) 
    & (full_training_data.age_group_id == age_group_id)
)

raw_df = full_training_data.loc[:, model_spec.raw_variables]    
null_mask = raw_df.isnull().any(axis=1)
assert null_mask.sum() == 0

df, var_info = prepare_model_data(raw_df, model_spec)

df = df.loc[subset_mask].reset_index(drop=True)

In [5]:
model_spec.lmer_formula

'stunting ~ (1 | ihme_loc_id) + temp + ldi_pc_pd + temp * ldi_pc_pd'

In [6]:
model = Lmer(model_spec.lmer_formula, data=df, family='binomial')
model.fit(verbose=True)


Fitting generalized linear model using glmer (family binomial) with Wald confidence intervals...

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: stunting~(1|ihme_loc_id)+temp+ldi_pc_pd+temp*ldi_pc_pd

Family: binomial	 Inference: parametric

Number of observations: 94627	 Groups: {'ihme_loc_id': 52.0}

Log-likelihood: -50902.242 	 AIC: 101814.483

Random effects:

                    Name   Var    Std
ihme_loc_id  (Intercept)  0.14  0.374

No random effect correlations specified

Fixed effects:



Unnamed: 0,Estimate,2.5_ci,97.5_ci,SE,OR,OR_2.5_ci,OR_97.5_ci,Prob,Prob_2.5_ci,Prob_97.5_ci,Z-stat,P-val,Sig
(Intercept),-0.999,-1.173,-0.824,0.089,0.368,0.309,0.439,0.269,0.236,0.305,-11.23,0.0,***
temp,-0.238,-0.418,-0.059,0.092,0.788,0.658,0.943,0.441,0.397,0.485,-2.599,0.009,**
ldi_pc_pd,2.877,0.976,4.777,0.97,17.754,2.654,118.78,0.947,0.726,0.992,2.966,0.003,**
temp:ldi_pc_pd,-7.472,-9.926,-5.019,1.252,0.001,0.0,0.007,0.001,0.0,0.007,-5.969,0.0,***


In [15]:
import numpy as np
size = 5000
x = pd.DataFrame({
    'temp': np.linspace(5, 50, size),
    'ldi_pc_pd': np.linspace(0, 500, size),
    'ihme_loc_id': np.nan * np.ones(size),
    'intercept': np.ones(size),
})

for predictor in model_spec.predictors:
    x, _ = scaling.scale_column(x, predictor.name, predictor.transform, var_info[predictor.name])



KeyError: "None of [Index(['temp'], dtype='object')] are in the [index]"

In [14]:
x

(0       1.0
 1       1.0
 2       1.0
 3       1.0
 4       1.0
        ... 
 4995    1.0
 4996    1.0
 4997    1.0
 4998    1.0
 4999    1.0
 Name: intercept, Length: 5000, dtype: float64,
 {})

In [None]:
model.predict(x, skip_data_checks=True, verify_predictions=False)

In [8]:
var_info

{'intercept': {},
 'temp': {'clip': False, 'copy': True, 'feature_range': (0, 1)},
 'ldi_pc_pd': {'clip': False, 'copy': True, 'feature_range': (0, 1)}}

[PredictorSpecification(name='intercept', transform=ScalingSpecification(type='scaling', strategy=<ScalingStrategy.IDENTITY: 'identity'>), random_effect='ihme_loc_id'),
 PredictorSpecification(name='temp', transform=ScalingSpecification(type='scaling', strategy=<ScalingStrategy.MIN_MAX: 'min_max'>), random_effect=''),
 PredictorSpecification(name='ldi_pc_pd', transform=ScalingSpecification(type='scaling', strategy=<ScalingStrategy.MIN_MAX: 'min_max'>), random_effect='')]