# Test Fitting a Mixture of Gaussians on Peaked Locations

In [None]:
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import curvefit
import curvefit.core.utils as utils
from scipy.optimize import minimize
from copy import deepcopy
from scipy import stats
from pprint import pprint
from curvefit.pipelines.flat_asymmetric_model import APFlatAsymmetricModel
from curvefit.pipelines.ap_model import APModel
from curvefit.core.utils import get_obs_se, local_smoother, df_to_mat
from curvefit.core.functions import ln_gaussian_cdf, ln_gaussian_pdf, gaussian_cdf, gaussian_pdf

In [None]:
# Group names later handed to the pipeline under the `peaked_groups` key.
# Kept in alphabetical order, several names per row grouped by first letter.
peaked_groups = [
    "Abruzzo", "Andalucia",
    "Baden-Wurttemberg", "Basque Country", "Bavaria",
    "Campania", "Canary Islands", "Castile and Leon", "Catalonia",
    "Colorado", "Community of Madrid",
    "Emilia-Romagna", "Extremadura",
    "Friuli-Venezia Giulia",
    "Galicia",
    "King and Snohomish Counties (excluding Life Care Center), WA",
    "La Rioja", "Lazio", "Liguria", "Lombardia",
    "Marche",
    "Navarre", "New York",
    "Other Counties, WA",
    "Provincia autonoma di Bolzano", "Provincia autonoma di Trento",
    "Valencian Community", "Valle d'Aosta", "Veneto",
    "Wuhan City, Hubei",
]

In [None]:
# Relative locations of the observation table and the covariate table.
file_path_data, file_path_covariate = (
    '../data/New York.csv',
    '../data/New York covariate.csv',
)

In [None]:
# Read both CSV files; the covariate table names its key column 'location',
# so rename it to match the 'Location' column used as the join key below.
df = pd.read_csv(file_path_data)
df_cov = pd.read_csv(file_path_covariate).rename(columns={'location': 'Location'})

# Inner join: only rows whose 'Location' appears in both tables are kept.
df = df.merge(df_cov, on='Location', how='inner').copy()

# Add a constant 'intercept' column and the exponentiated log death rate.
df = df.assign(**{
    'intercept': 1.0,
    'smooth death rate': np.exp(df['ln(age-standardized death rate)']),
})

In [None]:
# Hand the merged frame to curvefit's input pre-processing, naming the group,
# time and observation columns positionally and the covariate columns by keyword.
# NOTE(review): later cells refer to columns such as 'location', 'days',
# 'asddr' and 'ln ascdr' — presumably produced by this call; confirm.
df = utils.process_input(
    df,
    'Location',
    'Days',
    'smooth death rate',
    col_covs=['cov_1w', 'intercept'],
)

## Create Pipeline

In [None]:
# Settings shared by every model-info dictionary built from this one:
# column names, the covariate list, the prediction-space function and the
# list of groups passed as `peaked_groups`.
basic_info_dict = {
    'all_cov_names': ['cov_1w'],
    'col_t': 'days',
    'col_group': 'location',
    'predict_space': ln_gaussian_pdf,
    'col_obs_compare': 'ln asddr',
    'peaked_groups': peaked_groups,
}

# Per-parameter model specification. The four lists are parallel: entry i of
# each list belongs to param_names[i].
basic_model_dict = {
    'param_names': ['alpha', 'beta', 'p'],
    'col_covs': [['intercept'], ['cov_1w'], ['intercept']],
    # alpha and p go through exp; beta is passed through unchanged.
    'link_fun': [np.exp, lambda x: x, np.exp],
    # Identity for all three variable link functions.
    'var_link_fun': [(lambda x: x) for _ in range(3)],
}

In [None]:
# ---- basic fit parameters -------------------------------------------------
# Two-element prior/bound templates reused below.
zero_uprior = [0.0, 0.0]
dummy_uprior = [-np.inf, np.inf]
dummy_gprior = [0.0, np.inf]

# Starting point and box bounds for the three fixed effects.
fe_init = np.array([-3, 28.0, -8.05])
fe_bounds = [[-np.inf, 0.0], [15.0, 100.0], [-15, -6]]

# Optimizer options used for the basic fit and for smart initialization.
options = {'ftol': 1e-10, 'gtol': 1e-10, 'maxiter': 500, 'disp': True}

# Fit settings for the basic fit: random-effect bounds are [0.0, 0.0] for
# every parameter.
basic_fit_dict = {
    'fe_init': fe_init,
    'fe_bounds': fe_bounds,
    're_bounds': [zero_uprior] * 3,
    'fe_gprior': [dummy_gprior] * 3,
    're_gprior': [dummy_gprior] * 3,
    'options': options,
}

# Fit settings for the joint model: unbounded random effects, a [0.0, 10.0]
# Gaussian prior on the second random effect, and the same optimizer options
# except for a much smaller iteration cap.
basic_joint_model_fit_dict = {
    'fe_gprior': [dummy_gprior] * 3,
    're_bounds': [dummy_uprior] * 3,
    're_gprior': [dummy_gprior, [0.0, 10.0], dummy_gprior],
    'smart_initialize': True,
    'smart_init_options': options,
    'options': {**options, 'maxiter': 10},
}

In [None]:
# Start from an independent copy of the shared settings, then layer the
# entries specific to this model on top.
model_info_dict = deepcopy(basic_info_dict)
model_info_dict.update({
    'fun': ln_gaussian_cdf,
    'col_obs': 'ln ascdr',
    # Observation standard error as a function of x: 1 / (1 + x).
    'obs_se_func': lambda x: (1. / (1. + x)),
    # 10 raised to (0.1*x - 1.5) clamped to [-1, 0], so the result lies in
    # [0.1, 1.0].
    'prior_modifier': lambda x: 10**(min(0.0, max(-1.0, 0.1*x - 1.5))),
})

In [None]:
# Independent copy of the basic fit settings plus a functional Gaussian prior:
# the function is the product of the first two parameters, paired with the
# two-element list [exp(0.7), 0.1].
fit_dict = deepcopy(basic_fit_dict)
fit_dict['fun_gprior'] = [
    lambda params: params[0] * params[1],
    [np.exp(0.7), 1e-1],
]

In [None]:
# Bounds array with 13 identical rows of [0.0, inf] (shape (13, 2)).
# NOTE(review): this dict is not passed to the APFlatAsymmetricModel call
# visible in this notebook — confirm whether that is intended. The row count
# matches the mixture_size=13 used there.
gm_fit_dict = {
    'bounds': np.tile([0.0, np.inf], (13, 1)),
}

In [None]:
# Build the pipeline object: mixture-specific settings first, then the data
# and the fit/model dictionaries, and finally every entry of model_info_dict
# expanded into keyword arguments.
pipeline_model = APFlatAsymmetricModel(
    # mixture settings
    mixture_size=13,
    beta_stride=2,
    gm_fit_threshold=15,
    daily_col='asddr',
    # data and model/fit configuration
    all_data=df,
    basic_model_dict=basic_model_dict,
    fit_dict=fit_dict,
    joint_model_fit_dict=basic_joint_model_fit_dict,
    **model_info_dict,
)

In [None]:
# Notebook display cell: show the 'bounds' entry of the gm_fit_dict attribute
# held by the pipeline object (the expression value is the cell output).
pipeline_model.gm_fit_dict['bounds']

In [None]:
# Run the pipeline: 1000 draws over prediction times 0..99, with the
# 'Wuhan City, Hubei' group passed as the only excluded group.
pipeline_model.run(
    n_draws=1000,
    prediction_times=np.arange(100),
    cv_lower_threshold=1e-3,
    cv_upper_threshold=4.,
    smoothed_radius=[3, 3],
    num_smooths=2,
    exclude_groups=['Wuhan City, Hubei'],
    exclude_below=0,
)

In [None]:
# Plot the results over prediction times 0..99, drawing with gaussian_pdf and
# using the 'asddr' column for the plotted observations; groups=None is passed
# through unchanged.
pipeline_model.plot_results(
    prediction_times=np.arange(100),
    draw_space=gaussian_pdf,
    plot_obs='asddr',
    groups=None,
)
# NOTE(review): absolute, user-specific output path — this only works on a
# machine that has this directory; consider a relative or configurable path.
plt.savefig("/Users/mnorwood/OneDrive/Documents/covid/results_mixtures_7.png")