In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from mrtool import MRData, LinearCovModel
from gkmodel import OverallModel, StudyModel, TwoStageModel, StagewiseModel, result_to_df

In [None]:
# Name of the indicator being modelled; it selects both the input file and
# the `<indicator>_logit*` columns used further down.
indicator = 'smoking'

# Read the indicator's input table from the sibling data directory.
df = pd.read_csv(filepath_or_buffer=f'../data/{indicator}.as.csv')

In [None]:
# Preview the first three rows of the loaded table.
df.head(3)

In [None]:
# Wrap the raw table in an MRData container:
#   - observations / standard errors come from `<indicator>_logit` and
#     `<indicator>_logit_se`,
#   - `sdi` and `year_id` are registered as covariates,
#   - rows are grouped into studies by `ls_id`.
data_stage1 = MRData()
data_stage1.load_df(
    df,
    col_study_id='ls_id',
    col_covs=['sdi', 'year_id'],
    col_obs=f'{indicator}_logit',
    col_obs_se=f'{indicator}_logit_se',
)

In [None]:
# Covariate models for the two stages.
# NOTE(review): the next code cell (under "Using TwoStage Model") assigns
# cov_models1 and cov_models2 again with the same definitions before either
# name is first used, so this cell appears to be redundant.

# Stage 1: intercept plus a spline on `sdi` with five evenly spaced knots
# from 0.0 to 1.0; both spline_l_linear and spline_r_linear are enabled.
cov_models1 = [
    LinearCovModel('intercept'),
    LinearCovModel(
        'sdi',
        use_spline=True,
        spline_knots=np.linspace(0.0, 1.0, 5),
        spline_l_linear=True,
        spline_r_linear=True,
    ),
]

# Stage 2: intercept plus a plain linear term in `year_id`.
cov_models2 = [
    LinearCovModel('intercept'),
    LinearCovModel('year_id'),
]

### Using TwoStage Model

In [None]:
# First-stage covariates: an intercept and a spline in `sdi`.
# The knots are np.linspace(0.0, 1.0, 5), i.e. 0, 0.25, 0.5, 0.75, 1, and
# both the left and right "linear" spline flags are switched on.
cov_models1 = [
    LinearCovModel('intercept'),
    LinearCovModel(
        'sdi',
        spline_knots=np.linspace(0.0, 1.0, 5),
        use_spline=True,
        spline_l_linear=True,
        spline_r_linear=True,
    ),
]

# Second-stage covariates: an intercept and a linear `year_id` term.
cov_models2 = [LinearCovModel(name) for name in ('intercept', 'year_id')]

In [None]:
# Build the two-stage model from the shared MRData object and the two
# covariate-model lists (cov_models1 for the first stage, cov_models2 for the second).
tsmodel = TwoStageModel(data_stage1, cov_models1, cov_models2)

In [None]:
# Fit the two-stage model; later cells read results from `tsmodel`.
tsmodel.fit_model()

In [None]:
# Tabulate the two-stage fit with gkmodel's result_to_df, passing the model's
# own `data1` attribute. The result is used as a DataFrame (with `study_id`,
# `year_id` and `prediction` columns) in the plotting section.
two_stage_result = result_to_df(tsmodel, tsmodel.data1)

### Using Three Stages (StagewiseModel)

In [None]:
# Stage 1 (overall): intercept plus a spline in `sdi` -- the same definition
# as in the two-stage section (five evenly spaced knots on [0, 1], both
# linear-tail flags enabled).
cov_models1 = [
    LinearCovModel('intercept'),
    LinearCovModel(
        'sdi',
        use_spline=True,
        spline_l_linear=True,
        spline_r_linear=True,
        spline_knots=np.linspace(0.0, 1.0, 5),
    ),
]

# Stage 2: intercept plus a linear (non-spline) `sdi` term.
cov_models2 = [
    LinearCovModel('intercept'),
    LinearCovModel('sdi'),
]

# Stage 3: intercept plus a linear `year_id` term.
cov_models3 = [
    LinearCovModel('intercept'),
    LinearCovModel('year_id'),
]

In [None]:
# Assemble the three-stage model: one OverallModel followed by two
# StudyModels, each given its own covariate-model list, all sharing the
# same MRData object.
swmodel = StagewiseModel(
    data_stage1,
    [
        OverallModel(cov_models=cov_models1),
        StudyModel(cov_models=cov_models2),
        StudyModel(cov_models=cov_models3),
    ],
)

In [None]:
# Fit the stagewise (three-stage) model.
swmodel.fit_model()

In [None]:
# Call write_soln with index 0 -- presumably the first sub-model (the
# OverallModel); TODO confirm what write_soln returns or records.
swmodel.write_soln(0)

In [None]:
# Call write_soln with index 1 -- presumably the second sub-model (the
# StudyModel on `sdi`); TODO confirm the index semantics.
swmodel.write_soln(1)

In [None]:
# Call write_soln with index 2 -- presumably the third sub-model (the
# StudyModel on `year_id`); TODO confirm the index semantics.
swmodel.write_soln(2)

In [None]:
# Tabulate the stagewise fit with result_to_df, passing the first entry of
# the model's `data_list`. The result is used as a DataFrame in the
# plotting section.
three_stage_result = result_to_df(swmodel, swmodel.data_list[0])

### Compare predictions

- prediction from TwoStageModel

In [None]:
# Predictions from the fitted two-stage model (predict() called with no arguments).
# NOTE(review): prediction_ts is not referenced again in the cells visible here.
prediction_ts = tsmodel.predict()

- prediction from stagewise model

In [None]:
# Predictions from the fitted stagewise model (predict() called with no arguments).
# NOTE(review): prediction_sw is not referenced again in the cells visible here.
prediction_sw = swmodel.predict()

### Plotting Poland and Georgia

In [None]:
import plotly.express as px

In [None]:
# Study ids shown in the comparison plot.
# NOTE(review): presumably the Poland and Georgia series named in the section
# heading -- confirm the id-to-location mapping against the input data.
PG_STUDY_IDS = [511, 512, 351, 352]


def _select_studies(result, method, study_ids=PG_STUDY_IDS):
    """Return the rows of ``result`` for ``study_ids``, tagged with ``method``.

    Parameters
    ----------
    result : pandas.DataFrame
        Result table containing a ``study_id`` column.
    method : str
        Label written to the ``method`` column of every returned row.
    study_ids : list, optional
        Study ids to keep; defaults to ``PG_STUDY_IDS``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the matching rows plus the ``method``
        column; ``result`` itself is left untouched.
    """
    keep = result['study_id'].isin(study_ids)
    return result[keep].assign(method=method)


# Store study_id as a categorical on both result tables, as the original cell
# did (presumably so the plot treats the ids as discrete facet labels).
two_stage_result['study_id'] = two_stage_result['study_id'].astype('category')
three_stage_result['study_id'] = three_stage_result['study_id'].astype('category')

# Same selection for both methods, then stack them for a side-by-side plot.
pg_two_stage = _select_studies(two_stage_result, 'two_stage')
pg_three_stage = _select_studies(three_stage_result, 'three_stage')
pg_compare = pd.concat([pg_two_stage, pg_three_stage])
pg_compare

In [None]:
# One facet row per study, one colour per method; points are joined with
# lines so each method's trajectory over `year_id` is easy to follow.
(
    px.scatter(
        pg_compare,
        x='year_id',
        y='prediction',
        color='method',
        facet_row='study_id',
        width=700,
        height=1000,
    )
    .update_traces(mode='lines+markers')
)