## Sorsogon. Step 2.a: Dynamic Sampling Model and GREGWT

In [1]:
import datetime

# Print the current date and time so the saved output records when this cell ran.
print(datetime.datetime.now())

2018-03-26 01:28:43.554147


**Notebook abstract**

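This notebook defines the table models for Income, Electricity and Water, and then runs the main sampling and reweighting algorithm: the models are calibrated for the benchmark year and the sample survey is projected year by year via resampling.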

### Import libraries

In [2]:
from smum.microsim.run import run_calibrated_model
from smum.microsim.table import TableModel

  from ._conv import register_converters as _register_converters


### Global variables

In [3]:
# --- Sampler settings ---
iterations = 1000   # forwarded to run_calibrated_model as `iterations`
njobs = 4           # number of chains to run in parallel
verbose = False     # forwarded to both TableModel and run_calibrated_model

# --- Input data ---
benchmark_year = 2016
census_file = 'data/benchmarks_year_bias.csv'

# --- Run naming ---
# `typ` is used both as the `project` argument of run_calibrated_model
# and as the suffix of the model name.
typ = 'resampled'
model_name = 'Sorsogon_Electricity_Water_wbias_projected_dynamic_{}'.format(typ)

### Define Table model

In [4]:
# Create the table model container; every model below (Income, Electricity,
# Water) is added to this single object. It is given the census benchmark file.
tm = TableModel(census_file = census_file, verbose=verbose)

#### Income model

In [5]:
# Add the table stored in data/table_inc.csv to the table model as 'Income'.
tm.add_model('data/table_inc.csv', 'Income')

# Education is updated with the default options of update_dynamic_model.
tm.update_dynamic_model('Income', specific_col='Education')

# FamilySize and Age share the same options (val='mu', compute_average=0);
# FamilySize is additionally addressed as 'Size' through `specific_col_as`.
# NOTE(review): presumably 'Size' is the column name used in the census
# benchmark file -- confirm against the data.
income_dynamic_columns = [
    dict(specific_col='FamilySize', specific_col_as='Size'),
    dict(specific_col='Age'),
]
for column_options in income_dynamic_columns:
    tm.update_dynamic_model('Income',
                            val='mu',
                            compute_average=0,
                            **column_options)

In [6]:
# Display the Income table for the year 2020 (note: benchmark_year is 2016).
tm.models['Income'].loc[2020]

Unnamed: 0,co_mu,co_sd,p,mu,sd,dis,ub,lb
i_Intercept,,,1147.66,,,Deterministic,,
i_Sex,919.012059036333,161.50344091572538,0.243795,,,Bernoulli,,
i_Urbanity,7105.2244566329355,127.94148635675795,0.6356,,,Bernoulli,,
i_FamilySize,1666.846395220964,29.03482607534048,,3.70878,1.83794,Poisson,10.0,1.0
i_Age,116.57589770606201,4.681393204635,,52.5153,12.2451,Normal,100.0,18.0
i_Education,"1.0,6023.86254599,11959.091528,18727.4606703,1...","1e-10,140.904404522,217.208790314,282.17614554...","0.2430379746835443,0.21581625995041107,0.25540...",,,Categorical,,


In [7]:
# Build the Income formula as text:
#   i_Intercept + c_<var> * <var> + ...
# with one "coefficient * variable" term for every row of the benchmark-year
# Income table except the intercept itself.
income_terms = [
    "c_{0} * {0}".format(row_name)
    for row_name in tm.models['Income'][benchmark_year].index
    if row_name != 'i_Intercept'
]
formula_inc = "i_Intercept+" + "+".join(income_terms)

# Attach the formula to the 'Income' model.
tm.add_formula(formula_inc, 'Income')

In [8]:
# Print the formula attached to 'Income', one term per line.
tm.print_formula('Income')

Income =
	 i_Intercept +
	 c_i_Sex * i_Sex +
	 c_i_Urbanity * i_Urbanity +
	 c_i_FamilySize * i_FamilySize +
	 c_i_Age * i_Age +
	 c_i_Education * i_Education +


#### Electricity model

In [9]:
# Add the table stored in data/table_elec.csv as 'Electricity'.
# NOTE(review): `reference_cat=['yes']` presumably marks 'yes' as the reference
# category of the yes/no appliance variables -- confirm against the smum docs.
tm.add_model(
    'data/table_elec.csv',
    'Electricity',
    reference_cat=['yes'],
)

# Update the Income entry of the Electricity table (val='mu', no averaging).
tm.update_dynamic_model(
    'Electricity',
    specific_col='Income',
    val='mu',
    compute_average=False,
)

In [10]:
# Display the Electricity table for the year 2016.
tm.models['Electricity'].loc[2016]

Unnamed: 0,co_mu,co_sd,p,mu,sd,dis,ub,lb
e_Intercept,,,3.29998,,,Deterministic,,
e_Lighting,0.825662,18.6676,0.946022,,,Bernoulli,,
e_TV,18.7899,1.75962,0.964932,,,Bernoulli,,
e_Cooking,28.8862,1.96894,0.0142662,,,Bernoulli,,
e_Refrigeration,59.2432,1.55605,0.602102,,,Bernoulli,,
e_AC,203.323,3.13016,0.256521,,,Bernoulli,,
e_Urban,24.5935,1.39104,1.0,,,Bernoulli,,
e_Income,0.00142607,4.10201e-05,,190472.0,1904.72,,inf,0.0


In [11]:
# Rows that do not follow the generic "c_<var> * <var>" pattern:
#   e_Intercept -> enters the formula on its own, without a coefficient
#   e_Urban     -> its coefficient multiplies i_Urbanity
#   e_Income    -> its coefficient multiplies Income
elec_special_rows = ('e_Intercept', 'e_Income', 'e_Urban')

elec_terms = [
    "c_{0} * {0}".format(row_name)
    for row_name in tm.models['Electricity'][benchmark_year].index
    if row_name not in elec_special_rows
]

formula_elec = (
    "e_Intercept+"
    + "+".join(elec_terms)
    + '+c_e_Urban * i_Urbanity'
    + '+c_e_{0} * {0}'.format('Income')
)

In [12]:
# Attach the assembled formula string to the 'Electricity' model.
tm.add_formula(formula_elec, 'Electricity')

In [13]:
# Print the formula attached to 'Electricity', one term per line.
tm.print_formula('Electricity')

Electricity =
	 e_Intercept +
	 c_e_Lighting * e_Lighting +
	 c_e_TV * e_TV +
	 c_e_Cooking * e_Cooking +
	 c_e_Refrigeration * e_Refrigeration +
	 c_e_AC * e_AC +
	 c_e_Urban * i_Urbanity +
	 c_e_Income * Income +


#### Water model

In [14]:
# Add the table stored in data/table_water.csv as 'Water'.
tm.add_model('data/table_water.csv', 'Water')

# Same updates as for the Income model: Education with default options,
# then FamilySize (addressed as 'Size') and Age with val='mu' and
# compute_average=0.
tm.update_dynamic_model('Water', specific_col='Education')

water_dynamic_columns = [
    dict(specific_col='FamilySize', specific_col_as='Size'),
    dict(specific_col='Age'),
]
for column_options in water_dynamic_columns:
    tm.update_dynamic_model('Water',
                            val='mu',
                            compute_average=0,
                            **column_options)

In [15]:
# Display the Water table for the year 2020 (note: benchmark_year is 2016).
tm.models['Water'].loc[2020]

Unnamed: 0,co_mu,co_sd,p,dis,mu,sd,ub,lb
w_Intercept,,,-601.592,Deterministic,,,,
w_Sex,98.49504620801835,29.44380722589748,0.243795,,,,,
w_Urbanity,1000.9789077676428,25.415910606032206,0.6356,,,,,
w_Total_Family_Income,0.05318701200857999,0.0009823058551951082,,,,,,
w_FamilySize,49.73935151831777,5.897790558149098,,,3.70878,1.83794,,
w_Age,6.088941881654669,0.9127405886772298,,,52.5153,12.2451,,
w_Education,"1.0,214.4011453125436,260.32727427717964,101.7...","1e-10,28.815802440470176,40.0574490885231,49.9...","0.2430379746835443,0.21581625995041107,0.25540...",None;i;Categorical,,,,


In [16]:
# Rows handled explicitly after the loop (or, for the intercept, before it).
water_special_rows = ('w_Intercept', 'w_Total_Family_Income', 'w_Education')

# Each remaining coefficient c_w_<X> multiplies the variable i_<X>, i.e. the
# variable with the same name under the "i_" prefix instead of "w_".
water_terms = []
for row_name in tm.models['Water'][benchmark_year].index:
    if row_name in water_special_rows:
        continue
    # Everything after the first underscore is the bare variable name.
    variable_name = "i_" + row_name.partition('_')[2]
    water_terms.append("c_{0} * {1}".format(row_name, variable_name))

formula_water = "w_Intercept+" + "+".join(water_terms)
# Income coefficient multiplies Income; education multiplies i_Education.
formula_water += '+c_w_Total_Family_Income*Income'
formula_water += '+c_w_Education*i_Education'

In [17]:
# Attach the assembled formula string to the 'Water' model.
tm.add_formula(formula_water, 'Water')

In [18]:
# Print the formula attached to 'Water', one term per line.
tm.print_formula('Water')

Water =
	 w_Intercept +
	 c_w_Sex * i_Sex +
	 c_w_Urbanity * i_Urbanity +
	 c_w_FamilySize * i_FamilySize +
	 c_w_Age * i_Age +
	 c_w_Total_Family_Income*Income +
	 c_w_Education*i_Education +


#### Make the model and save it to Excel (one workbook per model)

In [19]:
# Build the model object from the table model; it is the first argument
# passed to run_calibrated_model further down.
table_model = tm.make_model()

In [20]:
# Export the tables to Excel; per the cell output this creates one file per
# model, named data/tableModel_<model name>.xlsx.
tm.to_excel()

creating data/tableModel_Income.xlsx
creating data/tableModel_Electricity.xlsx
creating data/tableModel_Water.xlsx


### Define model variables

In [21]:
# Age bin edges. Each consecutive pair of edges defines one category.
cut = [0, 19, 26, 36, 46, 56, 66, 76, 86, 101]

# One label per bin, named after its inclusive integer range
# (edges 0 and 19 -> 'age_0_18', ..., edges 86 and 101 -> 'age_86_100').
labels = ['age_{}_{}'.format(lower_edge, upper_edge - 1)
          for lower_edge, upper_edge in zip(cut[:-1], cut[1:])]

# Passed to run_calibrated_model as `to_cat`: variable name -> [edges, labels].
to_cat = {'i_Age': [cut, labels]}

# Passed to run_calibrated_model as `drop_col_survey`.
drop_col_survey = ['e_Income', 'e_Urban', 'w_Total_Family_Income', 'w_Education']

In [22]:
# Run identifier: model name followed by the number of iterations.
run_name = '{}_{}'.format(model_name, iterations)

# NOTE: earlier drafts also passed a `rep` mapping
# (rep = {'FamilySize': ['Size']} or rep = {'urb': ['urban', 'urbanity']});
# it is intentionally not passed in this run.
calibration_options = dict(
    project=typ,
    njobs=njobs,
    census_file=census_file,
    year=benchmark_year,
    population_size=False,
    name=run_name,
    to_cat=to_cat,
    iterations=iterations,
    verbose=verbose,
    drop_col_survey=drop_col_survey,
)

# Long-running cell: per the output below, it calibrates Income, Electricity
# and Water in successive loops and then resamples the survey for each
# projected year.
fw = run_calibrated_model(table_model, **calibration_options)

loop: 1/4; calibrating: Income; sufix = loop_1
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:31<00:00, 31.80it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


loop: 2/4; calibrating: Electricity; sufix = loop_2
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 39.28it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


loop: 3/4; calibrating: Water; sufix = loop_3
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 38.96it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


loop: 4/4; final loop, for variables: Income, Electricity, Water; sufix = loop_4
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:26<00:00, 37.46it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


Calibration Error:
	0.0000E+00  Income
	-1.6763E-05  Electricity
	9.8753E-01  Water
Projecting sample survey for 21 steps via resample
resampling for year 2010
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:31<00:00, 32.26it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2011
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:31<00:00, 32.15it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2012
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:33<00:00, 30.08it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2013
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 38.93it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2014
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:30<00:00, 32.67it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2015
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:28<00:00, 34.77it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2016
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 39.10it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2017
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:29<00:00, 33.54it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2018
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 39.69it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2019
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:32<00:00, 30.64it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2020
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 39.33it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2021
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:30<00:00, 32.93it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2022
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:30<00:00, 32.81it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2023
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 39.66it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2024
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:27<00:00, 36.33it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2025
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:31<00:00, 32.25it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2026
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:27<00:00, 37.21it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2027
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:28<00:00, 34.89it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2028
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:26<00:00, 38.01it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2029
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:26<00:00, 37.64it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.


resampling for year 2030
Computing model:  Income
Computing model:  Electricity
Computing model:  Water


100%|██████████| 1005/1005 [00:25<00:00, 38.83it/s]
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
Tuning was enabled throughout the whole trace.
The estimated number of effective samples is smaller than 200 for some parameters.
