# Microsimulation with artificial survey data 

In [None]:
import matplotlib.pyplot as plt  # For graphics
%matplotlib inline

import numpy as np  # linear algebra and math
import pandas as pd  # data frames

from openfisca_senegal import SenegalTaxBenefitSystem  # The Senegalese tax-benefits system

from openfisca_senegal.survey_scenarios import SenegalSurveyScenario

## Building the artificial data

Senegal has almost 15 million inhabitants living in around 1.6 million households.

In [None]:
# Sample size: the ~1.6 million households divided by the weight given to
# each sampled household (presumably one sampled household stands for
# `household_weight` real ones — confirm against how weights are used later).
household_weight = 100
size = int(1.6e6 / household_weight)
# Call form of print: valid on Python 3 and, with a single argument,
# prints the same text on Python 2 (the statement form is a SyntaxError on 3).
print("Size of the sample: {}".format(size))

We assume that 2/3 of the household heads are married and that only married households have children. The number of children in a married household is drawn from a normal distribution with a mean of 5 (rounded, and floored at zero).

In [None]:
# One Bernoulli draw per household: 1 if the head is married (p = 0.66), else 0.
est_marie = np.random.binomial(n = 1, p = .66, size = size)
# Single heads are exactly the complement of the married ones.
est_celibataire = np.logical_not(est_marie)

# Rounded normal draw (mean 5, standard deviation 3) for every household ...
tirage_enfants = np.round(np.random.normal(loc = 5, scale = 3, size = size))
# ... kept only for married heads, and floored at zero since the normal
# draw can be negative.
nombre_enfants = np.maximum(est_marie * tirage_enfants, 0)

We assume that 80% of the population are wage earners.
We choose a mean wage of 5 000 000 CFA (and a median wage equal to 75% of the mean) with a log-normal distribution.
Since 
$$ \text{mean wage}  = e^{\mu + \frac{\sigma ^ 2}{2}} $$ 
and
$$ \text{median wage} = e^\mu $$
we can compute the distribution according to the following expressions.

In [None]:
# Target moments of the wage distribution.
mean_wage = 5e6
median_wage = .75 * mean_wage

# Log-normal parameters recovered from the moments:
#   median = exp(mu)              =>  mu    = log(median)
#   mean   = exp(mu + sigma^2/2)  =>  sigma = sqrt(2 * log(mean / median))
mu = np.log(median_wage)
sigma = np.sqrt(2 * np.log(mean_wage / median_wage))

# 1 for wage earners (p = 0.8), 0 otherwise.
est_salarie = np.random.binomial(n = 1, p = .8, size = size)

# A wage is drawn for every household, then zeroed for non wage earners.
tirage_salaire = np.random.lognormal(mean = mu, sigma = sigma, size = int(size))
salaire = est_salarie * tirage_salaire


We choose a mean pension of 2 500 000 CFA

In [None]:
# Target moments of the pension distribution: the median is set to 90% of the mean.
mean_pension = 2.5e6
median_pension = mean_pension * .9

In [None]:
# Log-normal parameters for pensions, from the target median and mean.
# NOTE: this rebinds the `mu` / `sigma` names used for wages above.
mu = np.log(median_pension)
sigma = np.sqrt(2 * np.log(mean_pension / median_pension))

# A pension is drawn for every household, then kept only where the
# household is not a wage earner.
non_salarie = np.logical_not(est_salarie)
tirage_pension = np.random.lognormal(mean = mu, sigma = sigma, size = int(size))
pension_retraite = non_salarie * tirage_pension

In [None]:
# Assemble the artificial survey: one row per household, with a unique
# family id and a constant role of 0 broadcast to every row.
input_data_frame = pd.DataFrame(dict(
    est_marie = est_marie,
    est_celibataire = est_celibataire,
    nombre_enfants = nombre_enfants,
    pension_retraite = pension_retraite,
    salaire = salaire,
    id_famille = range(size),
    role_famille = 0,
    ))

In [None]:
# Histogram of the simulated wages (100 bins).
input_data_frame['salaire'].hist(bins = 100)

In [None]:
# Histogram of the simulated pensions (100 bins).
input_data_frame['pension_retraite'].hist(bins = 100)

## Microsimulation 

As with a test case, we can build a scenario from survey data.

In [None]:
# Build the survey scenario from the artificial data for the year 2017.
scenario = SenegalSurveyScenario(
    input_data_frame = input_data_frame,
    year = 2017,
    )

We can compute the value of any variable for the whole population and draw distributions.

In [None]:
# Compute 'impot_revenus' for 2017 over the whole sample, then plot its
# distribution as a 100-bin histogram.
impot_revenus = scenario.simulation.calculate('impot_revenus', period = 2017)
pd.DataFrame({'impot': impot_revenus}).hist(bins = 100)

Special methods allow access to aggregates and pivot tables

In [None]:
# Aggregate of 'impot_revenus' over the population; left as the last
# expression so the notebook displays the value.
scenario.compute_aggregate(
    'impot_revenus',
    )

In [None]:
# Sum of 'impot_revenus' for 2017, with one column per value of 'nombre_enfants'.
pivot_table = scenario.compute_pivot_table(
    aggfunc = 'sum',
    values = ['impot_revenus'],
    columns = ['nombre_enfants'],
    period = 2017,
    )
# Reshape to long format so 'nombre_enfants' becomes a regular column,
# then plot against it.
pivot_table.stack().reset_index().plot(x = 'nombre_enfants')

# Evaluate the financial impact of a reform

Write a parametric reform that increases the top marginal tax rates and evaluate how much revenue can be collected.