In [2]:
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma
import pandas as pd
import pymc as pm
import seaborn as sns

In [3]:
# Load the microdata and keep a private copy of the two columns the models use.
data_original = pd.read_csv("microdata.csv")
data_a = data_original.loc[:, ['m_annual_salary_tm2', 'gr_m_family_status']].copy()

# Recode family status into a 0/1 indicator: code 2 becomes 1, every other
# value (including missing) becomes 0.
status_codes = data_a['gr_m_family_status']
data_a['gr_m_family_status'] = status_codes.where(status_codes == 2, 1) - 1

# Plain NumPy copy of the indicator, used as observed data by the models.
gr_m_family_status = data_a['gr_m_family_status'].to_numpy(copy=True)

# Salary as a masked array: NaN entries are masked so they can be treated as
# missing values rather than as numbers.
salary_column = data_a['m_annual_salary_tm2']
m_annual_salary_tm2 = np.ma.array(salary_column, mask=np.isnan(salary_column))

In [4]:
# Hurdle ("zero-inflated") Normal model for salary, plus a Bernoulli model for
# marital status.
#
# The zero / non-zero split is taken directly from the data. Indexing the
# observations with a symbolic expression (`data[zi == 1]`) does not work:
# `==` on a PyTensor variable is an ordinary Python comparison that evaluates
# to False, so the indexed array is empty and no salary is ever observed.
#
# Missing salaries are left out of the salary likelihood. A missing observation
# integrates to 1, so this does not change the posterior of psi, mu or sigma.
salary_known = np.asarray(m_annual_salary_tm2.compressed(), dtype=float)
if salary_known.size == 0:
    raise ValueError("m_annual_salary_tm2 has no observed (non-missing) values")

salary_mean = salary_known.mean()
salary_sd = salary_known.std()
if not salary_sd > 0:
    raise ValueError("m_annual_salary_tm2 has zero variance; cannot scale priors")

salary_is_nonzero = salary_known != 0
salary_nonzero = salary_known[salary_is_nonzero]

with pm.Model() as model_b:
    # Probability that a salary is non-zero (i.e. that zi == 1).
    psi = pm.Beta('psi', alpha=1, beta=1)

    # Parameters of the Normal component, with priors on the data's own scale.
    # A fixed HalfNormal(10) would be far too tight if salaries are in the
    # thousands.
    mu = pm.Normal('mu', mu=salary_mean, sigma=salary_sd)
    sigma = pm.HalfNormal('sigma', sigma=salary_sd)

    # Hurdle part: observed indicator of a non-zero salary. This likelihood
    # already contributes log(psi) / log(1 - psi) per row, so no extra
    # Potential term is needed for the zeros.
    zi = pm.Bernoulli('zi', p=psi, observed=salary_is_nonzero.astype(int))

    # Normal part: only the non-zero salaries.
    salary_obs = pm.Normal('salary', mu=mu, sigma=sigma, observed=salary_nonzero)

    # Marital status depends on mu through a logistic link. mu is standardised
    # first: sigmoid of a raw salary-scale value saturates to exactly 1.0, which
    # gives every observed 0 a log-probability of -inf.
    # NOTE(review): tying marital status to the scalar mu is kept from the
    # original ("Assuming dependency on mu") -- confirm this is the intended
    # model rather than a per-person regression on salary.
    theta = pm.Deterministic('theta', pm.math.sigmoid((mu - salary_mean) / salary_sd))
    marital_status = pm.Bernoulli('marital_status', p=theta, observed=gr_m_family_status)

In [None]:
# Fixed seed so that Restart Kernel -> Run All reproduces the same draws.
RANDOM_SEED = 42

with model_b:
    # Draw 10000 posterior samples after 1000 tuning steps with Metropolis.
    # NOTE(review): the explicit Metropolis step is kept as-is; if every free
    # variable in model_b is continuous, dropping `step` lets PyMC pick NUTS,
    # which usually mixes much better -- confirm before changing.
    trace_b = pm.sample(
        10000,
        tune=1000,
        step=pm.Metropolis(),
        random_seed=RANDOM_SEED,
    )
    

In [None]:
# Logistic regression of marital status on salary, with missing salaries
# imputed by the model.
#
# Salary is standardised before it enters the model. On the raw scale the
# linear predictor `beta + salary` (implicit slope of 1, intercept near -6000)
# saturates the inverse-logit to exactly 0 or 1 for almost every row, so any
# row on the "wrong" side gets a log-likelihood of -inf.
if m_annual_salary_tm2.count() == 0:
    raise ValueError("m_annual_salary_tm2 has no observed (non-missing) values")

salary_center = float(m_annual_salary_tm2.mean())
salary_scale = float(m_annual_salary_tm2.std())
if not salary_scale > 0:
    raise ValueError("m_annual_salary_tm2 has zero variance; cannot standardise")

# Masked-array arithmetic keeps the mask, so missing entries stay missing.
salary_z = (m_annual_salary_tm2 - salary_center) / salary_scale

with pm.Model() as model_a:
    # Priors on the logit scale: intercept and salary effect per one standard
    # deviation of salary.
    beta = pm.Normal('beta', mu=0, sigma=10)
    slope = pm.Normal('slope', mu=0, sigma=10)

    # Model for the standardised salary. Because the observed data is a masked
    # array, PyMC imputes the masked entries from this distribution.
    # NOTE(review): a standard Normal is only a rough description of a
    # zero-inflated salary; it matters only for the imputed rows.
    b = pm.Normal('b', mu=0, sigma=1, shape=salary_z.shape, observed=salary_z)

    # Logistic regression for marital status.
    p = pm.math.invlogit(beta + slope * b)

    # Likelihood of the observed marital-status indicator.
    a = pm.Bernoulli('a', p=p, observed=gr_m_family_status)