# Logistic regression example

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import pystan

# Notebook-wide matplotlib defaults, applied in a single rcParams update.
plt.rcParams.update({
    'figure.max_open_warning': 0,  # 0 switches off the many-open-figures warning
    'figure.figsize': (15, 7.5),
    'font.size': 24,
})
# Relative path prefix; apparently meant for saving figures (only referenced
# from a commented-out savefig call further down).
figpath = '../doc/fig/'

In [None]:
# ---------------------------------------------------------------
# Simulate a logistic-regression data set with known coefficients
# ---------------------------------------------------------------

# Fix the RNG state so the simulated data are the same on every run.
np.random.seed(520)
N = 100                              # number of observations
true_beta = np.array([2., 3.])       # (intercept, slope) used to simulate

# Design matrix: a column of ones for the intercept next to one Gaussian
# covariate, drawn as -1 + 1.5 * standard normal.
X = np.column_stack((np.ones(N), -1. + 1.5 * np.random.randn(N)))

# Linear predictor -> success probability (inverse logit) -> 0/1 responses.
eta = np.dot(X, true_beta)
true_p = 1. / (1. + np.exp(-eta))
Y = np.random.binomial(n=1, p=true_p, size=N)

In [None]:
# Visual check of the simulated data: the success probability true_p (black)
# and the sampled 0/1 responses Y (blue), both plotted against the covariate
# stored in X[:,1].
plt.scatter(X[:,1],true_p,c='black')
plt.scatter(X[:,1],Y,c='cornflowerblue')
plt.xlabel(r'$x_1$')
plt.ylabel(r'$p$')
# The horizontal axis is the covariate x_1 (not the coefficient beta_1), so
# the title names x_1 to agree with the x-axis label.
plt.title(r'Response $y$ and probability as function of $x_1$')

## Fit model with Stan

In [None]:
# Stan program for a logistic regression with a single free parameter.
# The intercept beta0 is declared in the data block, i.e. it is supplied as
# a known value rather than sampled; only the slope beta1 is a parameter.
# beta1 gets a normal(0, 30) prior and the responses y follow a
# bernoulli_logit likelihood with linear predictor beta0 + beta1*x1.
stan_code = """
data {
  int<lower=0> n; // number of observations
  int y[n]; // responses
  vector[n] x1; // predictors
  real beta0; // intercept
}
parameters {
  real beta1;
}
model {
  beta1 ~ normal(0, 30);
  y ~ bernoulli_logit(beta0+beta1*x1);
}
"""

In [None]:
# Build the model object from the Stan program text held in stan_code.
# NOTE(review): with PyStan 2 this step compiles the model and can take a
# while; keeping it in its own cell avoids repeating it on every re-sample.
model = pystan.StanModel(model_code=stan_code)

In [None]:
# Bundle the inputs under the names declared in the Stan data block
# (n, y, x1, beta0); the intercept is passed in at its simulated value.
stan_data = dict(
    n=N,
    y=Y,
    x1=X[:, 1],
    beta0=true_beta[0],
)
# Draw from the posterior with a single chain.
fit = model.sampling(data=stan_data, chains=1, iter=10000)

In [None]:
# Print the fit summary restricted to beta1, then pull the draws out of the
# fit object for plotting (indexed below as samples['beta1']).
summary_text = fit.stansummary(pars=['beta1'])
print(summary_text)
samples = fit.extract()

In [None]:
# Normalized histogram of the beta1 draws, with the value used to simulate
# the data (true_beta[1]) marked by a dashed vertical line.
hist_style = dict(color='#2D718EFF', ec='black', alpha=0.25, label='MCMC', density=True)
truth_style = dict(color='black', ls='dashed', label='True')
plt.hist(samples['beta1'], **hist_style)
plt.vlines(true_beta[1], 0, 1.2, **truth_style)
plt.xlabel(r'$\beta_1$')
plt.ylabel('Density')
leg = plt.legend(fontsize='small', frameon=False)
# Optional extras, left disabled: opaque legend swatches and saving the
# figure under the figpath prefix defined in the settings cell.
#for lh in leg.legendHandles: lh.set_alpha(1)
#plt.savefig(figpath+'logreg_mcmc_scatter.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Built-in plot of the fit object.
# NOTE(review): presumably PyStan's default trace/density plot for each
# parameter — confirm what the installed version draws.
fit.plot()