# Simulating data

In [1]:
import pandas as pd
from scipy import stats
from scipy import special
import numpy as np
import altair as alt

# Seed NumPy's global (legacy) generator so the simulated draws are repeatable;
# the Bernoulli sampling further down does not pass its own random_state.
np.random.seed(23)

In [2]:
# Evenly spaced predictor grid: 1000 points covering [-3, 3].
x = np.linspace(-3, 3, 1000)

# True success probability at each grid point: inverse-logit of 2x - 3.
y_mean = special.expit(2 * x - 3)

# One Bernoulli draw per grid point, each with its own probability.
y_bern = stats.bernoulli.rvs(p=y_mean)

# Collect predictor, true probability, and simulated outcome in one frame.
data_df = pd.DataFrame(dict(x=x, y_mean=y_mean, y=y_bern))

# Red curve: the true success probability as a function of x.
layer_line = (
    alt.Chart(data_df)
    .mark_line(color="red")
    .encode(x="x", y="y_mean")
)

# Points: the simulated 0/1 outcomes at each x.
layer_point = (
    alt.Chart(data_df)
    .mark_point()
    .encode(x="x", y="y")
)

# Overlay both layers; as the cell's last expression this renders the chart.
alt.layer(layer_line, layer_point)

# Inference

In [3]:
import statsmodels.formula.api as smf
from statsmodels.genmod import families

In [4]:
# Fit the same logistic regression of y on x through two statsmodels entry
# points so that their summaries can be compared side by side.

# 1) Generic GLM: Binomial family with an explicit logit link.
#    `links.Logit` is the canonical class name; the lowercase `links.logit`
#    alias is deprecated in statsmodels >= 0.14 and warns when instantiated.
logit_glm = smf.glm(
    formula="y ~ x",
    data=data_df,
    family=families.Binomial(link=families.links.Logit()),
).fit()

# 2) Dedicated logit helper with the same formula and data.
logit_with_helper = smf.logit(formula="y ~ x", data=data_df).fit()

Optimization terminated successfully.
         Current function value: 0.253654
         Iterations 8


In [5]:
# Print the GLM summary first, then the helper's, as plain-text tables.
for fitted_model in (logit_glm, logit_with_helper):
    print(fitted_model.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                 1000
Model:                            GLM   Df Residuals:                      998
Model Family:                Binomial   Df Model:                            1
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -253.65
Date:                Sat, 14 Nov 2020   Deviance:                       507.31
Time:                        12:23:01   Pearson chi2:                     919.
No. Iterations:                     7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.0968      0.233    -13.289      0.0