In [8]:
# Simulate regression data: y, x1, x2
# with given corr level between x1 and x2:

import numpy as np
import pandas as pd

def simulate_data(n, corr, seed, betas: "list | tuple" = (0, 1, 0)):
    """Simulate a linear-regression dataset with two correlated predictors.

    The model is ``y = betas[0] + betas[1] * x1 + betas[2] * x2 + eps`` where
    ``x1``, ``eps`` and the idiosyncratic part of ``x2`` are independent
    standard-normal draws, and ``x2`` is constructed so that its population
    correlation with ``x1`` equals ``corr`` (both have unit variance).

    Parameters
    ----------
    n : int
        Number of observations to draw.
    corr : float
        Target correlation between ``x1`` and ``x2``; must lie in [-1, 1].
    seed : int
        Seed for the random draws. The same seed always yields the same data.
    betas : sequence of 3 numbers, default (0, 1, 0)
        Intercept and the coefficients of ``x1`` and ``x2``, in that order.

    Returns
    -------
    pandas.DataFrame
        Columns ``y``, ``x1``, ``x2`` with ``n`` rows.

    Raises
    ------
    ValueError
        If ``corr`` is outside [-1, 1] (the construction of ``x2`` would
        otherwise silently produce NaNs).
    """
    if not -1 <= corr <= 1:
        raise ValueError(f"corr must be in [-1, 1], got {corr!r}")

    # A private legacy RandomState yields exactly the same stream as
    # np.random.seed(seed) followed by np.random.normal(...), but does not
    # reseed NumPy's global generator as a side effect of calling this helper.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: x1, then x2's noise, then y's noise.
    x1 = rng.normal(size=n)
    x2 = corr * x1 + np.sqrt(1 - corr**2) * rng.normal(size=n)
    y = betas[0] + betas[1] * x1 + betas[2] * x2 + rng.normal(size=n)
    return pd.DataFrame({'y': y, 'x1': x1, 'x2': x2})


# Example draw: 1000 observations, x1 and x2 correlated at 0.99, default betas.
df = simulate_data(n=1000, corr=0.99, seed=0)

In [9]:
# Show the first two rows of `df`.
df.head(2)

Unnamed: 0,y,x1,x2
0,0.231131,1.764052,1.82484
1,-1.311813,0.400157,0.522055


In [15]:
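# For different correlation levels between x1 and x2, fit an OLS model on many
# simulated datasets and record the p-values of all coefficients, so that the
# fraction of runs in which each variable is significant can be computed: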

import statsmodels.api as sm

# Correlation levels between x1 and x2 to scan.
cors = [-.995, -.99, -.9, -.5, -.25, 0, .25, .5, .9, .99, .995]
# Observations per simulated dataset.
n = 500
# Number of simulated datasets per (corr, betas) configuration.
B = 1_000
# Coefficient vectors [intercept, x1, x2] to simulate with.
betas = [
    [0, 1, 0],  # x2 has a zero coefficient
    [0, 1, 1],  # x2 has a non-zero coefficient
]


# Collect one DataFrame of p-values per (corr, betas) configuration:
results = []
for corr in cors:
    for bs in betas:
        pvalues_ = []
        # Seeds 0..B-1 are reused for every configuration.
        for i in range(B):
            df = simulate_data(n, corr, i, bs)
            model = sm.OLS(df['y'], sm.add_constant(df[['x1', 'x2']])).fit()
            # Save the p-values of all fitted terms:
            pvalues_.append(model.pvalues.to_dict())
        # One row per simulated dataset, tagged with the configuration used.
        df_pvalues = pd.DataFrame(pvalues_)
        df_pvalues["corr"] = corr
        # Stored as a string so the coefficient vector can serve as a group key.
        df_pvalues["betas"] = str(bs)
        results.append(df_pvalues)


In [19]:
# Preview the first two rows of the p-value table.
# NOTE(review): the saved output already contains the signif_* columns that
# are only added in the cell below (In [18]), so this cell was executed after
# it; on a fresh top-to-bottom run the output here would lack those columns.
df_pvalues.head(2)

Unnamed: 0,const,x1,x2,corr,betas,signif_x1,signif_x2,signif_both
0,0.528583,0.058262,0.806485,-0.995,"[0, 1, 0]",False,False,False
1,0.097725,0.007865,0.615841,-0.995,"[0, 1, 0]",True,False,False


In [18]:
# Significance level applied to every p-value below (single source of truth
# instead of repeating the literal for each variable).
ALPHA = 0.01

# Stack the per-configuration tables. Each table keeps its own default row
# index, so index labels repeat across configurations in the stacked frame.
df_pvalues = pd.concat(results)
df_pvalues["signif_x1"] = df_pvalues["x1"] < ALPHA
df_pvalues["signif_x2"] = df_pvalues["x2"] < ALPHA
df_pvalues["signif_both"] = df_pvalues["signif_x1"] & df_pvalues["signif_x2"]
# The mean of a boolean flag is the fraction of simulated datasets in which
# the variable(s) were significant, per (betas, corr) configuration.
df_pvalues.groupby(["betas", "corr"])[["signif_x1", "signif_x2", "signif_both"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,signif_x1,signif_x2,signif_both
betas,corr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"[0, 1, 0]",-0.995,0.383,0.01,0.002
"[0, 1, 0]",-0.99,0.743,0.01,0.002
"[0, 1, 0]",-0.9,1.0,0.01,0.01
"[0, 1, 0]",-0.5,1.0,0.01,0.01
"[0, 1, 0]",-0.25,1.0,0.01,0.01
"[0, 1, 0]",0.0,1.0,0.01,0.01
"[0, 1, 0]",0.25,1.0,0.01,0.01
"[0, 1, 0]",0.5,1.0,0.01,0.01
"[0, 1, 0]",0.9,1.0,0.01,0.01
"[0, 1, 0]",0.99,0.688,0.01,0.008
