In [2]:
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm

In [3]:
num = 100 
 
difficulty = np.random.uniform(0, 1, (num,)) 
 
speed = np.maximum(np.random.normal(15, 5, (num, )) - difficulty * 10, 0) 
 
accident = np.minimum(np.maximum(0.03 * speed + 0.4 * difficulty + np.random.normal(0, 0.3, (num,)), 0), 1) 
 
df = pd.DataFrame({'difficulty': difficulty, 'speed': speed, 'accident': accident}) 

In [6]:
df.head()

Unnamed: 0,difficulty,speed,accident
0,0.010143,19.650077,0.03837
1,0.229877,16.256299,0.49901
2,0.09724,15.475153,0.014609
3,0.229962,15.888772,0.593325
4,0.21985,15.755877,0.913014


In [15]:
coefficients = []

for _ in range(1000):  # Run 1000 simulations
    num = 100
    difficulty = np.random.uniform(0, 1, num)
    speed = np.maximum(np.random.normal(15, 5, num) - difficulty * 10, 0)
    
    # Regress speed on difficulty
    X = sm.add_constant(difficulty)
    model = sm.OLS(speed, X).fit()
    coefficients.append(model.params[1])  # coefficient for 'difficulty'

# Compute average coefficient
avg_coef = np.mean(coefficients)
print(f"Average coefficient: {avg_coef}")

Average coefficient: -9.577078755032588


In [19]:
coefficients = []

for _ in range(1000):  # Repeat many times
    num = 100
    difficulty = np.random.uniform(0, 1, num)
    speed = np.maximum(np.random.normal(15, 5, num) - difficulty * 10, 0)
    accident = np.clip(0.03 * speed + 0.4 * difficulty + np.random.normal(0, 0.3, num), 0, 1)

    df = pd.DataFrame({'difficulty': difficulty, 'speed': speed, 'accident': accident})
    
    # Regress speed on difficulty and accident
    X = sm.add_constant(df[['difficulty', 'accident']])
    y = df['speed']
    model = sm.OLS(y, X).fit()
    coefficients.append(model.params['difficulty'])

avg_coef = np.mean(coefficients)
print(f"Average coefficient for difficulty: {avg_coef}")

Average coefficient for difficulty: -10.282030869594363
