In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

from src.IterativeFitting import IterativeFitting as IF
from src.CorrFuncs import covariance_matrix, trend_est

In [119]:
# Simulate a logistic dose-response data set and collapse it into per-category
# case / non-case counts.
#
# Names defined by this cell: beta0, beta1, n, xs, quantile_indices,
# exposure_levels, exposure_indices, p, px, outcomes, df, categories, cases,
# non_cases, M (plus SEED and rng for reproducibility).

# Fixed seed so that Restart Kernel -> Run All reproduces the same simulated data.
SEED = 42
rng = np.random.default_rng(SEED)

# Simulation intercept and slope of the logistic model
beta0 = 0.
beta1 = 0.05

# Number of samples
n = 40000

# Sorted exposure values, drawn uniformly on [0, 20)
xs = np.sort(rng.uniform(low=0, high=20, size=n))

# Positions in the sorted xs that separate the four quartiles
quantile_indices = np.array([n//4, n//2, 3*n//4])

# One exposure level per quartile, chosen uniformly at random. The position is
# drawn directly instead of drawing a value and searching for it afterwards,
# which avoids an O(n) float-equality scan per level. The first lower bound is 1
# rather than 0 so that the category below the first exposure level can never
# be empty.
quartile_starts = np.concatenate(([1], quantile_indices))
quartile_stops = np.concatenate((quantile_indices, [n]))
exposure_indices = rng.integers(low=quartile_starts, high=quartile_stops)
exposure_levels = list(xs[exposure_indices])


def p(x):
    """Return the logistic probability of being a case at exposure x.

    Reads the module-level beta0 and beta1 at call time; accepts scalars or
    NumPy arrays.
    """
    odds = np.exp(beta0 + beta1*x)
    return odds/(1 + odds)


# Case probability for every simulated exposure
px = p(xs)

# One Bernoulli draw per subject (1 = case, 0 = non-case), vectorised over px
outcomes = rng.binomial(n=1, p=px)

# Two-column array: column 0 = outcome, column 1 = exposure.
# xs is already sorted, so the rows are already ordered by exposure and no
# further sorting is needed.
df = np.stack([outcomes, xs], axis=1)

# Split the rows at the exposure indices; k indices give k + 1 consecutive blocks
categories = np.split(df, exposure_indices)

# Cases and non-cases per category (float arrays, because df is a float array)
cases = np.array([np.sum(block[:, 0]) for block in categories])
non_cases = np.array([block.shape[0] for block in categories]) - cases
M = np.sum(cases)

# TODO: From here, get adjusted estimates through the regression that we walked through in picture