In [127]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

from src.IterativeFitting import IterativeFitting as IF
from src.CorrFuncs import covariance_matrix, trend_est

In [157]:
# Simulate a logistic dose-response dataset, split the (sorted) exposure into
# categories and build the covariate matrix used for adjustment.

# Dedicated, seeded random generator so that "Restart & Run All" reproduces the
# same simulated dataset; the global np.random state is left untouched.
SEED = 0
rng = np.random.default_rng(SEED)

# Initializing simulation slope and intercept parameters
beta0 = 0.
beta1 = 0.05

# Number of samples
n = 40000

# Generating plausible x values (sorted ascending) to throw into probability generator
xs = np.sort(rng.uniform(low=0, high=20, size=n))

# Positions of the quartile boundaries within the sorted xs
quantile_indices = np.array([n//4, n//2, 3*n//4])

# Getting exposure levels: one value drawn at random from each quartile range
quartile_edges = [0, *quantile_indices, n]
exposure_levels = [
    rng.choice(xs[lo:hi])
    for lo, hi in zip(quartile_edges[:-1], quartile_edges[1:])
]

# Position of the first occurrence of each exposure level in the sorted xs
# (every level was drawn from xs, so the left insertion point is its index)
exposure_indices = np.searchsorted(xs, exposure_levels)


# Function to generate probabilities of being a case v non-case
def p(x):
    """Return the logistic probability of being a case at exposure ``x``.

    Evaluates ``1 / (1 + exp(-(beta0 + beta1*x)))`` with the module-level
    ``beta0`` and ``beta1`` (looked up at call time). This is algebraically
    the same as ``exp(z) / (1 + exp(z))`` but does not become ``nan`` when
    ``exp`` overflows.

    Parameters
    ----------
    x : float or numpy.ndarray
        Exposure value(s).

    Returns
    -------
    float or numpy.ndarray
        Probability in [0, 1], same shape as ``x``.
    """
    return 1.0 / (1.0 + np.exp(-(beta0 + beta1*x)))


# Actually calculating probabilities on exposures as defined
px = p(xs)

# Actually assigning to case or not: one vectorized Bernoulli draw per subject
outcomes = rng.binomial(n=1, p=px)

# Constructing and sorting dataframe of outcomes (column 0) and exposure (column 1).
# xs is already sorted, so the stable sort is only a guard that keeps rows
# aligned with exposure_indices.
df = np.stack([outcomes, xs], axis=1)
df = df[np.argsort(df[:, 1], kind="stable")]

# Getting rows corresponding to each exposure interval: the rows before the
# first exposure index, the rows between consecutive indices, and the rest
categories = np.split(df, exposure_indices)

# Getting case numbers
A = np.array([np.sum(cat[:, 0]) for cat in categories])  # cases per category
N = np.array([cat.shape[0] for cat in categories])       # subjects per category
B = N - A                                                # non-cases per category
M = np.sum(A)                                            # total number of cases

# Building the adjustment matrix: an intercept column, a randomly generated
# integer age in [17, 75), and one 0/1 dummy column for every exposure category
# except the first one (rows before exposure_indices[0] are all-zero)
age_vector = rng.integers(17, 75, size=n)
category_edges = np.append(exposure_indices, n)
dummy_list = []
for lo, hi in zip(category_edges[:-1], category_edges[1:]):
    indicator = np.zeros(n)
    indicator[lo:hi] = 1
    dummy_list.append(indicator)
to_adjust_matrix = np.column_stack([np.ones(n), age_vector] + dummy_list)

In [158]:
# Display the adjustment matrix: columns are the intercept, the age covariate,
# then one 0/1 dummy per exposure category after the first
to_adjust_matrix

array([[ 1., 70.,  0.,  0.,  0.,  0.],
       [ 1., 21.,  0.,  0.,  0.,  0.],
       [ 1., 63.,  0.,  0.,  0.,  0.],
       ...,
       [ 1., 45.,  0.,  0.,  0.,  1.],
       [ 1., 69.,  0.,  0.,  0.,  1.],
       [ 1., 50.,  0.,  0.,  0.,  1.]])

In [154]:
# Display the row positions in the sorted xs at which the exposure categories are split.
# NOTE(review): this cell's execution count (154) is lower than the simulation
# cell's (157), so the output shown may come from an earlier random draw —
# re-run it after the simulation cell to confirm.
exposure_indices

array([ 7054, 15224, 23046, 38813])