In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

from src.IterativeFitting import IterativeFitting as IF
from src.CorrFuncs import covariance_matrix, trend_est

In [17]:
# Load the pooled study spreadsheet
study_df = pd.read_excel("SBPvPAD_data.xlsx")

# Rows 0 and 5 carry non-log values in columns 5-7; move them onto the log scale
for raw_row in (0, 5):
    raw_vals = study_df.iloc[raw_row, 5:8].to_numpy().astype(np.float64)
    study_df.iloc[raw_row, 5:8] = np.log(raw_vals)

# Keep only the rows attributed to Itoga, then discard the first of those rows
is_itoga = study_df["Author"] == "Itoga"
study_df_i = study_df.loc[is_itoga].iloc[1:, :]

# Exposure of every remaining row after the first, measured from that first row's dose
dose_i = study_df_i["dose"].to_numpy()
x_i = dose_i[1:] - dose_i[0]

# Log odds ratios and their standard errors for the same rows
# (v_i is read from the "std_error" column and is squared where it is used later)
L_i = study_df_i["logOR"].to_numpy()[1:]
v_i = study_df_i["std_error"].to_numpy()[1:]

In [10]:
# Fixed seed so the simulated cohort, and everything derived from it, is
# identical on every Restart & Run All
SEED = 0
rng = np.random.default_rng(SEED)

# Intercept and slope of the logistic model used to simulate case status
beta0 = -3.6289
beta1 = 0.0246

# Exposures for 40,000 simulated subjects, drawn uniformly from [0, 20)
xs = rng.uniform(low=0, high=20, size=40000)


def p(x):
    """Return the probability of being a case at exposure ``x``.

    Evaluates the logistic function of ``beta0 + beta1 * x``. ``x`` may be a
    scalar or a NumPy array, in which case the result is element-wise.
    Written as 1 / (1 + exp(-z)) so that large positive arguments do not
    produce inf / inf.
    """
    return 1 / (1 + np.exp(-(beta0 + beta1 * x)))


# Case probability of every simulated subject
px = p(xs)

# One Bernoulli draw per subject (1 = case, 0 = non-case), vectorised over px
# instead of calling the sampler once per subject in a Python loop
outcomes = rng.binomial(n=1, p=px)

# Two-column array [outcome, exposure] with rows ordered by increasing exposure
df = np.stack([outcomes, xs], axis=1)
df = df[np.argsort(df[:, 1])]

# Split the simulated subjects into five exposure bands whose edges are the
# entries of x_i; each band is closed on the left and open on the right
# NOTE(review): C4 ends at x_i[3] while C5 starts at x_i[-1]; the two edges
# coincide only if x_i has exactly four entries — confirm
exposure = df[:, 1]
C1 = df[exposure < x_i[0]]
C2 = df[(exposure >= x_i[0]) & (exposure < x_i[1])]
C3 = df[(exposure >= x_i[1]) & (exposure < x_i[2])]
C4 = df[(exposure >= x_i[2]) & (exposure < x_i[3])]
C5 = df[exposure >= x_i[-1]]

In [11]:
def _split_counts(category):
    """Return ``(cases, non_cases)`` for one category array.

    Column 0 of ``category`` is summed to give the number of cases; the
    non-cases are the remaining rows.
    """
    n_cases = np.sum(category[:, 0])
    return n_cases, category[:, 0].shape[0] - n_cases


# Case and non-case tallies for each of the five exposure categories
cases1, noncases1 = _split_counts(C1)
cases2, noncases2 = _split_counts(C2)
cases3, noncases3 = _split_counts(C3)
cases4, noncases4 = _split_counts(C4)
cases5, noncases5 = _split_counts(C5)

In [12]:
def crude(x, y):
    """Return the crude odds ratio of ``x`` cases to ``y`` non-cases.

    The odds ``x / y`` are divided by the odds of category 1
    (``cases1 / noncases1``), which are looked up when the function is called.
    """
    return (x / y) / (cases1 / noncases1)


# Crude odds ratio of every category against category 1; the first entry
# compares category 1 with itself
crudeor1, crudeor2, crudeor3, crudeor4, crudeor5 = (
    crude(n_case, n_noncase)
    for n_case, n_noncase in (
        (cases1, noncases1),
        (cases2, noncases2),
        (cases3, noncases3),
        (cases4, noncases4),
        (cases5, noncases5),
    )
)

# One float label per subject: 2 for C1, 3 for C2, ... up to 6 for C5
in_cat1 = 2 + np.zeros(len(C1))
in_cat2 = 3 + np.zeros(len(C2))
in_cat3 = 4 + np.zeros(len(C3))
in_cat4 = 5 + np.zeros(len(C4))
in_cat5 = 6 + np.zeros(len(C5))

# All labels joined end to end in category order
cats = np.concatenate([in_cat1, in_cat2, in_cat3, in_cat4, in_cat5])

# Pair column 0 of df (the outcome) with the category labels
# NOTE(review): rows only line up if df is sorted by exposure and the
# categories are consecutive, increasing exposure bands — confirm
cats_out_df = np.stack([df[:, 0], cats], axis=1)

In [15]:
# Per-category tallies gathered in category order
case_counts = (cases1, cases2, cases3, cases4, cases5)
noncase_counts = (noncases1, noncases2, noncases3, noncases4, noncases5)

# N: subjects per category (cases plus non-cases); M1: cases over all categories
N = np.array([n_case + n_noncase for n_case, n_noncase in zip(case_counts, noncase_counts)])
M1 = sum(case_counts)

# Starting values: the total number of cases shared out over categories 2-5
# in proportion to each category's share of all subjects
A0 = M1 * N[1:] / N.sum()

Here, we use the log odds ratios and their standard errors (squared to obtain variances) reported by Itoga in two ways: first to construct the covariance matrix for the correlation-adjusted method, and then to estimate the slope coefficient with the standard method, which does not correct for correlation.

In [18]:
# Build the IterativeFitting object from the reported log odds ratios (L_i),
# the starting values (A0), the subjects per category (N) and the total cases (M1)
it_fit_ex = IF(L_i,A0,N,M1)
# NOTE(review): A, B, a0, b0 are presumably fitted case/non-case counts for the
# exposed and reference categories — confirm against src.IterativeFitting
A, B, a0, b0 = it_fit_ex.convexProgram()

# Covariance matrix from the fitted values; v_i holds standard errors, hence the square
C = covariance_matrix(A,B,a0,b0,v_i**2)
# Inverse of that matrix, used by the corrected slope estimate that follows
inv_C = np.linalg.inv(C)

    You specified your problem should be solved by ECOS. Starting in
    CXVPY 1.6.0, ECOS will no longer be installed by default with CVXPY.
    Please either add an explicit dependency on ECOS or switch to our new
    default solver, Clarabel, by either not specifying a solver argument
    or specifying ``solver=cp.CLARABEL``.
    


In [19]:
# Slope estimate that uses the full inverse covariance matrix inv_C:
#   vb_star = 1 / (x' C^-1 x)   (presumably the variance of the slope — confirm)
#   b_star  = vb_star * (x' C^-1 L)
vb_star = 1 / x_i.dot(inv_C.dot(x_i))
b_star = vb_star * x_i.dot(inv_C.dot(L_i))

In [20]:
# Display the slope estimate computed with the full covariance matrix
b_star

0.014094088127677437

In [24]:
# Standard slope estimate: same formulas as the corrected estimate, but the
# weight matrix is the inverse of diag(v_i**2), i.e. no off-diagonal terms.
# The inverse is built once and reused for both quantities.
inv_V = np.linalg.inv(np.diag(v_i**2))
vb = 1 / x_i.dot(inv_V.dot(x_i))
b = vb * x_i.dot(inv_V.dot(L_i))

In [25]:
# Display the standard slope estimate (diagonal weight matrix)
b

0.0077151064467757356