In [147]:
# import modules for math and distributions
from math import exp
import numpy as np
from scipy.stats import gamma, norm, uniform, bernoulli

In [148]:
# define inverse logit function
def logit_1(x): return(exp(x)/(1 + exp(x)))

In [149]:
# Initial set up according to HD2012
T = 40 # time periods
k = 5 # check-up times
theta = [-0.405, 0.0205, -0.00405]
gam = [-3, 0.05, -1.5, 0.1]

In [150]:
# define lists for holding A, L, U and Y
A = np.zeros(T + 1) # A[-1] holds the value for A in t = -1
L = np.zeros(T)
U = np.zeros(T)
Y = np.zeros(T + 1)
eps = np.zeros(T)
lam = np.zeros(T) # prob of failure at each time period
delta = np.zeros(T)

# set the first value of U, U[0], to a 
# randomly generated value from a uniform
# distribution a measure of general health
U[0] = uniform.rvs()
eps[0] = norm.rvs(0, 20)
L[0] = gamma.ppf(U[0], 3, 154)

# set A[-1] to 0
A[-1] = 0
x = logit_1(theta[0] + theta[2] * (L[0] - 500))
A[0] = bernoulli.rvs(x, size=1)

In [151]:
if A[0] == 1: Ts = 0

In [152]:
lam[0] = logit_1(gam[0] + gam[2] * A[0])

In [153]:
if lam[0] >= U[0]:
    Y[1] = 1
else:
    Y[1] = 0

In [154]:
# loop for each individual
for t in range(1, T):
    if Y[t] == 0:
        delta[t] = norm.rvs(0, 0.05)
        U[t] = min(1, max(0, U[t-1] + delta[t]))
        if t % k != 0:
            L[t] = L[t-1]
            A[t] = A[t-1]
        else:
            eps[1] = norm.rvs(100 * (U[t] - 2), 50)
            L[t] = max(0, L[t-1] + 150 * A[t-k] * (A[t-k-1]) + eps[t])
            if A[t-1] == 0:
                A[t] = bernoulli.rvs(logit_1(theta[0] + theta[1] * t + theta[2] * (L[t] - 500)), size=1)
            else:
                A[t] = 1
            if A[t] == 1 and A[t-k] == 0: Ts = t
        lam[t] = logit_1(gam[0] + gam[1] * ((1 - A[t]) * t + A[t] * Ts) + gam[2] * A[t] + gam[3] * A[t] *(t - Ts))
        if (1 - np.prod(1 - lam)) >= U[0]:
            Y[t + 1] = 1
        else:
            Y[t + 1] = 0
    else:
        break

     

In [155]:
# make a function that does all of the above for an individual
def sim(T, k, gam, theta):
    
    # define lists for holding A, L, U and Y
    A = np.zeros(T + 1) # A[-1] holds the value for A in t = -1
    L = np.zeros(T)
    U = np.zeros(T)
    Y = np.zeros(T + 1)
    eps = np.zeros(T)
    lam = np.zeros(T) # prob of failure at each time period
    delta = np.zeros(T)

    # set the first value of U, U[0], to a 
    # randomly generated value from a uniform
    # distribution a measure of general health
    U[0] = uniform.rvs()
    eps[0] = norm.rvs(0, 20)
    L[0] = gamma.ppf(U[0], 3, 154)

    # set A[-1] to 0
    A[-1] = 0
    x = logit_1(theta[0] + theta[2] * (L[0] - 500))
    A[0] = bernoulli.rvs(x, size=1)
    
    # if A[0] == 1: Ts = 0 check!!!!!
    Ts = 0
    
    lam[0] = logit_1(gam[0] + gam[2] * A[0])
    
    if lam[0] >= U[0]:
        Y[1] = 1
    else:
        Y[1] = 0
    # loop through each time period - stop when patient is dead or t = T + 1
    for t in range(1, T):
        if Y[t] == 0:
            delta[t] = norm.rvs(0, 0.05)
            U[t] = min(1, max(0, U[t-1] + delta[t]))
            if t % k != 0:
                L[t] = L[t-1]
                A[t] = A[t-1]
            else:
                eps[1] = norm.rvs(100 * (U[t] - 2), 50)
                L[t] = max(0, L[t-1] + 150 * A[t-k] * (A[t-k-1]) + eps[t])
                if A[t-1] == 0:
                    A[t] = bernoulli.rvs(logit_1(theta[0] + theta[1] * t + theta[2] * (L[t] - 500)), size=1)
                else:
                    A[t] = 1
                if A[t] == 1 and A[t-k] == 0: Ts = t
            lam[t] = logit_1(gam[0] + gam[1] * ((1 - A[t]) * t + A[t] * Ts) + gam[2] * A[t] + gam[3] * A[t] *(t - Ts))
            if (1 - np.prod(1 - lam)) >= U[0]:
                Y[t + 1] = 1
            else:
                Y[t + 1] = 0
        else:
            break
    
    # we only need the data before death, so whatever value t is before the end of the
    # above loop
    Y = np.ndarray.tolist(Y[1:(t+1)])
    U = np.ndarray.tolist(U[0:t])
    L = np.ndarray.tolist(L[0:t])
    A = np.ndarray.tolist(A[0:t])
    Ts = [Ts]*t

    return({"Y":Y, "U":U, "L":L, "A":A, "Ts":Ts, "time":[time for time in range(t)]})

In [156]:
# use sim function to make a pandas DF for n patients
import pandas as pd
n = 1000
frames = [pd.DataFrame.from_dict(sim(T, k, gam, theta)) for i in range(n)]
df = pd.concat(frames)

In [157]:
# make new variables for the logit regression
# including an intercept
df["d1"] = df[["time", "Ts"]].min(axis = 1)
df["time_Ts"] = df["time"] - df["Ts"]
df["d3"] = np.maximum(df["time_Ts"], 0)
df["intercept"] = 1.0

In [159]:
# try a logistic model
import statsmodels.api as sm
reg = ["intercept", "d1", "A", "d3"]
logit = sm.Logit(df['Y'], df[reg])
result = logit.fit()
result.summary()

Optimization terminated successfully.
         Current function value: 0.173518
         Iterations 8


0,1,2,3
Dep. Variable:,Y,No. Observations:,20554.0
Model:,Logit,Df Residuals:,20550.0
Method:,MLE,Df Model:,3.0
Date:,"Wed, 28 Jun 2017",Pseudo R-squ.:,0.1006
Time:,12:13:00,Log-Likelihood:,-3566.5
converged:,True,LL-Null:,-3965.6
,,LLR p-value:,1.1070000000000001e-172

0,1,2,3,4,5
,coef,std err,z,P>|z|,[95.0% Conf. Int.]
intercept,-3.0467,0.126,-24.178,0.000,-3.294 -2.800
d1,0.0172,0.013,1.288,0.198,-0.009 0.043
A,-1.6456,0.146,-11.256,0.000,-1.932 -1.359
d3,0.1098,0.004,27.218,0.000,0.102 0.118


In [160]:
# next step is to do this 100 times, store results 
# use the average from the gamma parameters, and calculate
# their standard deviations

In [None]:
# work out stabilized IPTW
# start by creating an ID variable in sim function
# should then be possible to group by individual and
# calcualte weights


In [146]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
