In [72]:
import numpy as np
import pandas as pd
from scipy.linalg import toeplitz
from scipy.stats import norm
from utils.simulations import *

Schoenfeld formula
$$D = \frac{(\Phi^{-1}(\beta)+\Phi^{-1}(1-\alpha))^2}{P_{cont}(1 - P_{cont}) \log^2(\Delta)}$$
where
- $D$ is the required number of deaths (events)
- $\Phi^{-1}$ is the quantile function of the standard normal distribution
- $\alpha$ is the significance level
- $\beta$ is the power
- $P_{cont}$ is the proportion of patients in the control arm
- $\Delta$ is the hazard ratio

In [175]:
# Seed NumPy's global RNG so that "Restart Kernel -> Run All" reproduces the
# numbers below. The cells in this notebook draw through np.random.binomial /
# np.random.uniform; the helpers imported from utils.simulations presumably use
# the same global generator -- TODO confirm.
SEED = 0
np.random.seed(SEED)

# Size of one simulated trial.
n_samples = 100
# Number of features in each of the three feature families; the cells below
# leave the first family untouched, take the absolute value of the second and
# binarise the third.
n_features_bytype = 4
n_features = 3 * n_features_bytype

In [176]:
# Coefficient vector of the linear predictor: the treatment coefficient first,
# followed by one block of weights per feature family, so that it lines up
# with a design matrix laid out as [treatment | X].
# NOTE(review): the second argument (3) is forwarded to weights_sparse_exp
# unchanged; its meaning is defined in utils.simulations -- confirm there.
treatment_effect = 1
beta = np.insert(
    np.concatenate([weights_sparse_exp(n_features_bytype, 3) for _ in range(3)]),
    0,
    treatment_effect,
)
print(beta.shape)

(13,)


In [177]:
# Raw covariate matrix with n_samples rows and n_features columns, as produced
# by the helper from utils.simulations.
X = features_normal_cov_toeplitz(n_samples, n_features)
print(np.shape(X))

(100, 12)


In [178]:
# Reshape two of the three feature families in place:
#   - second family -> non-negative values (absolute value)
#   - third family  -> 0/1 indicators (1 where the raw value is >= 0)
# CAUTION: this mutates X, so the cell is not idempotent. Running it a second
# time without regenerating X turns every entry of the third family into 1,
# because both 0 and 1 satisfy ">= 0".
second_family = slice(n_features_bytype, 2 * n_features_bytype)
third_family = slice(2 * n_features_bytype, 3 * n_features_bytype)
X[:, second_family] = np.abs(X[:, second_family])
X[:, third_family] = (X[:, third_family] >= 0).astype(int)

In [179]:
# Randomisation: each patient independently receives the treatment (1) with
# probability p_treated, otherwise control (0). Stored as a column vector so it
# can be stacked next to X.
p_treated = 0.5
treatment = np.random.binomial(n=1, p=p_treated, size=(n_samples, 1))

In [180]:
# Design matrix [treatment | X]; its column order matches beta, whose first
# entry is the treatment coefficient.
design = np.concatenate((treatment, X), axis=1)
# Linear predictor (log relative hazard) of every patient.
marker = design.dot(beta)

In [181]:
# Shape of the event-time distribution, and shape / scale of the censoring
# distribution.
a_T, a_C, lamb_C = 2, 1, 2

# Inverse-transform sampling: -log(1 - Uniform) is a unit-rate exponential draw.
U = np.random.uniform(size=n_samples)
V = np.random.uniform(size=n_samples)
unit_exp_T = -np.log(1 - U)
unit_exp_C = -np.log(1 - V)

# Event times with survival function S(t) = exp(-exp(marker) * t**a_T), i.e. a
# Weibull proportional-hazards model in which `marker` is the log relative hazard.
T = (unit_exp_T / np.exp(marker)) ** (1 / a_T)
# Censoring times, independent of the covariates: Weibull with shape a_C and
# scale lamb_C (an exponential distribution when a_C == 1).
C = lamb_C * unit_exp_C ** (1 / a_C)

In [182]:
# Number of observed deaths in this simulated trial: patients whose event time
# does not exceed their censoring time.
D = (T <= C).sum()

In [183]:
# Schoenfeld's formula solved for the power, given the D deaths observed in the
# single simulated trial above. treatment_effect plays the role of the log
# hazard ratio, and the critical value is the (1 - alpha) normal quantile.
alpha = 0.05
z_alpha = norm.ppf(1 - alpha)
signal = np.sqrt(D * p_treated * (1 - p_treated) * treatment_effect ** 2)
expected_power = norm.cdf(signal - z_alpha)

In [184]:
# Display the power approximation computed in the previous cell.
expected_power

np.float64(0.965563197715057)

In [185]:
# Number of Monte Carlo replicates.
M = 500


def _simulate_trial():
    """Draw one synthetic two-arm trial and return it as a DataFrame.

    Repeats the data-generating steps of the single-trial cells above
    (covariates, randomisation, Weibull event times, censoring times) using
    local names only. The notebook-level X, treatment, design, marker, U, V,
    T and C -- from which D and expected_power were derived -- are therefore
    left untouched by the replicates.

    Reads the notebook-level settings n_samples, n_features,
    n_features_bytype, p_treated, beta, a_T, a_C and lamb_C.

    Returns
    -------
    pandas.DataFrame
        One row per patient: the feature columns (integer labels), then
        'treatment' (0/1 arm), 'time' (minimum of event and censoring time)
        and 'censor'. Despite its name, 'censor' is 1 when the event is
        observed (event time strictly before censoring time) and 0 when the
        patient is censored.
    """
    features = features_normal_cov_toeplitz(n_samples, n_features)
    second_family = slice(n_features_bytype, 2 * n_features_bytype)
    third_family = slice(2 * n_features_bytype, 3 * n_features_bytype)
    features[:, second_family] = np.abs(features[:, second_family])
    features[:, third_family] = 1 * (features[:, third_family] >= 0)

    arm = np.random.binomial(1, p_treated, size=(n_samples, 1))
    log_hazard = np.dot(np.hstack((arm, features)), beta)

    # Same order of random draws as the single-trial cells: event-time
    # uniforms first, censoring uniforms second.
    u = np.random.uniform(size=n_samples)
    v = np.random.uniform(size=n_samples)
    event_time = (-np.log(1 - u) / np.exp(log_hazard)) ** (1 / a_T)
    censor_time = lamb_C * (-np.log(1 - v)) ** (1 / a_C)

    trial = pd.DataFrame(features)
    trial['treatment'] = arm.ravel()
    trial['time'] = np.minimum(event_time, censor_time)
    # 1 = death observed, 0 = censored (ties count as censored).
    trial['censor'] = (event_time < censor_time).astype(int)
    return trial


# One log-rank statistic per replicate, as returned by compute_logrank_test.
# NOTE(review): how compute_logrank_test interprets the 'censor' column is not
# visible here -- confirm it expects 1 = event observed.
log_p_value = []
for _ in range(M):
    data = _simulate_trial()
    control = data[data['treatment'] == 0]
    treated = data[data['treatment'] == 1]
    log_p_value.append(compute_logrank_test(control, treated))

In [186]:
# Empirical power: share of replicates whose log-rank result clears the
# threshold -log(alpha). Uses the notebook's `alpha` rather than a second
# hard-coded 0.05, and converts the list to an array explicitly instead of
# relying on NumPy's reflected comparison operator.
# NOTE(review): assumes compute_logrank_test returns -log(p-value) with the
# natural logarithm -- confirm in utils.simulations. Also check whether that
# test is one- or two-sided before comparing with expected_power, which uses a
# one-sided critical value.
np.mean(np.asarray(log_p_value) >= -np.log(alpha))

np.float64(0.262)