# Replication - Likelihood Approximation: Additional 2 (Large effect size) - Table

Here we provide a notebook to replicate the simulation results for the likelihood approximations. These are additional simualtions to evaluate the impact of the effect size on the approximation.

This produced the table from the supplement.

The notebook replicates the results in:
- /out/simulation/tables/likelihood_approx_MPE_additional2.csv
- /out/simulation/tables/likelihood_approx_MAPE_additional2.csv

The main script can be found at: 
- /scripts/simulation/tables/likelihood_approx_additional2.py



In [1]:
# google colab specific - installing probcox
!pip3 install probcox

Collecting probcox
  Downloading https://files.pythonhosted.org/packages/4b/a9/d5ab2da5292cdf9bd5233eca90525f266f4aa83949927d3c63ee9393748b/probcox-0.0.5.tar.gz
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting pyro-ppl<1.6
[?25l  Downloading https://files.pythonhosted.org/packages/79/4d/e45ff02364438ce8698ed70b1fbd9240f7c4f6e509fb90e9c04657f895b5/pyro_ppl-1.5.2-py3-none-any.whl (607kB)
[K     |████████████████████████████████| 614kB 2.6MB/s 
Collecting pyro-api>=0.1.1
  Downloading https://files.pythonhosted.org/packages/fc/81/957ae78e6398460a7230b0eb9b8f1cb954c5e913e868e48d89324c68cec7/pyro_api-0.1.2-py3-none-any.whl
Building wheels for collected packages: probcox
  Building wheel for probcox (PEP 517) ... [?25l[?25hdone
  Created wheel for probcox: filename=probcox-0.0.5-cp37-none-any.whl size=5226 sha256=cafe9b5301450190015347eb54850960c281298415fcb7bfeb800

In [2]:
# Modules
# =======================================================================================================================

import os
import sys
import shutil
import subprocess
import tqdm

import numpy as np
import pandas as pd

import torch
from torch.distributions import constraints

import pyro
import pyro.distributions as dist

from pyro.infer import SVI, Trace_ELBO

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

import probcox as pcox

dtype = torch.FloatTensor

np.random.seed(90834)
torch.manual_seed(873645)

<torch._C.Generator at 0x7f58835e0e10>

In [3]:
# Custom function for evaluation
# =======================================================================================================================

# run the approximation 1000 times for a given setting and return MPE/MAPE
def run(surv, pred, batch, est):
    total_obs = surv.shape[0]
    total_events = torch.sum(surv[:, -1] == 1).numpy().tolist()
    sampling_proportion = [total_obs, batch, total_events, None]
    ll = []
    ll2 = []
    while len(ll) <=1000:
        idx = np.unique(np.concatenate((np.random.choice(np.where(surv[:, -1]==1)[0], 2, replace=False), np.random.choice(range(surv.shape[0]), batch-2, replace=False))))
        sampling_proportion[-1] = torch.sum(surv[idx, -1]).numpy().tolist()
        if torch.sum(surv[idx, -1]) > 1:
            e = pcox.CoxPartialLikelihood(pred=pred[idx], sampling_proportion=sampling_proportion).log_prob(surv=surv[idx]).detach().numpy()
            MPE = ((e-est)/est)
            MAPE = np.abs(MPE)
            ll.append(MPE.tolist())
            ll2.append(MAPE.tolist())
    return(np.mean(ll), np.mean(ll2))

In [4]:
# Simulation Settings
# =======================================================================================================================

I = [5000] # individuals
P = [20] # covariates
C = [0.75] # censorship
T = [0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2] # scale for normal distribuition over theta
B = [64, 128, 256, 512] # batch size



In [5]:
# Simulation 
# =======================================================================================================================
res = np.zeros((8, 4))
res2 = np.zeros((8, 4))
sim_n =[]
ii = 0
jj = 0
for t in T:

    cond = True
    scale = 10
    while cond:
        theta = np.random.normal(0, t, (20, 1))
        TVC = pcox.TVC(theta=theta, P_binary=int(10), P_continuous=int(10), dtype=dtype)
        TVC.make_lambda0(scale=scale)
        s = np.sum([torch.sum(TVC.sample()[0][:, -1]).numpy() for ii in (range(100))])/100

        if np.logical_and(s>=0.1, s<=0.9):
            cond = False
        scale = scale/5

    theta_ = torch.normal(0, t, (20, 1)).type(dtype)

    n=5000
    c = 0.75
    # make dataset
    surv, X = TVC.make_dataset(obs=n, fraction_censored=c)
    pred = torch.mm(X, theta_).type(dtype)
    minmax = str((np.round(np.min(pred.detach().numpy()), 2), np.round(np.max(pred.detach().numpy()), 2)))

    sim_n.append('I(N): ' + str(n) + '(' + str(surv.shape[0]) + ')' +', LP(min, max): ' + str(minmax))

    pred = torch.mm(X, theta_).type(dtype)
    est = pcox.CoxPartialLikelihood(pred=pred, sampling_proportion=None).log_prob(surv=surv).detach().numpy()

    # fit to batch
    for b in tqdm.tqdm(B):
        print(b)
        res[ii, jj], res2[ii, jj] = run(surv=surv, pred=pred, batch=b, est=est)
        jj += 1
    ii += 1
    jj = 0

res = np.round(res, 2)
res2 = np.round(res2, 2)

MPE = pd.DataFrame(np.concatenate((np.asarray(sim_n)[:, None], res.astype(str)), axis=1))
MAPE = pd.DataFrame(np.concatenate((np.asarray(sim_n)[:, None], res2.astype(str)), axis=1))



  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:08<00:25,  8.48s/it]

128


 50%|█████     | 2/4 [00:16<00:16,  8.47s/it]

256


 75%|███████▌  | 3/4 [00:25<00:08,  8.50s/it]

512


100%|██████████| 4/4 [00:34<00:00,  8.54s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:06<00:18,  6.16s/it]

128


 50%|█████     | 2/4 [00:12<00:12,  6.19s/it]

256


 75%|███████▌  | 3/4 [00:18<00:06,  6.24s/it]

512


100%|██████████| 4/4 [00:25<00:00,  6.35s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:06<00:20,  6.72s/it]

128


 50%|█████     | 2/4 [00:13<00:13,  6.71s/it]

256


 75%|███████▌  | 3/4 [00:20<00:06,  6.73s/it]

512


100%|██████████| 4/4 [00:27<00:00,  6.78s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:07<00:22,  7.36s/it]

128


 50%|█████     | 2/4 [00:14<00:14,  7.38s/it]

256


 75%|███████▌  | 3/4 [00:22<00:07,  7.42s/it]

512


100%|██████████| 4/4 [00:29<00:00,  7.49s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:06<00:20,  6.81s/it]

128


 50%|█████     | 2/4 [00:13<00:13,  6.82s/it]

256


 75%|███████▌  | 3/4 [00:20<00:06,  6.82s/it]

512


100%|██████████| 4/4 [00:27<00:00,  6.85s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:06<00:19,  6.37s/it]

128


 50%|█████     | 2/4 [00:12<00:12,  6.39s/it]

256


 75%|███████▌  | 3/4 [00:19<00:06,  6.43s/it]

512


100%|██████████| 4/4 [00:25<00:00,  6.49s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:06<00:20,  6.78s/it]

128


 50%|█████     | 2/4 [00:13<00:13,  6.78s/it]

256


 75%|███████▌  | 3/4 [00:20<00:06,  6.81s/it]

512


100%|██████████| 4/4 [00:27<00:00,  6.86s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

64


 25%|██▌       | 1/4 [00:05<00:16,  5.46s/it]

128


 50%|█████     | 2/4 [00:10<00:10,  5.48s/it]

256


 75%|███████▌  | 3/4 [00:16<00:05,  5.50s/it]

512


100%|██████████| 4/4 [00:22<00:00,  5.57s/it]


In [6]:
MPE

Unnamed: 0,0,1,2,3,4
0,"I(N): 5000(39437), LP(min, max): (-1.2, 1.4)",0.03,0.02,0.01,0.0
1,"I(N): 5000(28766), LP(min, max): (-4.44, 4.46)",-0.0,-0.01,-0.01,-0.01
2,"I(N): 5000(31022), LP(min, max): (-5.43, 8.0)",-0.03,-0.03,-0.02,-0.01
3,"I(N): 5000(33931), LP(min, max): (-12.27, 10.63)",-0.1,-0.09,-0.08,-0.06
4,"I(N): 5000(31299), LP(min, max): (-14.29, 14.51)",-0.23,-0.18,-0.14,-0.11
5,"I(N): 5000(29461), LP(min, max): (-18.66, 19.78)",-0.31,-0.26,-0.21,-0.16
6,"I(N): 5000(31383), LP(min, max): (-19.77, 16.65)",-0.27,-0.24,-0.19,-0.14
7,"I(N): 5000(24956), LP(min, max): (-35.14, 30.38)",-0.42,-0.34,-0.28,-0.22


In [7]:
MAPE

Unnamed: 0,0,1,2,3,4
0,"I(N): 5000(39437), LP(min, max): (-1.2, 1.4)",0.05,0.04,0.03,0.02
1,"I(N): 5000(28766), LP(min, max): (-4.44, 4.46)",0.07,0.06,0.04,0.03
2,"I(N): 5000(31022), LP(min, max): (-5.43, 8.0)",0.08,0.06,0.05,0.03
3,"I(N): 5000(33931), LP(min, max): (-12.27, 10.63)",0.13,0.12,0.1,0.08
4,"I(N): 5000(31299), LP(min, max): (-14.29, 14.51)",0.24,0.19,0.15,0.11
5,"I(N): 5000(29461), LP(min, max): (-18.66, 19.78)",0.32,0.26,0.21,0.16
6,"I(N): 5000(31383), LP(min, max): (-19.77, 16.65)",0.28,0.24,0.19,0.15
7,"I(N): 5000(24956), LP(min, max): (-35.14, 30.38)",0.42,0.35,0.28,0.22
