# Replication - Likelihood Approximation: Additional 1 (Large P) - Table

Here we provide a notebook to replicate the simulation results for the likelihood approximations. These are additional simulations to evaluate the impact of the number of covariates P on the approximation.

This produces the table from the supplement.

The notebook replicates the results in:
- /out/simulation/tables/likelihood_approx_MPE_additional1.csv
- /out/simulation/tables/likelihood_approx_MAPE_additional1.csv

The main script can be found at: 
- /scripts/simulation/tables/likelihood_approx_additional1.py



In [1]:
# google colab specific - installing probcox
!pip3 install probcox

Collecting probcox
  Downloading https://files.pythonhosted.org/packages/4b/a9/d5ab2da5292cdf9bd5233eca90525f266f4aa83949927d3c63ee9393748b/probcox-0.0.5.tar.gz
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting pyro-ppl<1.6
[?25l  Downloading https://files.pythonhosted.org/packages/79/4d/e45ff02364438ce8698ed70b1fbd9240f7c4f6e509fb90e9c04657f895b5/pyro_ppl-1.5.2-py3-none-any.whl (607kB)
[K     |████████████████████████████████| 614kB 7.7MB/s 
Collecting pyro-api>=0.1.1
  Downloading https://files.pythonhosted.org/packages/fc/81/957ae78e6398460a7230b0eb9b8f1cb954c5e913e868e48d89324c68cec7/pyro_api-0.1.2-py3-none-any.whl
Building wheels for collected packages: probcox
  Building wheel for probcox (PEP 517) ... [?25l[?25hdone
  Created wheel for probcox: filename=probcox-0.0.5-cp37-none-any.whl size=5226 sha256=a0a073ee83d9cb23b701b7833990d9bf9c4f09a336eae070de21e

In [2]:
# Modules
# =======================================================================================================================

import os
import sys
import shutil
import subprocess
import tqdm

import numpy as np
import pandas as pd

import torch
from torch.distributions import constraints

import pyro
import pyro.distributions as dist

from pyro.infer import SVI, Trace_ELBO

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

import probcox as pcox

# Tensor type used throughout the notebook: passed as `dtype=` to pcox.TVC and
# used for the `.type(dtype)` casts of the simulated parameters and predictors.
dtype = torch.FloatTensor

# Seed both global random number generators (NumPy and PyTorch) so that
# repeated runs of the notebook draw the same random numbers.
np.random.seed(90834)
torch.manual_seed(873645)

<torch._C.Generator at 0x7fb3c6593dd0>

In [3]:
# Custom function for evaluation
# =======================================================================================================================

# run the approximation 1000 times for a given setting and return MPE/MAPE
def run(surv, pred, batch, est, n_rep=1000):
    """Compare mini-batch likelihood approximations against the full-data value.

    Repeatedly draws a random subset of rows, evaluates
    ``pcox.CoxPartialLikelihood(...).log_prob`` on that subset and records the
    relative deviation from ``est``.

    Parameters
    ----------
    surv : torch.Tensor
        Survival data; the last column is read as the event indicator (== 1).
    pred : torch.Tensor
        Linear predictor, indexed with the same row indices as ``surv``.
    batch : int
        Nominal batch size. Two rows are always drawn from the event rows and
        ``batch - 2`` rows from all rows; duplicates are removed, so the
        realised subset can be smaller than ``batch``.
    est : numpy.ndarray or float
        Reference log-likelihood computed on the full data set.
    n_rep : int, optional
        Number of replicates to collect (default 1000).

    Returns
    -------
    tuple of float
        ``(MPE, MAPE)``: the mean of ``(e - est) / est`` and the mean of its
        absolute value over the ``n_rep`` replicates.
    """
    total_obs = surv.shape[0]
    total_events = torch.sum(surv[:, -1] == 1).numpy().tolist()
    # Row indices of all events; invariant across replicates, so look them up once.
    event_rows = np.where(surv[:, -1] == 1)[0]

    mpe_draws = []
    mape_draws = []
    # Strict '<' so that exactly n_rep replicates are collected
    # (the previous '<= 1000' condition produced 1001).
    while len(mpe_draws) < n_rep:
        # Force at least two events into every subset, then fill up with random rows.
        idx = np.unique(np.concatenate((
            np.random.choice(event_rows, 2, replace=False),
            np.random.choice(total_obs, batch - 2, replace=False),
        )))
        batch_events = torch.sum(surv[idx, -1])
        if batch_events > 1:
            # NOTE(review): the second entry is the nominal batch size, not len(idx),
            # which can be smaller after de-duplication - confirm this is what
            # CoxPartialLikelihood expects.
            sampling_proportion = [total_obs, batch, total_events, batch_events.numpy().tolist()]
            e = pcox.CoxPartialLikelihood(pred=pred[idx], sampling_proportion=sampling_proportion).log_prob(surv=surv[idx]).detach().numpy()
            MPE = (e - est) / est
            MAPE = np.abs(MPE)
            mpe_draws.append(MPE.tolist())
            mape_draws.append(MAPE.tolist())
    return (np.mean(mpe_draws), np.mean(mape_draws))

In [4]:
# Simulation Settings
# =======================================================================================================================
I = [10000] # individuals - passed as `obs` to TVC.make_dataset
P = [500, 1000] # covariates - split evenly into binary and continuous (int(p/2) each)
C = [0.5, 0.75, 0.95, 0.99] # censorship - passed as `fraction_censored` to TVC.make_dataset
B = [64, 128, 256, 512] # batch size - one results column per entry



In [5]:
# Simulation 
# =======================================================================================================================

# One row per (P, I, C) combination and one column per batch size. The shapes are
# derived from the settings lists so that changing the settings cannot overflow the
# arrays or leave rows of zeros behind.
n_settings = len(P) * len(I) * len(C)
res = np.zeros((n_settings, len(B)))   # mean of (e - est) / est returned by run()
res2 = np.zeros((n_settings, len(B)))  # mean of |(e - est) / est| returned by run()
sim_n = []  # row labels describing each setting
n_calibration = 1000  # number of TVC.sample() draws used to calibrate the baseline hazard
row = 0
for p in P:
    # make baselinehazard: shrink `scale` by a factor of 5 until the average of the
    # summed last column of TVC.sample()[0] lies in [0.1, 0.9]
    # (presumably the mean number of events per sampled individual - TODO confirm).
    scale = 100
    while True:
        theta = np.random.normal(0, 0.01, (p, 1))
        TVC = pcox.TVC(theta=theta, P_binary=int(p/2), P_continuous=int(p/2), dtype=dtype)
        TVC.make_lambda0(scale=scale)
        s = np.sum([torch.sum(TVC.sample()[0][:, -1]).numpy() for _ in range(n_calibration)])/n_calibration
        # NOTE(review): theta_ is redrawn on every calibration attempt and only the last
        # draw is used. It is left inside the loop so the order of random draws is unchanged.
        theta_ = torch.normal(0, 0.01, (p, 1)).type(dtype)
        if 0.1 <= s <= 0.9:
            break
        scale = scale/5
    for i in I:
        for c in C:
            # make dataset
            surv, X = TVC.make_dataset(obs=i, fraction_censored=c)

            sim_n.append(f'I(N): {i}({surv.shape[0]}), P: {p}, C: {c}')

            # reference value: likelihood on the full data set, evaluated at theta_
            pred = torch.mm(X, theta_).type(dtype)
            est = pcox.CoxPartialLikelihood(pred=pred, sampling_proportion=None).log_prob(surv=surv).detach().numpy()

            # fit to batch - the progress bar is labelled with the current setting
            for col, b in enumerate(tqdm.tqdm(B, desc=sim_n[-1])):
                res[row, col], res2[row, col] = run(surv=surv, pred=pred, batch=b, est=est)
            row += 1

res = np.round(res, 2)
res2 = np.round(res2, 2)

# First column holds the setting label, the remaining columns follow the order of B.
MPE = pd.DataFrame(np.concatenate((np.asarray(sim_n)[:, None], res.astype(str)), axis=1))
MAPE = pd.DataFrame(np.concatenate((np.asarray(sim_n)[:, None], res2.astype(str)), axis=1))



s


100%|██████████| 4/4 [01:20<00:00, 20.22s/it]


s


100%|██████████| 4/4 [01:08<00:00, 17.11s/it]


s


100%|██████████| 4/4 [01:04<00:00, 16.13s/it]


s


100%|██████████| 4/4 [01:10<00:00, 17.58s/it]


s


100%|██████████| 4/4 [01:10<00:00, 17.57s/it]


s


100%|██████████| 4/4 [01:05<00:00, 16.46s/it]


s


100%|██████████| 4/4 [01:08<00:00, 17.09s/it]


s


100%|██████████| 4/4 [01:06<00:00, 16.69s/it]


In [6]:
# Mean of (e - est) / est per setting (rows); columns 1-4 follow the batch sizes in B.
MPE

Unnamed: 0,0,1,2,3,4
0,"I(N): 10000(80414), P: 500, C: 0.5",0.04,0.02,0.01,0.0
1,"I(N): 10000(76051), P: 500, C: 0.75",0.03,0.02,0.01,0.0
2,"I(N): 10000(72180), P: 500, C: 0.95",0.04,0.02,0.01,0.0
3,"I(N): 10000(71427), P: 500, C: 0.99",0.03,0.01,0.01,0.01
4,"I(N): 10000(77421), P: 1000, C: 0.5",0.03,0.02,0.01,0.0
5,"I(N): 10000(73952), P: 1000, C: 0.75",0.03,0.02,0.01,0.01
6,"I(N): 10000(71579), P: 1000, C: 0.95",0.03,0.01,0.0,0.0
7,"I(N): 10000(71159), P: 1000, C: 0.99",0.04,0.02,0.01,0.0


In [7]:
# Mean of |(e - est) / est| per setting (rows); columns 1-4 follow the batch sizes in B.
MAPE

Unnamed: 0,0,1,2,3,4
0,"I(N): 10000(80414), P: 500, C: 0.5",0.04,0.03,0.02,0.02
1,"I(N): 10000(76051), P: 500, C: 0.75",0.05,0.04,0.03,0.02
2,"I(N): 10000(72180), P: 500, C: 0.95",0.06,0.05,0.04,0.04
3,"I(N): 10000(71427), P: 500, C: 0.99",0.06,0.05,0.05,0.05
4,"I(N): 10000(77421), P: 1000, C: 0.5",0.04,0.03,0.02,0.01
5,"I(N): 10000(73952), P: 1000, C: 0.75",0.05,0.03,0.03,0.02
6,"I(N): 10000(71579), P: 1000, C: 0.95",0.05,0.05,0.04,0.03
7,"I(N): 10000(71159), P: 1000, C: 0.99",0.06,0.06,0.05,0.05
