# Replication - Example data - Table

This notebook replicates the small example-data table shown in the main paper.

The notebook replicates the results in:
- /out/simulation/tables/example.csv

The main script can be found at: 
- /scripts/simulation/tables/data_example.py



In [13]:
# google colab specific - installing probcox
!pip3 install probcox



In [14]:
# Modules
# =======================================================================================================================

import os
import sys
import shutil
import subprocess
import tqdm

import numpy as np
import pandas as pd

import torch
from torch.distributions import constraints

import pyro
import pyro.distributions as dist

from pyro.infer import SVI, Trace_ELBO

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

import probcox as pcox

# Tensor type that is handed to the probcox simulator via its `dtype` argument.
dtype = torch.FloatTensor

# Seed both random number generators so the example is reproducible.
NUMPY_SEED, TORCH_SEED = 909, 9034
np.random.seed(NUMPY_SEED)
torch.manual_seed(TORCH_SEED)

<torch._C.Generator at 0x7fd9ed398dd0>

In [15]:
# Simulation Settings
# =======================================================================================================================

I = 1000  # Number of Individuals
P_binary, P_continuous = 3, 3  # Number of binary / continuous covariates
P = P_binary + P_continuous  # Total number of covariates
# Coefficients as a column vector (one row per covariate); passed as `theta` to the simulator.
theta = np.array([-0.1, 0.8, 0.0, 0.8, 1.5, 0.0]).reshape(-1, 1)
scale = 0.5  # Scaling factor for Baseline Hazard


In [16]:
# Simulation
# =======================================================================================================================

# Simulator configured from the settings cell (theta, covariate counts, tensor type).
TVC = pcox.TVC(theta=theta, P_binary=P_binary, P_continuous=P_continuous, dtype=dtype)
TVC.make_lambda0(scale=scale)

# Number of draws from the simulator used for the example table.
# NOTE(review): each `TVC.sample()` call presumably yields the rows of one individual - confirm.
N_EXAMPLE_DRAWS = 3

# Draw first, concatenate once. The comprehension variable stays local, so IPython's
# output-history names (`_`, `__`) are not overwritten in the notebook namespace.
draws = [TVC.sample() for _ in range(N_EXAMPLE_DRAWS)]

# The empty leading tensors fix the expected column counts (3 survival columns, P covariates)
# and keep the result well-defined even when no draws are requested. Using `P` instead of a
# literal 6 keeps this cell consistent with the settings cell.
surv = torch.cat([torch.zeros((0, 3))] + [surv_rows for surv_rows, _ in draws])
X = torch.cat([torch.zeros((0, P))] + [x_rows for _, x_rows in draws])


In [17]:
# Table
# =======================================================================================================================
# Survival columns are shown as integers (`astype(int)` truncates toward zero).
surv_table = pd.DataFrame(surv.numpy().astype(int))
# Covariates are rounded to two decimals for display. A separate name is used so that `X`
# stays a tensor and this cell can be re-run without failing on `X.numpy()`.
X_table = pd.DataFrame(np.round(X.numpy(), 2))

dd = pd.concat([surv_table, X_table], axis=1)
# Covariate labels X1..Xk are derived from the actual number of covariate columns.
dd.columns = ['start', 'stop', 'event'] + ['X' + str(j + 1) for j in range(X_table.shape[1])]



In [18]:
dd  # For the paper, the second individual was removed to save space.

Unnamed: 0,start,stop,event,X1,X2,X3,X4,X5,X6
0,0,1000,0,0.0,0.0,0.0,0.0,0.0,0.0
1,1000,3235,0,1.0,0.0,0.0,-1.22,-0.3,0.61
2,3235,6671,0,0.0,0.0,1.0,0.14,0.32,0.61
3,6671,8551,0,0.0,0.0,0.0,-0.76,-0.2,0.65
4,8551,10146,0,0.0,1.0,1.0,-0.19,-0.52,0.13
5,10146,11107,1,0.0,1.0,0.0,1.55,0.85,1.84
6,0,2322,0,0.0,0.0,0.0,0.0,0.0,0.0
7,2322,3193,0,0.0,0.0,0.0,-0.18,0.93,-0.17
8,3193,6091,0,0.0,0.0,0.0,0.35,0.93,-1.26
9,6091,8118,0,1.0,0.0,1.0,0.51,2.43,0.26
