In [1]:
import numpy as np
from xgbsurv.models.utils import transform, transform_back
from xgbsurv.models.eh_final import eh_likelihood, eh_gradient
from xgbsurv.models.eh_ah_final import ah_likelihood, ah_objective
from xgbsurv.models.eh_aft_final import aft_likelihood, aft_objective
import sys
sys.path.append('/Users/JUSC/Documents/xgbsurv_benchmarking/deep_learning/')
from loss_functions_pytorch import eh_likelihood_torch, eh_likelihood_torch_2, aft_likelihood_torch, ah_likelihood_torch
import torch
import math

In [2]:
# create data function

def ah_data(type='np'):
    #h2==0 scenario
    if type=='np':
        linear_predictor = np.array([[0.67254923, 0.0],
        [0.86077982, 0.0],
        [0.43557393, 0.0],
        [0.94059047, 0.0],
        [0.8446509 , 0.0],
        [0.23657039, 0.0],
        [0.74629685, 0.0],
        [0.99700768, 0.0],
        [0.28182768, 0.0],
        [0.44495038, 0.0]],dtype='float32')
        y = np.array([[1., -3., -3., -4., -7.,  8.,  9.,  -11.,  13.,  16.]],dtype='float32')
        time = np.array([[ 1,  3,  3,  4,  7,  8,  9, 11, 13, 16]])
        event = np.array([[1, 0, 0, 0, 0, 1, 1, 0, 1, 1]],dtype=np.float32)
    if type=='torch':
        linear_predictor = torch.tensor([[0.67254923, 0.0],
        [0.86077982, 0.0],
        [0.43557393, 0.0],
        [0.94059047, 0.0],
        [0.8446509 , 0.0],
        [0.23657039, 0.0],
        [0.74629685, 0.0],
        [0.99700768, 0.0],
        [0.28182768, 0.0],
        [0.44495038, 0.0]], requires_grad=True)
        y = torch.tensor([[1, -3, -3, -4, -7,  8,  9,  -11,  13,  16]])
        time = torch.tensor([[ 1,  3,  3,  4,  7,  8,  9, 11, 13, 16]])
        event = torch.tensor([[1, 0, 0, 0, 0, 1, 1, 0, 1, 1]],dtype=torch.float32).reshape(10,1)

    return y, linear_predictor, time, event

In [3]:
y, linear_predictor, time, event = ah_data(type='torch')

## Structure

- Compare loss to original function

## 1. Compare loss to original function



In [4]:
# EH loss from paper

def eaftloss(out, time, delta): ##loss function for AFT or EH
    ia, ib = out.size()
    if ib == 1: ###loss function for AFT
        n = len(delta)
        h = 1.30*math.pow(n,-0.2)
        #h 1.304058*math.pow(n,-0.2)  ## 1.304058*n^(-1/5) or 1.587401*math.pow(n,-0.333333) 1.587401*n^(-1/3)
        time = time.view(n,1)
        delta = delta.view(n,1)
        
        # R = g(Xi) + log(Oi)
        R = torch.add(out,torch.log(time)) 
        
        # Rj - Ri
        rawones = torch.ones([1,n], dtype = out.dtype)
        R1 = torch.mm(R,rawones)
        R2 = torch.mm(torch.t(rawones),torch.t(R))
        DR = R1 - R2 
        
        # K[(Rj-Ri)/h]
        K = normal_density(DR/h)
        Del = torch.mm(delta, rawones)
        DelK = Del*K 
        
        # (1/nh) *sum_j Deltaj * K[(Rj-Ri)/h]
        Dk = torch.sum(DelK, dim=0)/(n*h)
        
        # log {(1/nh) * Deltaj * K[(Rj-Ri)/h]}    
        log_Dk = torch.log(Dk)     
        A = torch.t(delta)*log_Dk/n   
        S1 = A.sum()  
        
        ncdf=torch.distributions.normal.Normal(torch.tensor([0.0], dtype = out.dtype), torch.tensor([1.0], dtype = out.dtype)).cdf
        P = ncdf(DR/h)
        CDF_sum = torch.sum(P, dim=0)/n
        Q = torch.log(CDF_sum)
        S2 = -(delta*Q.view(n,1)).sum()/n
             
        S0 = -(delta*torch.log(time)).sum()/n
        
        S = S0 + S1 + S2 
        S = -S
    else: ### loss function for Extended hazard model
        n = len(out[:,0])
        h = 1.30*math.pow(n,-0.2)  ## or 1.59*n^(-1/3)
        time = time.view(n,1)
        delta = delta.view(n,1)
        g1 = out[:,0].view(n,1)
        g2 = out[:,1].view(n,1)
        
        # R = g(Xi) + log(Oi)
        R = torch.add(g1,torch.log(time)) 
        
        S1 =  (delta*g2).sum()/n
        S2 = -(delta*R).sum()/n
        
        # Rj - Ri
        rawones = torch.ones(1,n)
        R1 = torch.mm(R,rawones)
        R2 = torch.mm(torch.t(rawones),torch.t(R))
        DR = R1 - R2 
        
        # K[(Rj-Ri)/h]
        K = normal_density(DR/h)
        Del = torch.mm(delta, rawones)
        DelK = Del*K 
        
        # (1/nh) *sum_j Deltaj * K[(Rj-Ri)/h]
        Dk = torch.sum(DelK, dim=0)/(n*h)  ## Dk would be zero as learning rate too large!
        
        # log {(1/nh) * Deltaj * K[(Rj-Ri)/h]}    
        log_Dk = torch.log(Dk)    
        
        S3 = (torch.t(delta)*log_Dk).sum()/n    
        
        # Phi((Rj-Ri)/h)
        ncdf=torch.distributions.normal.Normal(torch.tensor([0.0]), torch.tensor([1.0])).cdf
        P = ncdf(DR/h) 
        L = torch.exp(g2-g1)
        LL = torch.mm(L,rawones)
        LP_sum = torch.sum(LL*P, dim=0)/n
        Q = torch.log(LP_sum)
        
        S4 = -(delta*Q.view(n,1)).sum()/n
        
        S = S1 + S2 + S3 + S4  
        S = -S
    return S

def normal_density(a):  
    b = 0.3989423*torch.exp(-0.5*torch.pow(a,2.0))
    return b

### Loss Original Paper

In [5]:
y, linear_predictor, time, event = ah_data(type='torch')
eaftloss(linear_predictor, time, event)


tensor(1.4592, grad_fn=<NegBackward0>)

### Loss My Implementation

In [6]:
y, linear_predictor, time, event = ah_data(type='np')
#linear_predictor = linear_predictor[:,0]
linear_predictor

array([[0.67254925, 0.        ],
       [0.8607798 , 0.        ],
       [0.43557394, 0.        ],
       [0.94059044, 0.        ],
       [0.8446509 , 0.        ],
       [0.23657039, 0.        ],
       [0.7462968 , 0.        ],
       [0.99700767, 0.        ],
       [0.2818277 , 0.        ],
       [0.44495037, 0.        ]], dtype=float32)

In [7]:
eh_likelihood(
     y, linear_predictor
)

2.792222201115036

In [8]:
y, linear_predictor, time, event = ah_data(type='np')
aft_likelihood(
     y, linear_predictor[:,0]
)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
No implementation of function Function(<built-in function getitem>) found for signature:
 
 >>> getitem(array(float64, 2d, C), array(bool, 2d, C))
 
There are 22 candidate implementations:
    - Of which 20 did not match due to:
    Overload of function 'getitem': File: <numerous>: Line N/A.
      With argument(s): '(array(float64, 2d, C), array(bool, 2d, C))':
     No match.
    - Of which 2 did not match due to:
    Overload in function 'GetItemBuffer.generic': File: numba/core/typing/arraydecl.py: Line 166.
      With argument(s): '(array(float64, 2d, C), array(bool, 2d, C))':
     Rejected as the implementation raised a specific error:
       NumbaTypeError: unsupported array index type array(bool, 2d, C) in [array(bool, 2d, C)]
  raised from /Users/JUSC/miniconda3/envs/xgbsurv/lib/python3.10/site-packages/numba/core/typing/arraydecl.py:72

During: typing of intrinsic-call at /Users/JUSC/miniconda3/envs/xgbsurv/lib/python3.10/site-packages/xgbsurv/models/eh_aft_final.py (47)

File "../../../../miniconda3/envs/xgbsurv/lib/python3.10/site-packages/xgbsurv/models/eh_aft_final.py", line 47:
def aft_likelihood(
    <source elided>
    (_, kernel_matrix, integrated_kernel_matrix,) = difference_kernels(
        a=R_linear_predictor, b=R_linear_predictor[event_mask], bandwidth=bandwidth
        ^


## 1. Compare Pytorch loss with numpy loss

- let's keep bandwidth fixed for that
- random data

linear_predictor = np.array([[0.67254923, 0.03356795],
       [0.86077982, 0.65922692],
       [0.43557393, 0.75447972],
       [0.94059047, 0.30572004],
       [0.8446509 , 0.07916267],
       [0.23657039, 0.44693716],
       [0.74629685, 0.32637245],
       [0.99700768, 0.10225456],
       [0.28182768, 0.05405025],
       [0.44495038, 0.08454563]])

y = np.array([1, -3, -3, -4, -7,  8,  9,  -11,  13,  16])


In [None]:
# create mock data
linear_predictor = np.array([[0.67254923, 0.03356795],
       [0.86077982, 0.65922692],
       [0.43557393, 0.75447972],
       [0.94059047, 0.30572004],
       [0.8446509 , 0.07916267],
       [0.23657039, 0.44693716],
       [0.74629685, 0.32637245],
       [0.99700768, 0.10225456],
       [0.28182768, 0.05405025],
       [0.44495038, 0.08454563]])
linear_predictor = linear_predictor[:,0]
linear_predictor_torch = torch.tensor(linear_predictor, requires_grad=True)

y = np.array([[1, -3, -3, -4, -7,  8,  9,  -11,  13,  16],
              [1, -3, -3, -4, -7,  8,  9,  -11,  13,  16]]).reshape(10,2)
y_torch = torch.from_numpy(y[:,0])

## 2. Compare Pytorch gradient with gradient function

In [None]:
a1 = ah_likelihood_torch(linear_predictor_torch, y_torch)
a1.backward()
#print(linear_predictor_torch.grad)
print(linear_predictor_torch.grad.numpy())

[-0.10978369  0.0032265   0.00291     0.00932027 -0.00986356 -0.02184597
  0.00447101  0.01240073  0.07544714  0.03371756]


In [None]:
linear_predictor

array([0.67254923, 0.86077982, 0.43557393, 0.94059047, 0.8446509 ,
       0.23657039, 0.74629685, 0.99700768, 0.28182768, 0.44495038])

In [None]:
ah_objective(
    time=np.abs(y[:,0]),
    event=(y[:,0] > 0).astype(int),
    linear_predictor=linear_predictor
)[0]

array([ 0.10978369, -0.00322649, -0.00290998, -0.0093203 ,  0.00986353,
        0.02184597, -0.004471  , -0.01240071, -0.07544713, -0.03371757])

In [None]:
np.array(linear_predictor)

array([0.67254923, 0.86077982, 0.43557393, 0.94059047, 0.8446509 ,
       0.23657039, 0.74629685, 0.99700768, 0.28182768, 0.44495038])

In [None]:
y_torch.repeat(2)

tensor([  1.,  -3.,  -3.,  -4.,  -7.,   8.,   9., -11.,  13.,  16.,   1.,  -3.,
         -3.,  -4.,  -7.,   8.,   9., -11.,  13.,  16.])

In [None]:
y_torch.tile(2)

tensor([  1.,  -3.,  -3.,  -4.,  -7.,   8.,   9., -11.,  13.,  16.,   1.,  -3.,
         -3.,  -4.,  -7.,   8.,   9., -11.,  13.,  16.])

In [None]:
import math
n_samples: int = y.shape[0]
bandwidth = 1.30 * math.pow(n_samples, -0.2)
linear_predictor_torch = linear_predictor_torch[:,1]
l1= aft_likelihood_torch(y_torch, linear_predictor_torch[:,1], sample_weight=1.0, bandwidth = bandwidth )
l1.retain_grad()
l1.backward()
print(linear_predictor_torch.grad)

IndexError: too many indices for tensor of dimension 1