In [53]:
import pandas as pd
import numpy as np
import os
import time
import copy
import pathlib, tempfile

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from graphviz import Digraph
from joblib import Parallel, delayed
from scipy import stats

from survivors import metrics as metr
from survivors import constants as cnt
from survivors import criteria as crit

%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [592]:
@jit
def count_N_O(n_1_j, n_2_j, O_1_j, O_2_j):
    N_1_j = np.cumsum(n_1_j[::-1])[::-1]
    N_2_j = np.cumsum(n_2_j[::-1])[::-1]
    ind = np.where(N_1_j * N_2_j != 0)
    N_1_j = N_1_j[ind]
    N_2_j = N_2_j[ind]
    O_1_j = O_1_j[ind]
    O_2_j = O_2_j[ind]

    N_j = N_1_j + N_2_j
    O_j = O_1_j + O_2_j
    
    E_1_j = N_1_j*O_j/N_j
    res = np.zeros((N_j.shape[0], 3), dtype=np.float32)
    res[:, 1] = O_1_j - E_1_j
    res[:, 2] = E_1_j*(N_j - O_j) * N_2_j/(N_j*(N_j - 1))
    return N_j, O_j, res

@jit
def get_lr(res):
    return np.power((res[:, 0]*res[:, 1]).sum(), 2) / ((res[:, 0]*res[:, 0]*res[:, 2]).sum())

def lr_statistic(dur_1, dur_2, cens_1, cens_2, times, weightings):
    times_range = (times.min(), times.max())
    bins = times_range[1] - times_range[0] + 1
    n_1_j = np.histogram(dur_1, bins=bins,
                         range=times_range)[0]
    n_2_j = np.histogram(dur_2, bins=bins, 
                         range=times_range)[0]
    O_1_j = np.histogram(dur_1*cens_1, bins=bins, #weights=cens_1,
                         range=times_range)[0]
    O_2_j = np.histogram(dur_2*cens_2, bins=bins, #weights=cens_2,
                         range=times_range)[0]
    N_j, O_j, res = count_N_O(n_1_j, n_2_j, O_1_j, O_2_j)
    res[:, 0] = 1
    if weightings == "wilcoxon":
        res[:, 0] = N_j
    elif weightings == "tarone-ware":
        res[:, 0] = np.sqrt(N_j)
    elif weightings == "peto":
        res[:, 0] = np.cumprod((1.0 - float(O_j)/(N_j+1)))
        
    logrank = get_lr(res)
    return logrank

def weight_lr_fast(dur_A, dur_B, cens_A = None, cens_B = None, weightings = ""):
    if cens_A is None:
        cens_A = np.ones(dur_A.shape[0])
    if cens_B is None:
        cens_B = np.ones(dur_B.shape[0])
    times = np.union1d(np.unique(dur_A), np.unique(dur_B))
    logrank = lr_statistic(dur_A, dur_B, cens_A, cens_B, times, weightings)
    pvalue = stats.chi2.sf(logrank, df=1)
    return pvalue

In [620]:
dur_A_ = np.random.choice(10000, 10000)
cens_A_ = np.random.choice(2, 10000)
dur_B_ = np.random.choice(10000, 10000)
cens_B_ = np.random.choice(2, 10000)
times = np.union1d(np.unique(dur_A_), np.unique(dur_B_))
times

array([   1,    2,    3, ..., 9997, 9998, 9999])

In [621]:
weight_lr_fast(dur_A_, dur_B_, cens_A_, cens_B_)

0.03762169720052538

In [622]:
crit.weight_lr_fast(dur_A_, dur_B_, cens_A_, cens_B_)

0.03762169720052538

In [623]:
times = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
dur_A = np.array([0, 2, 3, 4, 5, 3, 9, 3])
cens_A = np.array([1, 1, 0, 1, 0, 0, 0, 0])
dur_B = np.array([4, 5, 6, 7, 8, 1, 3])
cens_B = np.array([1, 1, 1, 0, 1, 1, 1])

In [624]:
@jit
def numb_coeffs_t_j(dur_A, dur_B, cens_A, cens_B, t_j, weightings):
    N_1_j = (dur_A >= t_j).sum()
    N_2_j = (dur_B >= t_j).sum()
    if N_1_j == 0 or N_2_j == 0:
        return 0, 0, 0
    O_1_j = ((dur_A == t_j) * cens_A).sum()  # np.where(dur_A == t_j, cens_A,0).sum()
    O_2_j = ((dur_B == t_j) * cens_B).sum()  # np.where(dur_B == t_j, cens_B,0).sum()
    
    N_j = N_1_j + N_2_j
    O_j = O_1_j + O_2_j
    E_1_j = N_1_j*O_j/N_j
    w_j = 1
    if weightings == "wilcoxon":
        w_j = N_j
    elif weightings == "tarone-ware":
        w_j = np.sqrt(N_j)
    elif weightings == "peto":
        w_j = (1.0 - float(O_j)/(N_j+1))
    
    num = O_1_j - E_1_j
    denom = E_1_j*(N_j - O_j) * N_2_j/(N_j*(N_j - 1))
    return w_j, num, denom

@jit
def numb_lr_statistic(dur_A, dur_B, cens_A, cens_B, times, weightings):
    res = np.zeros((times.shape[0], 3), dtype=np.float32)
    for j, t_j in enumerate(times):
        res[j] = numb_coeffs_t_j(dur_A, dur_B, cens_A, cens_B, t_j, weightings)
    
    if weightings == "peto":
        res[:, 0] = np.cumprod(res[:, 0])
#     print(res)
    # logrank = np.dot(res[:, 0], res[:, 1])**2 / np.dot(res[:, 0]*res[:, 0], res[:, 2])
    logrank = np.power((res[:, 0]*res[:, 1]).sum(), 2) / ((res[:, 0]*res[:, 0]*res[:, 2]).sum())
#     print(np.power((res[:, 0]*res[:, 1]).sum(), 2), ((res[:, 0]*res[:, 0]*res[:, 2]).sum()))
#     print(logrank)
    return logrank

def numb_weight_lr_fast(dur_A, dur_B, cens_A = None, cens_B = None, weightings = ""):
    try:
        if cens_A is None:
            cens_A = np.ones(dur_A.shape[0])
        if cens_B is None:
            cens_B = np.ones(dur_B.shape[0])

        #     a1 = np.unique(dur_A)
        #     a2 = np.unique(dur_B)
        #     times = np.unique(np.clip(np.union1d(a1,a2), 0, np.min([a1.max(), a2.max()])))
        times = np.union1d(np.unique(dur_A), np.unique(dur_B))
        logrank = numb_lr_statistic(dur_A, dur_B, cens_A, cens_B, times, weightings)
        pvalue = stats.chi2.sf(logrank, df=1)
        return pvalue
    except:
        return 1.0

In [625]:
numb_weight_lr_fast(dur_A, dur_B, cens_A, cens_B)

0.5281722982828967

In [626]:
weight_lr_fast(dur_A, dur_B, cens_A, cens_B)

0.5931646391562364

In [627]:
crit.weight_lr_fast(dur_A, dur_B, cens_A, cens_B)

0.5281722982828967

In [129]:
np.cumsum(np.array([1, 2, 3, 4])[::-1])[::-1]

array([10,  9,  7,  4])

In [215]:
res = weight_lr_fast(dur_A, dur_B, cens_A, cens_B)
((res[:, 0]*res[:, 0]*res[:, 2]).sum())

[[ 1.         -0.5         0.25      ]
 [ 1.          0.46153846  0.24852072]
 [ 1.          0.5         0.6136364 ]
 [ 1.          0.25        0.4017857 ]
 [ 1.         -0.33333334  0.22222222]
 [ 1.         -0.25        0.1875    ]
 [ 1.          0.          0.        ]
 [ 1.         -0.5         0.25      ]]
0.1382314766966033 2.173665
0.06359372376451176


2.173665

In [230]:
sum(res[:, 0]*res[:, 0]*res[:, 2])

2.173665016889572

In [560]:
times_range = (times.min(), times.max())
bins = times_range[1] - times_range[0] + 1
n_1_j = np.histogram(dur_A, bins=bins,
                     range=times_range)[0]
n_2_j = np.histogram(dur_B, bins=bins, 
                     range=times_range)[0]
O_1_j = np.histogram(dur_A*cens_A, bins=bins, #weights=cens_1,
                     range=times_range)[0]
O_2_j = np.histogram(dur_B*cens_B, bins=bins, #weights=cens_2,
                     range=times_range)[0]

In [561]:
print(n_1_j, n_2_j, O_1_j, O_2_j)

[0 0 1 ... 0 0 0] [0 1 0 ... 0 0 0] [5 0 1 ... 0 0 0] [1 1 0 ... 0 0 0]


In [555]:
dur_A

array([  2,   3,   4,   5,   3, 100,   3])

In [659]:
class _A_(object):
    __slots__ = ("a", "b", "c")
    def __init__(self):
        self.a = 3
        self.b = 3
        self.c = 3
        
    def find(self, param):
        if param in self.__dict__:
            return getattr(self, param, np.nan)

In [660]:
_a_ = _A_()

In [661]:
_a_.find("c")

AttributeError: '_A_' object has no attribute '__dict__'

In [662]:
"c" in _a_.__dict__

AttributeError: '_A_' object has no attribute '__dict__'