In [1]:
import pandas as pd
import numpy as np
import os
import time
import copy
import pathlib, tempfile

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from graphviz import Digraph
from joblib import Parallel, delayed
from scipy import stats

from survivors import metrics as metr
from survivors import constants as cnt
from survivors import criteria as crit
from numba import njit, jit

%load_ext line_profiler
%load_ext Cython

In [3]:
%%cython -a
cimport cython
from libc cimport math
cimport numpy as cnp

cnp.import_array()
import numpy as np

cdef int max(int a, int b):
    return a if a > b else b

cdef int chebyshev(int x1, int y1, int x2, int y2):
    return max(abs(x1 - x2), abs(y1 - y2))

def c_benchmark():
    a = np.random.rand(1000, 2)
    b = np.random.rand(1000, 2)
    
    for x1, y1 in a:
        for x2, y2 in b:
            chebyshev(x1, x2, y1, y2)

In [135]:
%%cython -a
cimport cython
import numpy as np
from libc cimport math
cimport numpy as cnp

cnp.import_array()
from scipy import stats

@cython.wraparound(False)
@cython.cdivision(True)
@cython.boundscheck(False)
cpdef lr_statistic(cnp.npy_uint8[:] dur_1, 
                  cnp.npy_uint8[:] dur_2, 
                  cnp.npy_uint8[:] cens_1, 
                  cnp.npy_uint8[:] cens_2, 
                  cnp.npy_uint8[:] times_range, 
                  int weightings):
    cdef int bins = times_range[1] - times_range[0] + 1
    cdef cnp.npy_uint8[:] n_1_j = np.histogram(dur_1, bins=bins, range=times_range)[0]
    cdef cnp.npy_uint8[:] n_2_j = np.histogram(dur_2, bins=bins, range=times_range)[0]
    cdef cnp.npy_uint8[:] O_1_j = np.histogram(np.multiply(dur_1, cens_1), bins=bins, range=times_range)[0]
    cdef cnp.npy_uint8[:] O_2_j = np.histogram(np.multiply(dur_2, cens_2), bins=bins, range=times_range)[0]

    cdef cnp.npy_uint8[:] N_1_j = np.cumsum(n_1_j[::-1])[::-1]
    cdef cnp.npy_uint8[:] N_2_j = np.cumsum(n_2_j[::-1])[::-1]
    cdef cnp.npy_uint8[:] ind = np.where(np.multiply(N_1_j, N_2_j) != 0)
    N_1_j = np.array(N_1_j)[ind]
    N_2_j = np.array(N_2_j)[ind]
    O_1_j = np.array(O_1_j)[ind]
    O_2_j = np.array(O_2_j)[ind]

    N_j = np.sum(N_1_j, N_2_j)
    O_j = np.sum(O_1_j, O_2_j)
    cdef cnp.npy_double[:] E_1_j = N_1_j * O_j / N_j
    cdef cnp.npy_double[:, :] res = np.zeros((N_j.shape[0], 3), dtype=np.float32)
    res[:, 1] = np.subtract(O_1_j, E_1_j)
    res[:, 2] = E_1_j * (N_j - O_j) * N_2_j / (N_j * (N_j - 1))
    res[:, 0] = 1.0
    if weightings == 2:
        res[:, 0] = N_j
    elif weightings == 3:
        res[:, 0] = np.sqrt(N_j)
    elif weightings == 4:
        res[:, 0] = np.cumprod((1.0 - O_j / (N_j + 1)))
    logrank = np.power(np.multiply(res[:, 0], res[:, 1]).sum(), 2) / (np.multiply(np.multiply(res[:, 0], res[:, 0]), res[:, 2]).sum())
    return logrank

def weight_lr_fast(dur_A, dur_B, cens_A=None, cens_B=None, weightings=""):
        times = np.unique(np.hstack((dur_A, dur_B)))
        dur_A = np.searchsorted(times, dur_A) + 1
        dur_B = np.searchsorted(times, dur_B) + 1
        times_range = np.array([1, times.shape[0]])
        if cens_A is None:
            cens_A = np.ones(dur_A.shape[0])
        if cens_B is None:
            cens_B = np.ones(dur_B.shape[0])
        d = {"logrank": 1, "wilcoxon": 2, "tarone-ware": 3, "peto": 4}
        weightings = d.get(weightings, 1)
        logrank = lr_statistic(dur_A.astype(np.uint8), 
                               dur_B.astype(np.uint8), 
                               cens_A.astype(np.uint8), 
                               cens_B.astype(np.uint8), 
                               times_range.astype(np.uint8), 
                               np.uint8(weightings))
        pvalue = stats.chi2.sf(logrank, df=1)
        return pvalue

In [136]:
dur_A_ = np.random.choice(10000, 10000)
cens_A_ = np.random.choice(2, 10000)
dur_B_ = np.random.choice(10000, 10000)
cens_B_ = np.random.choice(2, 10000)
weight_lr_fast(dur_A_, dur_B_, cens_A_, cens_B_, 2)

ValueError: Buffer dtype mismatch, expected 'npy_uint8' but got 'long long'

In [128]:
lr_statistic(np.array([1, 2]).astype(np.int_),
             np.array([3, 4]).astype(np.int_),
             np.array([1, 0]).astype(np.int_),
             np.array([0, 1]).astype(np.int_),
             np.array([1, 4]).astype(np.int_),
             np.int_(1),
            )

ValueError: Buffer dtype mismatch, expected 'npy_uint8' but got 'long long'

In [77]:
np.int_t

AttributeError: module 'numpy' has no attribute 'int_t'