## Optimizations for computing conflicting columns

1. Cast the matrix to `bool` so that the 0/1 entries of each column can be used directly as boolean masks instead of being recomputed (46% speedup)
2. Transpose the matrix first so that operations run row-wise and exploit memory locality (~2.3% speedup)
3. Short-circuit the conflict check as soon as one of the required patterns is missing (20% speedup)

Unsuccessful improvements:
1. Packing bits using `np.packbits()` did not noticeably speed up the code.  


In [2]:
import numpy as np
from lower_bounds_benchmark import is_conflict
from scphylo.datasets import melanoma20
from time import time

In [3]:
def is_conflict_row_wise(X_T, p, q):
    """Report whether rows p and q of the transposed matrix X_T conflict.

    Two rows conflict when, looking at them position by position, all three
    of the patterns (1, 0), (0, 1) and (1, 1) occur at least once.
    All three patterns are always evaluated; there is no early exit.
    """
    first, second = X_T[p], X_T[q]
    # Masks of the entries equal to 1, shared by the pattern checks below.
    first_set = first == 1
    second_set = second == 1
    has_10 = np.any(first_set & (second == 0))
    has_11 = np.any(first_set & second_set)
    has_01 = np.any((first == 0) & second_set)
    return has_10 and has_01 and has_11

In [4]:
def manual_transpose(X):
    """Return a freshly allocated, C-ordered boolean transpose of X.

    The copy is filled element by element on purpose, so that the result
    owns its memory in row-major order instead of being a strided view.

    Args:
        X: 2-D array-like object exposing ``shape`` and ``X[i, j]`` indexing.

    Returns:
        Boolean array of shape ``(n, m)`` where ``X.shape == (m, n)`` and
        ``result[j, i] == bool(X[i, j])``.
    """
    m, n = X.shape
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and is unavailable in NumPy 1.24-1.26, where it raises AttributeError.
    rv = np.empty((n, m), dtype=bool, order='C')
    for i in range(m):
        for j in range(n):
            rv[j, i] = X[i, j]
    return rv

In [5]:
# Benchmark settings read by the timing cells below.
num_cols = 200    # how many leading columns of the dataset are kept
num_repeat = 40   # how many times each timed loop is repeated before averaging

In [30]:
# Baseline: time the imported column-wise helper `is_conflict` over every
# pair of columns (i < j), repeated `num_repeat` times.
# Builtin `bool` replaces `np.bool`, an alias deprecated in NumPy 1.20 and
# unavailable in NumPy 1.24-1.26.
data = melanoma20().X.astype(bool)[:, :num_cols]
m, n = data.shape
colwise_function_time = time()
for _ in range(num_repeat):
    for i in range(n):
        for j in range(i+1, n):
            is_conflict(data, i, j)
colwise_function_time = time() - colwise_function_time
print(f"Average time to compute column-wise conflicts: {colwise_function_time/num_repeat}")



Average time to compute column-wise conflicts: 0.4885720670223236


In [20]:
# Column-wise variant without the helper: because the data is boolean, the
# columns themselves act as masks (`~col` stands for "entry is 0").
# Builtin `bool` replaces `np.bool`, an alias deprecated in NumPy 1.20 and
# unavailable in NumPy 1.24-1.26.
data = melanoma20().X.astype(bool)[:, :num_cols]
m, n = data.shape
colwise_time = time()
for _ in range(num_repeat):
    for i in range(n):
        for j in range(i+1, n):
            is10 = np.any(data[:, i] & ~data[:, j])
            is11 = np.any(data[:, i] & data[:, j])
            is01 = np.any(~data[:, i] & data[:, j])
            # Result is discarded on purpose; only the cost is being measured.
            is10 and is11 and is01
colwise_time = time() - colwise_time
print(f"Average time to compute column-wise conflicts: {colwise_time/num_repeat}")



Average time to compute column-wise conflicts: 0.38123714327812197


In [16]:
# Row-wise variant: work on the transposed matrix so that each former column
# is addressed as a row. `n` is not set here; it is reused from an earlier cell.
# Builtin `bool` replaces `np.bool`, an alias deprecated in NumPy 1.20 and
# unavailable in NumPy 1.24-1.26.
# NOTE(review): `astype` defaults to order='K', which follows the source memory
# layout, so the rows of `data_T` may still be strided rather than contiguous
# -- confirm with `data_T.flags` before attributing the timing to locality.
data_T = melanoma20().X.T.astype(bool)[:num_cols, :]
rowwise_time = time()
for _ in range(num_repeat):
    for i in range(n):
        for j in range(i+1, n):
            is10 = np.any(data_T[i] & ~data_T[j])
            is11 = np.any(data_T[i] & data_T[j])
            is01 = np.any(~data_T[i] & data_T[j])
            # Result is discarded on purpose; only the cost is being measured.
            is10 and is11 and is01
rowwise_time = time() - rowwise_time
print(f"Time to compute row-wise conflicts: {rowwise_time/num_repeat}")



Time to compute row-wise conflicts: 0.3759960770606995


In [32]:
def is_conflict_early_exit(data_T, i, j):
    """Return True when rows i and j of the boolean matrix data_T conflict.

    The patterns (1, 0), (1, 1) and (0, 1) are tested in that order, and
    the chained `and` stops at the first pattern that never occurs, so the
    remaining masks are not computed.
    """
    row_i, row_j = data_T[i], data_T[j]
    return bool(
        np.any(row_i & ~row_j)
        and np.any(row_i & row_j)
        and np.any(~row_i & row_j)
    )
    
# Time the short-circuiting check on the transposed boolean matrix.
# `n` is not set here; it is reused from an earlier cell.
# Builtin `bool` replaces `np.bool`, an alias deprecated in NumPy 1.20 and
# unavailable in NumPy 1.24-1.26.
data_T = melanoma20().X.T.astype(bool)[:num_cols, :]
rowwise_time_early_exit = time()
for _ in range(num_repeat):
    for i in range(n):
        for j in range(i+1, n):
            is_conflict_early_exit(data_T, i, j)
rowwise_time_early_exit = time() - rowwise_time_early_exit
print(f"Time to compute row-wise conflicts: {rowwise_time_early_exit/num_repeat}")



Time to compute row-wise conflicts: 0.3220328688621521


In [28]:

# Inspect the shape obtained when each row of the transposed boolean matrix
# is packed into uint8 bytes (8 entries per byte).
# Builtin `bool` replaces `np.bool`, an alias deprecated in NumPy 1.20 and
# unavailable in NumPy 1.24-1.26.
data_T = melanoma20().X.T.astype(bool)[:num_cols, :]
packed = np.packbits(data_T, axis=1)
packed.shape



(200, 3)

In [33]:
def is_conflict_early_exit(data_T, i, j):
    """Check rows i and j of data_T for a conflict, stopping early.

    Tests the patterns (1, 0), (1, 1) and (0, 1) in that order using
    bitwise operators and returns the first falsy `np.any` result, or the
    result of the last test when the first two succeed.
    """
    has_10 = np.any(data_T[i] & ~data_T[j])
    if not has_10:
        return has_10
    has_11 = np.any(data_T[i] & data_T[j])
    if not has_11:
        return has_11
    return np.any(~data_T[i] & data_T[j])
    
# Time the short-circuiting check on bit-packed rows.
# `n` is not set here; it is reused from an earlier cell. Note that this cell
# overwrites `rowwise_time_early_exit` from the unpacked early-exit benchmark.
# Builtin `bool` replaces `np.bool`, an alias deprecated in NumPy 1.20 and
# unavailable in NumPy 1.24-1.26.
data_T = melanoma20().X.T.astype(bool)[:num_cols, :]
# Pack each row (formerly column) into uint8 bitvectors
data_T = np.packbits(data_T, axis=1)
rowwise_time_early_exit = time()
for _ in range(num_repeat):
    for i in range(n):
        for j in range(i+1, n):
            is_conflict_early_exit(data_T, i, j)
rowwise_time_early_exit = time() - rowwise_time_early_exit
print(f"Time to compute row-wise conflicts: {rowwise_time_early_exit/num_repeat}")



Time to compute row-wise conflicts: 0.32994045615196227


In [10]:
# Check for time improvements by transposing or storing the array data differently
# data_T = np.ascontiguousarray(melanoma20().X.T, dtype=np.bool)[:num_cols, :]
# rowwise_time = time()
# for _ in range(num_repeat):
#     for i in range(n):
#         for j in range(i+1, n):
#             is10 = np.any(data_T[i] & ~data_T[j])
#             is11 = np.any(data_T[i] & data_T[j])
#             is01 = np.any(~data_T[i] & data_T[j])
#             is10 and is11 and is01
# rowwise_time = time() - rowwise_time
# print(f"Time to compute row-wise conflicts: {rowwise_time/num_repeat}")

# data_T = manual_transpose(melanoma20().X).astype(np.bool)[:num_cols, :]
# rowwise_time = time()
# for _ in range(num_repeat):
#     for i in range(n):
#         for j in range(i+1, n):
#             is10 = np.any(data_T[i] & ~data_T[j])
#             is11 = np.any(data_T[i] & data_T[j])
#             is01 = np.any(~data_T[i] & data_T[j])
#             is10 and is11 and is01
# rowwise_time = time() - rowwise_time
# print(f"Time to compute row-wise conflicts: {rowwise_time/num_repeat}")

In [31]:
# Speedup ratios: each is a baseline time divided by an optimised time, so a
# value above 1 means the row-wise variant was faster.
# `rowwise_time_early_exit` is assigned by both the boolean and the bit-packed
# early-exit cells, so these ratios describe whichever of them ran last.
print(f"Rowwise runs {colwise_function_time / rowwise_time:.3f}x faster than colwise with helper function")
print(f"Rowwise runs {colwise_time / rowwise_time:.3f}x faster than colwise")
print(f"Rowwise early exit runs {colwise_function_time / rowwise_time_early_exit:.3f}x faster than colwise with helper function")
print(f"Rowwise early exit runs {colwise_time / rowwise_time_early_exit:.3f}x faster than colwise")

Rowwise runs 1.299x faster than colwise with helper function
Rowwise runs 1.014x faster than colwise
Rowwise early exit runs 1.537x faster than colwise with helper function
Rowwise early exit runs 1.200x faster than colwise


## Speed up vertex cover

Since our vertex cover instances are unweighted, we can skip some of the steps used in `networkx.algorithms.approximation.min_weighted_vertex_cover()`.

In [None]:
from vc import vertex_cover_pp, make_graph

# Time `num_repeat` runs of the existing `vertex_cover_pp` as the baseline.
# Builtin `bool` replaces `np.bool`, an alias deprecated in NumPy 1.20 and
# unavailable in NumPy 1.24-1.26.
data = melanoma20().X.astype(bool)[:, :num_cols]
vc_prev = time()
for _ in range(num_repeat):
    vertex_cover_pp(data)
vc_prev = time() - vc_prev

def min_unweighted_vertex_cover(G):
    """Return a vertex cover of the unweighted graph G of at most twice the minimum size.

    The edges are scanned once. Whenever an edge has neither endpoint in the
    cover yet, BOTH endpoints are added. The edges picked this way share no
    endpoint (they form a maximal matching), and any vertex cover needs at
    least one node per matching edge, so ``len(cover) <= 2 * optimum``.
    Half the cover size is therefore a valid lower bound on the optimum.

    Adding only one endpoint per uncovered edge would still cover every edge
    but loses that bound: on a star whose edges are listed leaf-first it
    selects every leaf although the centre alone suffices.

    Args:
        G: graph object whose ``edges()`` yields ``(u, v)`` pairs of
            hashable nodes.

    Returns:
        A set of nodes such that every edge has at least one endpoint in it.
    """
    cover = set()
    for u, v in G.edges():
        if u in cover or v in cover:
            continue
        # Take both endpoints; for a self-loop this adds the single node once.
        cover.update((u, v))
    return cover

def vertex_cover_pp_faster(A):
    """Estimate the bit flips needed to make A a perfect phylogeny.

    Builds the graph for A with ``make_graph`` and covers it with
    ``min_unweighted_vertex_cover``.

    Returns a pair:
    1. half the size of that cover, rounded up, as an int; this is the
       value used as the lower bound on the number of bit flips.
    2. a list of the nodes of the graph.
       NOTE(review): this lists every graph node, not only the nodes in
       the cover -- confirm that this is the intended set of flipped bits.
    """
    graph = make_graph(A)
    cover_size = len(min_unweighted_vertex_cover(graph))
    return int(np.ceil(cover_size / 2)), list(graph.nodes)
    
# Time `num_repeat` runs of the simplified vertex-cover routine on the same `data`.
vc_new = time()
for _ in range(num_repeat):
    vertex_cover_pp_faster(data)
vc_new = time() - vc_new


# A ratio above 1 means the new routine finished the same number of runs in less time.
print(f"New VC is a {vc_prev/vc_new:.3f}x speedup")



New VC is a 1.268x speedup
