In [7]:
import numpy as np
from utils import *
import pybnb
import operator
from collections import defaultdict
from itertools import chain, combinations

ms_package_path = '/home/frashidi/software/bin/ms'
csp_solver_path = '/home/frashidi/software/temp/csp_solvers/maxino/code/build/release/maxino'

In [2]:
ground, noisy, (countFN,countFP,countNA) = get_data(n=10, m=8, seed=1, fn=0.20, fp=0, na=0, 
                                                    ms_package_path=ms_package_path)
# print(noisy)

solution, (flips_0_1, flips_1_0, flips_2_0, flips_2_1) = PhISCS_I(noisy, beta=0.20, alpha=0.0001)
# print(solution)

# solution = PhISCS_B(noisy, beta=0.20, alpha=0.0000000001, csp_solver_path=csp_solver_path)
# print(solution)

print(np.where(solution != noisy))

(array([0, 0, 1, 3, 5, 5, 7]), array([2, 6, 2, 2, 2, 6, 2]))


In [8]:
def number_of_conflicts(D):
        noc = 0
        for p in range(D.shape[1]):
            for q in range(p + 1, D.shape[1]):
                oneone = 0
                zeroone = 0
                onezero = 0
                for r in range(D.shape[0]):
                    if D[r][p] == 1 and D[r][q] == 1:
                        oneone += 1
                    if D[r][p] == 0 and D[r][q] == 1:
                        zeroone += 1
                    if D[r][p] == 1 and D[r][q] == 0:
                        onezero += 1
                noc += oneone*zeroone*onezero
        return noc

In [40]:
def give_me_the_lower_bound(noisy):
    def important_columns_in_conflicts(D):
        important_columns = defaultdict(lambda: 0)
        for p in range(D.shape[1]):
            for q in range(p + 1, D.shape[1]):
                oneone = 0
                zeroone = 0
                onezero = 0
                for r in range(D.shape[0]):
                    if D[r][p] == 1 and D[r][q] == 1:
                        oneone += 1
                    if D[r][p] == 0 and D[r][q] == 1:
                        zeroone += 1
                    if D[r][p] == 1 and D[r][q] == 0:
                        onezero += 1
                if oneone*zeroone*onezero > 0:
                    important_columns[(p,q)] += oneone*zeroone*onezero
        return important_columns
    
    def get_partinion(D):
        icic = important_columns_in_conflicts(D)
        sorted_icic = sorted(icic.items(), key=operator.itemgetter(1), reverse=True)
        pairs = [sorted_icic[0][0]]
        elements = [sorted_icic[0][0][0], sorted_icic[0][0][1]]
        sorted_icic.remove(sorted_icic[0])
        for x in sorted_icic[:]:
            notFound = True
            for y in x[0]:
                if y in elements:
                    sorted_icic.remove(x)
                    notFound = False
                    break
            if notFound:
                pairs.append(x[0])
                elements.append(x[0][0])
                elements.append(x[0][1])
        #print(sorted_icic, pairs, elements)
        partitions = []
        for x in pairs:
            partitions.append(D[:,x])
        return partitions
    
    def give_me_the_lower_bound_helper(D):        
        def conflicts_set(D):
            all_conf = []
            for p in range(D.shape[1]):
                for q in range(p + 1, D.shape[1]):
                    conf_oneone = []
                    conf_zeroone = []
                    conf_onezero = []
                    for r in range(D.shape[0]):
                        if D[r][p] == 1 and D[r][q] == 1:
                            conf_oneone.append(r)
                        if D[r][p] == 0 and D[r][q] == 1:
                            conf_zeroone.append(r)
                        if D[r][p] == 1 and D[r][q] == 0:
                            conf_onezero.append(r)
                    for r1 in conf_oneone:
                        for r2 in conf_zeroone:
                            for r3 in conf_onezero:
                                #print(p,q, r1, r2, r3)
                                all_conf.append(set([r1,r2,r3]))
            return all_conf
        
        def powerset(iterable):
            xs = list(iterable)
            return chain.from_iterable(combinations(xs,n) for n in range(len(xs)+1))
    
        rows_set = range(D.shape[0])
        for subset in map(set, powerset(set(rows_set))):
            if len(subset) == 0:
                continue
            all_conf = conflicts_set(D)
            if len(all_conf) == 0:
                return 0
            catch_subset = 0
            for conf in all_conf:
                if subset.issubset(conf):
                    catch_subset += 1
            if catch_subset == len(all_conf):
                if len(subset) == 1:
                    return 1
                else:
                    return int(np.ceil(len(subset)/np.log2(len(subset))))
            return int(np.ceil(len(rows_set)/np.log2(len(rows_set))))
    
    #return give_me_the_lower_bound_helper(noisy)
    if number_of_conflicts(noisy) == 0:
        return 0
    LB = []
    for D in get_partinion(noisy):
        LB.append(give_me_the_lower_bound_helper(D))
    return sum(LB)

In [41]:
noisy = np.array([
    [0,1,1,0],
    [1,0,0,1],
    [1,1,0,0],
    [0,0,1,0]
])

In [42]:
class PhISCS(pybnb.Problem):
    def __init__(self, I):
        self.I = I
        self.X = np.where(self.I == 0)
        self.flip = 0
        self.idx = 0
    
    def sense(self):
        return pybnb.minimize

    def objective(self):
        nc = number_of_conflicts(self.I)
        if nc == 0:
            return 0
        else:
            return 2*nc

    def bound(self):
        return give_me_the_lower_bound(self.I)

    def save_state(self, node):
        node.state = (self.I, self.idx, self.flip)

    def load_state(self, node):
        self.I, self.idx, self.flip = node.state

    def branch(self):
        if self.idx < len(self.X[0]):
            node = pybnb.Node()
            I = self.I.copy()
            x = self.X[0][self.idx]
            y = self.X[1][self.idx]
            I[x, y] = 1
            node.state = (I, self.idx+1, self.flip+1)
            yield node
            
            node = pybnb.Node()
            I = self.I.copy()
            x = self.X[0][self.idx]
            y = self.X[1][self.idx]
            I[x, y] = 0
            node.state = (I, self.idx+1, self.flip)
            yield node

problem = PhISCS(noisy)
results = pybnb.solve(problem)

print(results.best_node.state)

Starting branch & bound solve:
 - dispatcher pid: 66378 (phi.cs.indiana.edu)
 - worker processes: 1
--------------------------------------------------------------------------------------------------------------------------
         Nodes        |                      Objective Bounds                       |              Work              
      Expl    Unexpl  |      Incumbent           Bound    Rel. Gap         Abs. Gap | Time (s)  Nodes/Sec Imbalance   Idle
         0         1  |            inf            -inf         inf%             inf |      0.0       0.00     0.00%      0
*        1         2  |              4               1   75.000000%               3 |      0.0     593.00     0.00%      0
*       40        19  |              2               1   50.000000%               1 |      0.0    3154.69     0.00%      0
*       50         0  |              0               0    0.000000%               0 |      0.0    2986.42     0.00%      0
--------------------------------------------

In [33]:
a = np.array([[1,1,1,0],
    [1,1,0,1],
    [1,1,0,1],
    [1,1,1,0]])
is_conflict_free(a)

True

–––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

In [16]:
# import pandas as pd

# df = pd.DataFrame(noisy)
# df.columns = ['mut'+str(i) for i in range(noisy.shape[1])]
# df.index = ['cell'+str(i) for i in range(noisy.shape[0])]
# df.index.name = 'cellID/mutID'
# df.to_csv('noisy.SC', sep='\t')

# df = pd.read_csv(file, index_col=0, sep='\t')

[1] [ms package (paper)](https://academic.oup.com/bioinformatics/article/18/2/337/225783)  
[2] [ms package (download)](http://home.uchicago.edu/~rhudson1/source/mksamples.html)  
[3] [csp solvers](http://mse17.cs.helsinki.fi/descriptions.html)  
[4] [PhISCS](https://www.biorxiv.org/content/early/2018/07/25/376996)  

For installing maxino go to the url of [3] and then `wget` the `maxino` package. Then `unzip` it. Ater that go to the code folder and just run `make`. Then change `csp_solver_path` accordingly.  

n = number of cells  
m = number of mutations  
seed = is a seed number of generating the ground truth by ms package (not important leave it as 1)  
fn = false negative rate  
fp = false positive rate  
na = missing value rate