In [1]:
#Link : https://github.com/nikitasigal/biclustlib
# Play around with the algorithms from biclustlib
import numpy as np

from biclustlib.algorithms import ChengChurchAlgorithm
from biclustlib.datasets import load_yeast_tavazoie

# load yeast data used in the original Cheng and Church's paper
data = load_yeast_tavazoie().values

# missing value imputation suggested by Cheng and Church
missing = np.where(data < 0.0)
data[missing] = np.random.randint(low=0, high=800, size=len(missing[0]))

# creating an instance of the ChengChurchAlgorithm class and running with the parameters of the original study
cca = ChengChurchAlgorithm(num_biclusters=100, msr_threshold=300.0, multiple_node_deletion_threshold=1.2)
biclustering = cca.run(data)
print(biclustering)

Bicluster(rows=[  11   17   19   22   24   26   27   36   37   38   40   41   43   44
   53   55   63   67   82   85   87   91   92   93   94   96   98  100
  101  106  109  110  111  112  121  122  124  129  139  140  141  143
  147  149  155  161  166  173  175  176  179  186  192  195  199  205
  207  209  210  213  218  222  225  229  231  232  233  240  245  251
  252  257  258  262  265  266  269  270  271  279  280  285  286  287
  294  297  299  313  314  315  316  325  329  331  337  340  342  346
  349  357  370  372  375  386  390  395  406  411  420  422  430  436
  447  450  451  455  456  470  471  472  474  479  482  486  487  488
  490  491  493  497  504  512  515  517  535  538  544  548  551  554
  559  560  561  567  574  577  581  586  590  591  594  599  602  603
  606  610  617  618  621  630  631  634  637  638  639  644  645  648
  649  652  653  654  656  660  667  679  681  692  694  696  700  708
  711  712  729  734  735  738  740  742  745  752  763  764  

In [7]:
#Trying with binary data
data = np.random.choice(2,(300,50),[0.6,0.4])

In [8]:
#Okay let us try with bibitclustering
from biclustlib.algorithms import BitPatternBiclusteringAlgorithm

In [9]:
bibit = BitPatternBiclusteringAlgorithm(2,4) #min number of rows and columns that must be present in a bicluster
biclustering = bibit.run(data)


In [10]:
max_size = 0
max_biclust_no = None
max_temp_matrix = None
for i in range(len(biclustering.biclusters)):
    temp_matrix = data[np.ix_(biclustering.biclusters[i].rows, biclustering.biclusters[i].cols)]
    num_zero = temp_matrix.size - np.count_nonzero(temp_matrix)
    if num_zero != 0:
        print(f"Found at {i}")
    if max_size < temp_matrix.size:
        max_size = temp_matrix.size
        max_temp_matrix = temp_matrix
        max_biclust_no = i

In [11]:
max_size

116

In [12]:
max_biclust_no

2311

In [13]:
max_temp_matrix.shape

(29, 4)

In [14]:
max_temp_matrix #Notice the all 1s

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])