Skip to content

Commit

Permalink
ADD sparse prediction
Browse files Browse the repository at this point in the history
  • Loading branch information
jmschrei committed Aug 23, 2017
1 parent 355f1de commit b78addc
Showing 1 changed file with 41 additions and 17 deletions.
58 changes: 41 additions & 17 deletions rambutan/rambutan.py
Expand Up @@ -182,7 +182,7 @@ def __init__(self, name='rambutan', iteration=None, model=None,
self.use_dist = use_dist
self.verbose = verbose

def predict(self, sequence, dnase, regions=None, ctxs=[0]):
def predict(self, sequence, dnase, regions=None, ctxs=[0], sparse=False):
"""Make predictions and return the matrix of probabilities.
Rambutan will make a prediction for each pair of genomic loci defined in
Expand Down Expand Up @@ -211,7 +211,7 @@ def predict(self, sequence, dnase, regions=None, ctxs=[0]):
where there are no n or N symbols in the fasta file. Default
is None.
ctxs: list, optional
ctxs : list, optional
The contexts of the gpus to use for prediction. Currently
prediction is only supported on gpus and not cpus due to
the time it would take for prediction. For example, if you
Expand All @@ -220,6 +220,10 @@ def predict(self, sequence, dnase, regions=None, ctxs=[0]):
ctxs=[0, 1, 3] and the prediction task will be naturally
parallelized across your 3 gpus with a linear speedup.
sparse : bool, optional
If True, return three arrays (the rows, columns, and values)
instead of the full dense matrix. Sparse output is useful for
large matrices. Default is False.
Returns
-------
y : numpy.ndarray, shape=(m, m)
Expand Down Expand Up @@ -255,21 +259,41 @@ def predict(self, sequence, dnase, regions=None, ctxs=[0]):
self.use_seq, self.use_dnase, self.use_dist, self.min_dist,
self.max_dist, self.batch_size, self.verbose) for ctx in ctxs)

n = int(regions.max()) / 1000 + 1
y = numpy.zeros((n, n))

for ctx in ctxs:
with open('.rambutan.predictions.{}.txt'.format(ctx), 'r') as infile:
for line in infile:
mid1, mid2, p = line.split()
mid1 = (int(float(mid1)) - 500) / 1000
mid2 = (int(float(mid2)) - 500) / 1000
p = float(p)
y[mid1, mid2] = p

os.system('rm .rambutan.predictions.{}.txt'.format(ctx))

return y
if sparse == False:
n = int(regions.max()) / 1000 + 1
y = numpy.zeros((n, n))

for ctx in ctxs:
with open('.rambutan.predictions.{}.txt'.format(ctx), 'r') as infile:
for line in infile:
mid1, mid2, p = line.split()
mid1 = (int(float(mid1)) - 500) / 1000
mid2 = (int(float(mid2)) - 500) / 1000
p = float(p)
y[mid1, mid2] = p

os.system('rm .rambutan.predictions.{}.txt'.format(ctx))

return y

else:
rows, cols, values = [], [], []
for ctx in ctxs:
with open('.rambutan.predictions.{}.txt'.format(ctx), 'r') as infile:
for line in infile:
mid1, mid2, p = line.split()
mid1, mid2, p = int(mid1), int(mid2), float(p)

rows.append(mid1)
cols.append(mid2)
values.append(p)

os.system('rm .rambutan.predictions.{}.txt'.format(ctx))

rows = numpy.array(rows)
cols = numpy.array(cols)
values = numpy.array(values)
return rows, cols, values

def fit(self, sequence, dnase, contacts, regions=None, validation_contacts=None,
training_chromosome=None, validation_chromosome=None, ctxs=[0],
Expand Down

0 comments on commit b78addc

Please sign in to comment.