Skip to content

Commit

Permalink
Removed most methods to find enriched contacts and left only observed…
Browse files Browse the repository at this point in the history
… vs. expected and z-score. The other methods are slow and require a running R server in the background.
  • Loading branch information
fidelram committed Apr 3, 2017
1 parent 8037c65 commit 7d47403
Showing 1 changed file with 11 additions and 100 deletions.
111 changes: 11 additions & 100 deletions hicexplorer/hicFindEnrichedContacts.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from __future__ import division
import argparse
import numpy as np
import sys
from hicexplorer import HiCMatrix
import hicexplorer.parserCommon
from hicexplorer.utilities import transformMatrix, applyFdr, getPearson
from hicexplorer._version import __version__


Expand All @@ -17,33 +14,21 @@ def parse_arguments(args=None):
parser = argparse.ArgumentParser(
parents=[parent_parser],
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description='Applies a transformation to a Hi-C matrix.')

parser.add_argument('--originalMat',
help='File name containing the original hic matrix',
type=argparse.FileType('r'))
description='Identifies enriched contacts by computing a observe vs. expected or a z-score matrix')

parser.add_argument('--outFileName', '-o',
help='File name to save the resulting matrix ',
help='File name to save the resulting matrix',
type=hicexplorer.parserCommon.writableFile,
required=True)

parser.add_argument(
'--method',
help='method to transform the matrix values',
choices=['z-score', 't-score', 'residuals', 'obs/exp', 'p-value',
'pearson', 'nbinom-p-value', 'nbinom-expected',
'nbinom-est-dist', 'log-norm', 'chi-squared', 'none'],
help='Method to transform the matrix values',
choices=['z-score', 'obs/exp'],
required=True)

parser.add_argument(
'--applyFdr',
help='If set the computed values are corrected using fdr. '
'Should only be used with the methods that produce pvalues.',
action='store_true')

parser.add_argument(
'--perChromosome',
'--perchr',
help='Default is to fit distributions per each distance. Setting this '
'option will fit distributions per distance per chromosome',
action='store_true')
Expand All @@ -53,23 +38,10 @@ def parse_arguments(args=None):
help='If set, diagonal counts are not included',
action='store_true')

parser.add_argument(
'--outFormat',
help='Output format',
choices=['hdf5', 'dekker'],
default='hdf5')

parser.add_argument(
'--chromosomes',
help='List of chromosomes to be included in transformation '
'correction.',
default=None,
nargs='+')

parser.add_argument(
'--depth',
help='Depth (in base pairs) up to which the computations will be carried out. A depth of 10.0000 bp '
'means that any computations involving points that are over 10kbp are not considered.',
'means that any computations involving bins that are over 10kbp apart are not considered.',
type=int,
default=None)

Expand All @@ -79,82 +51,21 @@ def parse_arguments(args=None):
return parser


def main():
"""
collects all arguments and executes
the appropriate functions
"""
args = parse_arguments().parse_args()
def main(args=None):
args = parse_arguments().parse_args(args)

hic_ma = HiCMatrix.hiCMatrix(args.matrix)
if args.originalMat:
orig_ma = HiCMatrix.hiCMatrix(args.originalMat.name)
else:
orig_ma = None

try:
hic_ma.maskBins(hic_ma.nan_bins)
except AttributeError:
pass

# remove unwanted Chrs or select a given chromosome
# in case is given
hic_ma.filterUnwantedChr()
if args.originalMat:
orig_ma.filterUnwantedChr()
if args.chromosomes:
hic_ma.keepOnlyTheseChr(args.chromosomes)
if args.originalMat:
orig_ma.reorderChromosomes(hic_ma.chrBinBoundaries.keys())

if args.skipDiagonal:
hic_ma.diagflat()
if args.originalMat:
orig_ma.diagflat()

max_depth_in_bins = None
if args.depth:
binsize = hic_ma.getBinSize()
if args.depth < binsize:
exit("Please specify a depth larger than bin size ({})".format(binsize))
max_depth_in_bins = int(args.depth / binsize)
import scipy.sparse
# work only with the upper matrix
# and remove all pixels that are beyond
# max_depth_in_bis
# (this is done by subtracting a second sparse matrix
# that contains only the upper matrix that wants to be removed.
hic_ma.matrix = scipy.sparse.triu(hic_ma.matrix, k=0, format='csr') - \
scipy.sparse.triu(hic_ma.matrix, k=max_depth_in_bins, format='csr')
hic_ma.matrix.eliminate_zeros()

if args.method == 'obs/exp':
hic_ma.convert_to_obs_exp_matrix()
new_ma = hic_ma.matrix
elif args.method == 'pearson':
sys.stderr.write("\nComputing observed / expected\n")
hic_ma.convert_to_obs_exp_matrix()
sys.stderr.write("\nComputing pearson\n")
new_ma = getPearson(hic_ma.matrix)
elif args.method != 'none':
# check that the normalized and original matrices
# have the same size
if orig_ma:
assert np.all(hic_ma.matrix.shape == orig_ma.matrix.shape), \
"original and derived matrices do not have same shape"
new_ma = transformMatrix(hic_ma, args.method,
per_chr=args.perChromosome,
original_matrix=orig_ma,
depth_in_bins=max_depth_in_bins)
hic_ma.convert_to_obs_exp_matrix(maxdepth=args.depth, perchr=args.perchr)
else:
new_ma = hic_ma.matrix

if args.applyFdr:
new_ma = applyFdr(new_ma)
hic_ma.convert_to_zscore_matrix(maxdepth=args.depth, perchr=args.perchr)

hic_ma.setMatrixValues(new_ma)
hic_ma.restoreMaskedBins()
if args.outFormat == 'dekker':
hic_ma.save_dekker(args.outFileName)
else:
hic_ma.save(args.outFileName)
hic_ma.save(args.outFileName)

0 comments on commit 7d47403

Please sign in to comment.