Skip to content

Commit

Permalink
Adding obs_exp with included non-zero values.
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimwolff committed Nov 19, 2018
1 parent 146b687 commit 764312e
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 7 deletions.
22 changes: 20 additions & 2 deletions hicexplorer/hicTransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from hicmatrix import HiCMatrix as hm
from hicexplorer._version import __version__
from hicexplorer.utilities import obs_exp_matrix_lieberman, obs_exp_matrix_norm, obs_exp_matrix
from hicexplorer.utilities import obs_exp_matrix_lieberman, obs_exp_matrix_norm, obs_exp_matrix_non_zero, obs_exp_matrix
from hicexplorer.utilities import convertNansToZeros, convertInfsToZeros


Expand Down Expand Up @@ -39,7 +39,7 @@ def parse_arguments(args=None):

parserOpt.add_argument('--method', '-me',
help='Transformation method to use for input matrix. Transformation is computed per chromosome.',
choices=['obs_exp', 'obs_exp_lieberman', 'pearson', 'covariance', 'norm'],
choices=['obs_exp', 'obs_exp_lieberman', 'obs_exp_non_zero','pearson', 'covariance', 'norm'],
default='obs_exp')

parserOpt.add_argument('--chromosomes',
Expand Down Expand Up @@ -93,6 +93,13 @@ def _obs_exp(pSubmatrix):
obs_exp_matrix_ = convertInfsToZeros(csr_matrix(obs_exp_matrix_)).todense()
return obs_exp_matrix_

def _obs_exp_non_zero(pSubmatrix):

obs_exp_matrix_ = obs_exp_matrix_non_zero(pSubmatrix)
obs_exp_matrix_ = convertNansToZeros(csr_matrix(obs_exp_matrix_))
obs_exp_matrix_ = convertInfsToZeros(csr_matrix(obs_exp_matrix_)).todense()
return obs_exp_matrix_

def main(args=None):

args = parse_arguments().parse_args(args)
Expand Down Expand Up @@ -135,6 +142,17 @@ def main(args=None):
submatrix = _obs_exp(hic_ma.matrix)
trasf_matrix = csr_matrix(submatrix)

elif args.method == 'obs_exp_non_zero':
if args.perChromosome:

for chrname in hic_ma.getChrNames():
chr_range = hic_ma.getChrBinRange(chrname)
submatrix = hic_ma.matrix[chr_range[0]:chr_range[1], chr_range[0]:chr_range[1]]
submatrix.astype(float)
trasf_matrix[chr_range[0]:chr_range[1], chr_range[0]:chr_range[1]] = lil_matrix(_obs_exp_non_zero(submatrix))
else:
submatrix = _obs_exp(hic_ma.matrix)
trasf_matrix = csr_matrix(submatrix)
elif args.method == 'obs_exp_lieberman':
length_chromosome = 0
chromosome_count = len(hic_ma.getChrNames())
Expand Down
4 changes: 2 additions & 2 deletions hicexplorer/test/general/test_hicTransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_hic_transfer_obs_exp():
outfile = NamedTemporaryFile(suffix='obs_exp_.h5', delete=False)
outfile.close()

args = "--matrix {} --outFileName {} --method obs_exp".format(original_matrix, outfile.name).split()
args = "--matrix {} --outFileName {} --method obs_exp_non_zero".format(original_matrix, outfile.name).split()
hicTransform.main(args)

test = hm.hiCMatrix(ROOT + "hicTransform/obs_exp.h5")
Expand All @@ -30,7 +30,7 @@ def test_hic_transfer_obs_exp_perChromosome():
outfile = NamedTemporaryFile(suffix='obs_exp_.h5', delete=False)
outfile.close()

args = "--matrix {} --outFileName {} --method obs_exp --perChromosome".format(original_matrix, outfile.name).split()
args = "--matrix {} --outFileName {} --method obs_exp_non_zero --perChromosome".format(original_matrix, outfile.name).split()
hicTransform.main(args)

test = hm.hiCMatrix(ROOT + "hicTransform/obs_exp_perChromosome.h5")
Expand Down
56 changes: 53 additions & 3 deletions hicexplorer/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def expected_interactions_in_distance(pLength_chromosome, pChromosome_count, pSu



def expected_interactions(pSubmatrix):
def expected_interactions_non_zero(pSubmatrix):
"""
Computes the expected number of interactions per distance
"""
Expand All @@ -252,8 +252,35 @@ def expected_interactions(pSubmatrix):
expected_interactions[distance_] += pSubmatrix.data[i]
occurences[distance_] += 1
expected_interactions /= occurences

mask = np.isnan(expected_interactions)
expected_interactions[mask] = 0
mask = np.isinf(expected_interactions)
expected_interactions[mask] = 0

return expected_interactions

def expected_interactions(pSubmatrix):
"""
Computes the expected number of interactions per distance
"""

expected_interactions = np.zeros(pSubmatrix.shape[0])
row, col = pSubmatrix.nonzero()
distance = np.absolute(row - col)
occurrences = np.arange(pSubmatrix.shape[0]+1, 1, -1)
# occurences = np.zeros(pSubmatrix.shape[0])
for i, distance_ in enumerate(distance):
expected_interactions[distance_] += pSubmatrix.data[i]
# occurences[distance_] += 1
expected_interactions /= occurrences

mask = np.isnan(expected_interactions)
expected_interactions[mask] = 0
mask = np.isinf(expected_interactions)
expected_interactions[mask] = 0

return expected_interactions

def obs_exp_matrix_lieberman(pSubmatrix, pLength_chromosome, pChromosome_count):
"""
Expand Down Expand Up @@ -289,7 +316,7 @@ def obs_exp_matrix_norm(pSubmatrix):
m_i,j = interaction_i,j / exp_i,j
"""

expected_interactions_in_distance = expected_interactions(pSubmatrix)
expected_interactions_in_distance = expected_interactions_non_zero(pSubmatrix)

row_sums = np.array(pSubmatrix.sum(axis=1).T).flatten()
total_interactions = pSubmatrix.sum()
Expand All @@ -303,6 +330,30 @@ def obs_exp_matrix_norm(pSubmatrix):
return pSubmatrix


def obs_exp_matrix_non_zero(pSubmatrix):
"""
Creates normalized contact matrix M* by
dividing each entry by the gnome-wide
expected contacts for loci at
that genomic distance.
exp_i,j = sum(interactions at distance abs(i-j)) / number of non-zero interactions at abs(i-j)
"""

expected_interactions_in_distance_ = expected_interactions_non_zero(pSubmatrix)
row, col = pSubmatrix.nonzero()
distance = np.ceil(np.absolute(row - col) / 2).astype(np.int32)

if len(pSubmatrix.data) > 0:
data_type = type(pSubmatrix.data[0])

expected = expected_interactions_in_distance_[distance]
pSubmatrix.data = pSubmatrix.data.astype(np.float32)
pSubmatrix.data /= expected
pSubmatrix.data = convertInfsToZeros_ArrayFloat(pSubmatrix.data).astype(data_type)
return pSubmatrix

def obs_exp_matrix(pSubmatrix):
"""
Creates normalized contact matrix M* by
Expand All @@ -326,7 +377,6 @@ def obs_exp_matrix(pSubmatrix):
pSubmatrix.data /= expected
pSubmatrix.data = convertInfsToZeros_ArrayFloat(pSubmatrix.data).astype(data_type)
return pSubmatrix

def toString(s):
"""
This takes care of python2/3 differences
Expand Down

0 comments on commit 764312e

Please sign in to comment.