Skip to content

Commit

Permalink
Merge pull request #222 from deeptools/hotfix_to_string
Browse files: browse the repository at this point in the history
fix toString for numpy arrays
  • Loading branch information
fidelram committed Mar 27, 2018
2 parents d15a9f5 + 708ae8e commit 6dee1b2
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 14 deletions.
9 changes: 4 additions & 5 deletions hicexplorer/HiCMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def load_cool(self, pMatrixFile, pChrnameList=None, pMatrixOnly=None, pIntraChro
cut_intervals = []

for values in cut_intervals_data_frame.values:
cut_intervals.append(tuple([toBytes(values[0]), values[1], values[2], 1.0]))
cut_intervals.append(tuple([toString(values[0]), values[1], values[2], 1.0]))

# try to restore nan_bins.
try:
Expand Down Expand Up @@ -284,8 +284,7 @@ def load_npz(matrixFile):
else:
distance_counts = _ma['dist_counts'].tolist()

map(toString, _ma['chrNameList'])
cut_intervals = zip(_ma['chrNameList'], _ma['startList'],
cut_intervals = zip(toString(_ma['chrNameList']), _ma['startList'],
_ma['endList'], _ma['extraList'])

assert len(cut_intervals) == matrix.shape[0], \
Expand Down Expand Up @@ -880,13 +879,13 @@ def convert_to_obs_exp_matrix(self, maxdepth=None, zscore=False, perchr=False):
# if zscore is needed, compute standard deviation: std = sqrt(mean(abs(x - x.mean())**2))
if zscore:
values_sqrt_diff = \
np.abs((submatrix.data[dist_list == bin_dist_plus_one] - mu[bin_dist_plus_one])**2)
np.abs((submatrix.data[dist_list == bin_dist_plus_one] - mu[bin_dist_plus_one]) ** 2)
# the standard deviation is the sum of the differences with mu squared (value variable)
# plus all zeros that are not included in the sparse matrix
# for which the standard deviation is
# (0 - mu)**2 = (mu)**2
# The number of zeros is the diagonal length - the length of the non zero values
zero_values_sqrt_diff_sum = (diagonal_length - len(values_sqrt_diff)) * mu[bin_dist_plus_one]**2
zero_values_sqrt_diff_sum = (diagonal_length - len(values_sqrt_diff)) * mu[bin_dist_plus_one] ** 2

_std = np.sqrt((values_sqrt_diff.sum() + zero_values_sqrt_diff_sum) / diagonal_length)
std[bin_dist_plus_one] = _std
Expand Down
8 changes: 4 additions & 4 deletions hicexplorer/hicAggregateContacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import matplotlib.cm as cm
import hicexplorer.HiCMatrix as hm
import hicexplorer.utilities
from .utilities import toBytes
from .utilities import toString
from .utilities import check_chrom_str_bytes

import logging
Expand Down Expand Up @@ -557,7 +557,7 @@ def main(args=None):
seen[chrom] = set()
over_1_5 = 0
empty_mat = 0
chrom_bin_range = ma.getChrBinRange(toBytes(chrom))
chrom_bin_range = ma.getChrBinRange(toString(chrom))

log.info("processing {}".format(chrom))

Expand All @@ -566,7 +566,7 @@ def main(args=None):
# check all other regions that may interact with the
# current interval at the given depth range

bin_id = ma.getRegionBinRange(toBytes(chrom), start, end)
bin_id = ma.getRegionBinRange(toString(chrom), start, end)
if bin_id is None:
continue
else:
Expand All @@ -577,7 +577,7 @@ def main(args=None):
if counter % 50000 == 0:
log.info("Number of contacts considered: {:,}".format(counter))

bin_id2 = ma.getRegionBinRange(toBytes(chrom), start2, end2)
bin_id2 = ma.getRegionBinRange(toString(chrom), start2, end2)
if bin_id2 is None:
continue
else:
Expand Down
2 changes: 1 addition & 1 deletion hicexplorer/hicPlotMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def main(args=None):
log.debug("ma.chrBinBoundaries {}".format(ma.chrBinBoundaries))
if sys.version_info[0] == 3:
args.chromosomeOrder = toBytes(args.chromosomeOrder)
for chrom in args.chromosomeOrder:
for chrom in toString(args.chromosomeOrder):
if chrom in ma.chrBinBoundaries:
valid_chromosomes.append(chrom)
else:
Expand Down
8 changes: 4 additions & 4 deletions hicexplorer/test/test_hicmatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

def test_save_load():
outfile = '/tmp/matrix.h5'
cut_intervals = [(b'a', 0, 10, 1), (b'a', 10, 20, 1),
(b'a', 20, 30, 1), (b'a', 30, 40, 1), (b'b', 40, 50, 1)]
cut_intervals = [('a', 0, 10, 1), ('a', 10, 20, 1),
('a', 20, 30, 1), ('a', 30, 40, 1), ('b', 40, 50, 1)]
hic = hm.hiCMatrix()
hic.nan_bins = []
matrix = np.array([[1, 8, 5, 3, 0],
Expand Down Expand Up @@ -139,8 +139,8 @@ def test_convert_to_zscore_matrix_2():

def test_save_load_cooler_format():
outfile = '/tmp/matrix2.cool'
cut_intervals = [(b'a', 0, 10, 1), (b'a', 10, 20, 1),
(b'a', 20, 30, 1), (b'a', 30, 40, 1), (b'b', 40, 50, 1)]
cut_intervals = [('a', 0, 10, 1), ('a', 10, 20, 1),
('a', 20, 30, 1), ('a', 30, 40, 1), ('b', 40, 50, 1)]
hic = hm.hiCMatrix()
hic.nan_bins = []
matrix = np.array([[1, 8, 5, 3, 0],
Expand Down
2 changes: 2 additions & 0 deletions hicexplorer/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ def toString(s):
return s.decode('ascii')
if isinstance(s, list):
return [toString(x) for x in s]
if isinstance(s, np.ndarray):
return s.astype(str)
return s


Expand Down

0 comments on commit 6dee1b2

Please sign in to comment.