Skip to content

Commit

Permalink
Test cases of hicFindTADs pass with python 3 now
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimwolff committed Nov 6, 2017
1 parent 14bad6d commit fe4bc21
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 10 deletions.
38 changes: 38 additions & 0 deletions hicexplorer/HiCMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,15 @@ def getRegionBinRange(self, chrname, startpos, endpos):
"""

try:
# chromosome_size = hic_matrix.get_chromosome_sizes()
if type(next(iter(self.interval_trees))) != type(chrname):
if type(next(iter(self.interval_trees))) is str:
chrname = toString(chrname)
elif type(next(iter(self.interval_trees))) is bytes:
chrname = toBytes(chrname)
elif type(next(iter(self.interval_trees))) is np.bytes_:
chrname = toBytes(chrname)
# chr_end_pos = chromosome_size[chrname]
self.interval_trees[chrname]
except KeyError:
"""
Expand Down Expand Up @@ -1676,3 +1685,32 @@ def intervalListToIntervalTree(interval_list):
chrbin_boundaries[chrom] = (chr_start_id, intval_id)

return cut_int_tree, chrbin_boundaries

def toString(s):
"""
This takes care of python2/3 differences
"""
if isinstance(s, str):
return s
if isinstance(s, bytes):
if sys.version_info[0] == 2:
return str(s)
return s.decode('ascii')
if isinstance(s, list):
return [toString(x) for x in s]
return s


def toBytes(s):
"""
Like toString, but for functions requiring bytes in python3
"""
if sys.version_info[0] == 2:
return s
if isinstance(s, bytes):
return s
if isinstance(s, str):
return bytes(s, 'ascii')
if isinstance(s, list):
return [toBytes(x) for x in s]
return s
35 changes: 25 additions & 10 deletions hicexplorer/hicFindTADs.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,18 @@ def get_idx_of_bins_at_given_distance(hic_matrix, idx, window_len):
# the range [start:i] should have running window
# length elements (i is excluded from the range)
chrom, cut_start, cut_end, _ = hic_matrix.getBinPos(idx)

left_start = max(0, cut_start - window_len)
left_idx = hic_matrix.getRegionBinRange(chrom, left_start, left_start + 1)[0]
chr_end_pos = hic_matrix.get_chromosome_sizes()[chrom]
# chr_end_pos = hic_matrix.get_chromosome_sizes()[chrom]
# if ?ring(chrom)
chromosome_size = hic_matrix.get_chromosome_sizes()
if type(next(iter(chromosome_size))) != type(chrom):
if type(next(iter(chromosome_size))) is str:
chrom = toString(chrom)
elif type(next(iter(chromosome_size))) is bytes:
chrom = toBytes(chrom)
chr_end_pos = chromosome_size[chrom]

right_end = min(chr_end_pos, cut_end + window_len) - 1
right_idx = hic_matrix.getRegionBinRange(chrom, right_end, right_end)[0]
Expand All @@ -237,7 +246,7 @@ def get_cut_weight(hic_matrix, cut, window_len, return_mean=False):
try:
left_idx, right_idx = get_idx_of_bins_at_given_distance(hic_matrix, cut, window_len)
except TypeError:
log.warn("Problem with cut: {}, window length: {}".format(cut, window_len))
# log.warn("Problem with cut: {}, window length: {}".format(cut, window_len))
return None

if return_mean is True:
Expand Down Expand Up @@ -324,10 +333,13 @@ def compute_matrix(bins_list, min_win_size=8, max_win_size=50, step_len=2):
positions_array = []
cond_matrix = []
incremental_step = get_incremental_step_size(min_win_size, max_win_size, step_len)
# if type(next(iter(self.interval_trees))) is np.bytes_:
# chrname = toBytes(chrname)
# else:
# chrname = toString(chrname)
# print("cut_intervals", hic_ma.cut_intervals)
for cut in bins_list:

chrom, chr_start, chr_end, _ = hic_ma.cut_intervals[cut]

# get conductance
# for multiple window lengths at a time
mult_matrix = [get_cut_weight(hic_ma, cut, depth, return_mean=True) for depth in incremental_step]
Expand All @@ -338,7 +350,6 @@ def compute_matrix(bins_list, min_win_size=8, max_win_size=50, step_len=2):
cond_matrix.append(mult_matrix)

positions_array.append((chrom, chr_start, chr_end))

chrom, chr_start, chr_end = zip(*positions_array)
cond_matrix = np.vstack(cond_matrix)

Expand Down Expand Up @@ -1105,7 +1116,7 @@ def compute_spectra_matrix(self):
chr_end = []
matrix = []
for _chrom, _chr_start, _chr_end, _matrix in res:
chrom.extend(_chrom)
chrom.extend(toString(_chrom))
chr_start.extend(_chr_start)
chr_end.extend(_chr_end)
matrix.append(_matrix)
Expand Down Expand Up @@ -1183,13 +1194,13 @@ def min_pvalue(self, min_idx):

new_min_idx += [idx]
min_chr, min_start, min_end, _ = self.hic_ma.getBinPos(matrix_idx)
assert chrom[idx] == min_chr and chr_start[idx] == min_start and chr_end[idx] == min_end

assert toString(chrom[idx]) == toString(min_chr) and chr_start[idx] == min_start and chr_end[idx] == min_end
left_idx, right_idx = get_idx_of_bins_at_given_distance(self.hic_ma, matrix_idx, window_len)

left = get_cut_weight(self.hic_ma, left_idx, window_len)
right = get_cut_weight(self.hic_ma, right_idx, window_len)
boundary = get_cut_weight(self.hic_ma, matrix_idx, window_len)

if left is None:
left = []
if right is None:
Expand All @@ -1200,7 +1211,9 @@ def min_pvalue(self, min_idx):

elif boundary is None or len(boundary) == 0 or len(left) == 0 or len(right) == 0:
pval = np.nan

else:

try:
pval1 = ranksums(boundary, left)[1]
pval2 = ranksums(boundary, right)[1]
Expand All @@ -1212,9 +1225,9 @@ def min_pvalue(self, min_idx):

assert len(pvalues) == len(new_min_idx)

print('len(pvalues)', len(pvalues))
# fdr
if self.correct_for_multiple_testing == 'fdr':

pvalues = np.array([e if ~np.isnan(e) else 1 for e in pvalues])
pvalues_ = sorted(pvalues)
largest_p_i = 0
Expand All @@ -1226,7 +1239,9 @@ def min_pvalue(self, min_idx):
elif self.correct_for_multiple_testing == 'bonferroni':
# bonferroni correction
pvalues = np.array(pvalues) * len(pvalues)
pvalues[np.array([e > 1 if ~np.isnan(e) else False for e in pvalues])] = 1
to_one_index_values = np.array([e > 1 if ~np.isnan(e) else False for e in pvalues])
if len(to_one_index_values) > 0:
pvalues[to_one_index_values] = 1


return OrderedDict(zip(new_min_idx, pvalues))
Expand Down

0 comments on commit fe4bc21

Please sign in to comment.