In [1]:
from forbidden_region_processing import *

In [7]:
# Input: the path handed to read_forbidden_regions() further down (a .bed file).
forbidden_region_file = 'acrocentric_telo_cen.bed'
# Output: tab-separated text file that receives one line per generated bin.
output_file = 'cn_bins_50kbp.txt'

In [3]:
def partition_region(start, end, target_size=50000, allowance=100):
	"""Split the inclusive interval [start, end] into consecutive, non-overlapping bins.

	Every candidate bin size k in
	[target_size - allowance, target_size + allowance] is scored by how many
	positions the final bin would fall short of k; the size with the smallest
	shortfall is used (an exact fit scores 0). Ties go to the size closest to
	target_size, then to the smaller size.

	Args:
		start: First position of the region (inclusive).
		end: Last position of the region (inclusive).
		target_size: Preferred bin length.
		allowance: Maximum deviation from target_size that a bin length may have.

	Returns:
		A list of (bin_start, bin_end) tuples with inclusive coordinates. The
		bins are contiguous (each one starts right after the previous one ends),
		the first starts at `start`, and the last ends at `end` and may be
		shorter than the others. An empty list is returned when start > end.

	Raises:
		ValueError: If allowance is negative or target_size - allowance < 1,
			since no positive bin size could be chosen.
	"""
	if allowance < 0 or target_size - allowance < 1:
		raise ValueError(
			'target_size - allowance must be at least 1 and allowance must be '
			'non-negative (got target_size={}, allowance={})'.format(target_size, allowance)
		)

	length = end - start + 1
	if length <= 0:
		return []

	def _score(k):
		# (-length) % k is the number of positions missing from the last bin
		# (0 when k divides the region exactly). The second component keeps
		# the chosen size as close to target_size as possible on ties.
		return ((-length) % k, abs(k - target_size))

	bin_size = min(
		(target_size + epsilon for epsilon in range(-allowance, allowance + 1)),
		key=_score,
	)

	bins = []
	bin_start = start
	while bin_start <= end:
		# Coordinates are inclusive, so a bin of bin_size positions ends at
		# bin_start + bin_size - 1; the last bin is clipped to `end`.
		bins.append((bin_start, min(bin_start + bin_size - 1, end)))
		bin_start += bin_size
	return bins

In [4]:
# Load the forbidden-region file, then keep only its `segments` attribute.
loaded_forbidden_regions = read_forbidden_regions(forbidden_region_file)
forbidden_segments = loaded_forbidden_regions.segments

In [5]:
# chrom_region_dict maps a chromosome name to the list of inclusive
# (start, end) intervals lying between that chromosome's forbidden segments.
chrom_region_dict = {}
chrom_names = ['Chr{}'.format(label) for label in list(range(1, 23)) + ['X', 'Y']]
for chrom in chrom_names:
	# Forbidden segments of this chromosome, in the order they appear in
	# forbidden_segments (the indexing below relies on that order; it is not
	# checked here).
	chrom_segs = [seg for seg in forbidden_segments if seg.chr_name == chrom]
	acrocentric_flags = [seg.segment_type.startswith('acrocentric') for seg in chrom_segs]
	is_acrocentric = any(acrocentric_flags)

	# Entries 1 and 2 are used as the centromere and the trailing telomere for
	# every chromosome; the interval between them is always kept.
	cen = chrom_segs[1]
	telo2 = chrom_segs[2]
	after_cen = (cen.end + 1, telo2.start - 1)

	if is_acrocentric:
		# Nothing before the centromere is kept for acrocentric chromosomes.
		chrom_region_dict[chrom] = [after_cen]
	else:
		# Entry 0 is used as the leading telomere; keep the interval between
		# it and the centromere as well.
		telo1 = chrom_segs[0]
		before_cen = (telo1.end + 1, cen.start - 1)
		chrom_region_dict[chrom] = [before_cen, after_cen]

In [6]:
# Flatten every allowed interval of every chromosome into rows of
# (chromosome, bin_start, bin_end, bin_length), keeping dictionary order,
# interval order and bin order.
cn_bin = [
	(chrom, bin_start, bin_end, bin_end - bin_start + 1)
	for chrom, nonforbidden_regions in chrom_region_dict.items()
	for (c_start, c_end) in nonforbidden_regions
	for (bin_start, bin_end) in partition_region(c_start, c_end)
]

In [8]:
# Write one tab-separated line per bin: the four fields of each cn_bin row.
row_template = '{}\t{}\t{}\t{}\n'
with open(output_file, 'w') as out_handle:
	out_handle.writelines(row_template.format(*row) for row in cn_bin)