Skip to content

Commit

Permalink
Merge pull request #517 from deeptools/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
joachimwolff committed Mar 23, 2020
2 parents 66f3a66 + ca2b26f commit 2d87672
Show file tree
Hide file tree
Showing 108 changed files with 10,896 additions and 615 deletions.
12 changes: 12 additions & 0 deletions docs/content/News.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
News and Developments
=====================

Release 3.4.3
-------------
**23 March 2020**

- Fixing the wrong p-value computation in chicViewpoint. The new method is more accurate for floating-point values.
- Fixing a bug in chicViewpointBackgroundModel and chicQualityControl if a non-existing reference point was used.
- Improving all chic* modules by capturing errors in sub-processes. It is now guaranteed that the computation will terminate, either successfully or with an error.
- Add option 'truncateZero' to chicViewpointBackgroundModel: This removes all zero values from the distributions before fitting, to counter overdispersion.
- Add option 'decimalPlaces' to chicViewpoint to adjust the decimal places in the output for all floating values. Helpful for really small p-values.
- Add option 'truncateZeroPvalues' to chicSignificantInteractions to set all p-values which are 0 to 1, so that they are ignored.
- Add option 'truncateZeroPvalues' to chicPlotViewpoint to set all p-values which are 0 to 1, so that they do not disturb the presentation of real p-values.

Release 3.4.2
-------------
**7 March 2020**
Expand Down
2 changes: 1 addition & 1 deletion hicexplorer/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# This file is originally generated from Git information by running 'setup.py
# version'. Distribution tarballs contain a pre-generated copy of this file.

__version__ = '3.4.2'
__version__ = '3.4.3'
89 changes: 50 additions & 39 deletions hicexplorer/chicAggregateStatistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,49 +169,53 @@ def write(pOutFileName, pHeader, pNeighborhoods, pInteractionLines):
def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pViewpointObj, pQueue=None):
outfile_names = []
target_regions_intervaltree = None
if pArgs.batchMode and len(pTargetList) == 1:
target_regions = utilities.readBed(pTargetList[0])
hicmatrix = hm.hiCMatrix()
target_regions_intervaltree = hicmatrix.intervalListToIntervalTree(target_regions)[0]

for i, interactionFile in enumerate(pInteractionFilesList):
for sample in interactionFile:
if pArgs.interactionFileFolder != '.':
absolute_sample_path = pArgs.interactionFileFolder + '/' + sample
else:
absolute_sample_path = sample
header, interaction_data, interaction_file_data = pViewpointObj.readInteractionFileForAggregateStatistics(
absolute_sample_path)
log.debug('len(pTargetList) {}'.format(len(pTargetList)))
if pArgs.batchMode and len(pTargetList) > 1:
if pArgs.targetFileFolder != '.':
target_file = pArgs.targetFileFolder + '/' + pTargetList[i]
try:
if pArgs.batchMode and len(pTargetList) == 1:
target_regions = utilities.readBed(pTargetList[0])
hicmatrix = hm.hiCMatrix()
target_regions_intervaltree = hicmatrix.intervalListToIntervalTree(target_regions)[0]

for i, interactionFile in enumerate(pInteractionFilesList):
for sample in interactionFile:
if pArgs.interactionFileFolder != '.':
absolute_sample_path = pArgs.interactionFileFolder + '/' + sample
else:
absolute_sample_path = sample
header, interaction_data, interaction_file_data = pViewpointObj.readInteractionFileForAggregateStatistics(
absolute_sample_path)
log.debug('len(pTargetList) {}'.format(len(pTargetList)))
if pArgs.batchMode and len(pTargetList) > 1:
if pArgs.targetFileFolder != '.':
target_file = pArgs.targetFileFolder + '/' + pTargetList[i]
else:
target_file = pTargetList[i]
elif pArgs.batchMode and len(pTargetList) == 1:
target_file = None
else:
target_file = pTargetList[i]
elif pArgs.batchMode and len(pTargetList) == 1:
target_file = None
else:
target_file = pTargetList[i]

accepted_scores = filter_scores_target_list(interaction_file_data, pTargetList=target_file, pTargetIntervalTree=target_regions_intervaltree)
accepted_scores = filter_scores_target_list(interaction_file_data, pTargetList=target_file, pTargetIntervalTree=target_regions_intervaltree)

if len(accepted_scores) == 0:
# do not call 'break' or 'continue'
# with this an empty file is written and no track of 'no significant interactions' detected files needs to be recorded.
if pArgs.batchMode:
with open('errorLog.txt', 'a+') as errorlog:
errorlog.write('Failed for: {} and {}.\n'.format(interactionFile[0], interactionFile[1]))
else:
log.info('No target regions found')
outFileName = '.'.join(sample.split('/')[-1].split('.')[:-1]) + '_' + pArgs.outFileNameSuffix

if pArgs.batchMode:
outfile_names.append(outFileName)
if pArgs.outputFolder != '.':
outFileName = pArgs.outputFolder + '/' + outFileName
if len(accepted_scores) == 0:
# do not call 'break' or 'continue'
# with this an empty file is written and no track of 'no significant interactions' detected files needs to be recorded.
if pArgs.batchMode:
with open('errorLog.txt', 'a+') as errorlog:
errorlog.write('Failed for: {} and {}.\n'.format(interactionFile[0], interactionFile[1]))
else:
log.info('No target regions found')
outFileName = '.'.join(sample.split('/')[-1].split('.')[:-1]) + '_' + pArgs.outFileNameSuffix

write(outFileName, header, accepted_scores,
interaction_file_data)
if pArgs.batchMode:
outfile_names.append(outFileName)
if pArgs.outputFolder != '.':
outFileName = pArgs.outputFolder + '/' + outFileName

write(outFileName, header, accepted_scores,
interaction_file_data)
except Exception as exp:
pQueue.put('Fail: ' + str(exp))
return
if pQueue is None:
return
pQueue.put(outfile_names)
Expand All @@ -225,6 +229,8 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
queue = [None] * pArgs.threads
process = [None] * pArgs.threads
thread_done = [False] * pArgs.threads
fail_flag = False
fail_message = ''
for i in range(pArgs.threads):

if i < pArgs.threads - 1:
Expand All @@ -250,6 +256,9 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
for i in range(pArgs.threads):
if queue[i] is not None and not queue[i].empty():
background_data_thread = queue[i].get()
if 'Fail:' in background_data_thread:
fail_flag = True
fail_message = background_data_thread[6:]
outfile_names[i] = background_data_thread
queue[i] = None
process[i].join()
Expand All @@ -261,7 +270,9 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
if not thread:
all_data_collected = False
time.sleep(1)

if fail_flag:
log.error(fail_message)
exit(1)
outfile_names = [item for sublist in outfile_names for item in sublist]
return outfile_names

Expand Down
136 changes: 75 additions & 61 deletions hicexplorer/chicDifferentialTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,77 +208,83 @@ def writeResult(pOutFileName, pData, pHeaderOld, pHeaderNew, pAlpha, pTest):

def run_statistical_tests(pInteractionFilesList, pArgs, pQueue=None):
rejected_names = []
for interactionFile in pInteractionFilesList:
try:
for interactionFile in pInteractionFilesList:

sample_prefix = interactionFile[0].split(
'/')[-1].split('_')[0] + '_' + interactionFile[1].split('/')[-1].split('_')[0]
sample_prefix = interactionFile[0].split(
'/')[-1].split('_')[0] + '_' + interactionFile[1].split('/')[-1].split('_')[0]

region_prefix = '_'.join(
interactionFile[0].split('/')[-1].split('_')[1:6])
region_prefix = '_'.join(
interactionFile[0].split('/')[-1].split('_')[1:6])

outFileName = sample_prefix + '_' + region_prefix
rejected_name_output_file = outFileName + '_H0_rejected.txt'
outFileName = sample_prefix + '_' + region_prefix
rejected_name_output_file = outFileName + '_H0_rejected.txt'

if pArgs.outputFolder != '.':
outFileName_accepted = pArgs.outputFolder + \
'/' + outFileName + '_H0_accepted.txt'
outFileName_rejected = pArgs.outputFolder + \
'/' + outFileName + '_H0_rejected.txt'
outFileName = pArgs.outputFolder + '/' + outFileName + '_results.txt'
else:
outFileName_accepted = outFileName + '_H0_accepted.txt'
outFileName_rejected = outFileName + '_H0_rejected.txt'
outFileName = outFileName + '_results.txt'

if pArgs.interactionFileFolder != '.':
absolute_sample_path1 = pArgs.interactionFileFolder + '/' + interactionFile[0]
absolute_sample_path2 = pArgs.interactionFileFolder + '/' + interactionFile[1]

else:
absolute_sample_path1 = interactionFile[0]
absolute_sample_path2 = interactionFile[1]
if pArgs.outputFolder != '.':
outFileName_accepted = pArgs.outputFolder + \
'/' + outFileName + '_H0_accepted.txt'
outFileName_rejected = pArgs.outputFolder + \
'/' + outFileName + '_H0_rejected.txt'
outFileName = pArgs.outputFolder + '/' + outFileName + '_results.txt'
else:
outFileName_accepted = outFileName + '_H0_accepted.txt'
outFileName_rejected = outFileName + '_H0_rejected.txt'
outFileName = outFileName + '_results.txt'

header1, line_content1, data1 = readInteractionFile(absolute_sample_path1)
header2, line_content2, data2 = readInteractionFile(absolute_sample_path2)
if pArgs.interactionFileFolder != '.':
absolute_sample_path1 = pArgs.interactionFileFolder + '/' + interactionFile[0]
absolute_sample_path2 = pArgs.interactionFileFolder + '/' + interactionFile[1]

if len(line_content1) == 0 or len(line_content2) == 0:
writeResult(outFileName, None, header1, header2,
else:
absolute_sample_path1 = interactionFile[0]
absolute_sample_path2 = interactionFile[1]

header1, line_content1, data1 = readInteractionFile(absolute_sample_path1)
header2, line_content2, data2 = readInteractionFile(absolute_sample_path2)

if len(line_content1) == 0 or len(line_content2) == 0:
writeResult(outFileName, None, header1, header2,
pArgs.alpha, pArgs.statisticTest)
writeResult(outFileName_accepted, None, header1, header2,
pArgs.alpha, pArgs.statisticTest)
writeResult(outFileName_rejected, None, header1, header2,
pArgs.alpha, pArgs.statisticTest)
rejected_names.append(rejected_name_output_file)
continue
if pArgs.statisticTest == 'chi2':
test_result, accepted, rejected = chisquare_test(
data1, data2, pArgs.alpha)
elif pArgs.statisticTest == 'fisher':
test_result, accepted, rejected = fisher_exact_test(
data1, data2, pArgs.alpha)

write_out_lines = []
for i, result in enumerate(test_result):
write_out_lines.append(
[line_content1[i], line_content2[i], result, data1[i], data2[i]])

write_out_lines_accepted = []
for result in accepted:
write_out_lines_accepted.append(
[line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])

write_out_lines_rejected = []
for result in rejected:
write_out_lines_rejected.append(
[line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])

writeResult(outFileName, write_out_lines, header1, header2,
pArgs.alpha, pArgs.statisticTest)
writeResult(outFileName_accepted, None, header1, header2,
writeResult(outFileName_accepted, write_out_lines_accepted, header1, header2,
pArgs.alpha, pArgs.statisticTest)
writeResult(outFileName_rejected, None, header1, header2,
writeResult(outFileName_rejected, write_out_lines_rejected, header1, header2,
pArgs.alpha, pArgs.statisticTest)
rejected_names.append(rejected_name_output_file)
continue
if pArgs.statisticTest == 'chi2':
test_result, accepted, rejected = chisquare_test(
data1, data2, pArgs.alpha)
elif pArgs.statisticTest == 'fisher':
test_result, accepted, rejected = fisher_exact_test(
data1, data2, pArgs.alpha)

write_out_lines = []
for i, result in enumerate(test_result):
write_out_lines.append(
[line_content1[i], line_content2[i], result, data1[i], data2[i]])

write_out_lines_accepted = []
for result in accepted:
write_out_lines_accepted.append(
[line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])

write_out_lines_rejected = []
for result in rejected:
write_out_lines_rejected.append(
[line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])

writeResult(outFileName, write_out_lines, header1, header2,
pArgs.alpha, pArgs.statisticTest)
writeResult(outFileName_accepted, write_out_lines_accepted, header1, header2,
pArgs.alpha, pArgs.statisticTest)
writeResult(outFileName_rejected, write_out_lines_rejected, header1, header2,
pArgs.alpha, pArgs.statisticTest)
rejected_names.append(rejected_name_output_file)

except Exception as exp:
pQueue.put('Fail: ' + str(exp))
return

if pQueue is None:
return
pQueue.put(rejected_names)
Expand Down Expand Up @@ -313,6 +319,8 @@ def main(args=None):
(args.interactionFile[i], args.interactionFile[i + 1]))
i += 2

fail_flag = False
fail_message = ''
if args.batchMode:
rejected_file_names = [None] * args.threads
interactionFilesPerThread = len(interactionFileList) // args.threads
Expand Down Expand Up @@ -342,6 +350,9 @@ def main(args=None):
for i in range(args.threads):
if queue[i] is not None and not queue[i].empty():
background_data_thread = queue[i].get()
if 'Fail:' in background_data_thread:
fail_flag = True
fail_message = background_data_thread[6:]
rejected_file_names[i] = background_data_thread
queue[i] = None
process[i].join()
Expand All @@ -353,6 +364,9 @@ def main(args=None):
if not thread:
all_data_collected = False
time.sleep(1)
if fail_flag:
log.error(fail_message)
exit(1)
else:
run_statistical_tests(interactionFileList, args)

Expand Down

0 comments on commit 2d87672

Please sign in to comment.