Merge pull request #517 from deeptools/develop

Develop
deeptools · Mar 23, 2020 · 2d87672 · 2d87672
2 parents 66f3a66 + ca2b26f
commit 2d87672
Show file tree

Hide file tree

Showing 108 changed files with 10,896 additions and 615 deletions.
diff --git a/docs/content/News.rst b/docs/content/News.rst
@@ -1,6 +1,18 @@
 News and Developments
 =====================
 
+Release 3.4.3
+-------------
+**23 March 2020**
+
+- Fixing the wrong p-value computation in chicViewpoint. The new method is more accurate for floating point values.
+- Fixing a bug in chicViewpointBackgroundModel and chicQualityControl if a non-existing reference point was used.
+- Improving all chic* modules by capturing errors in sub-processes. It is now guaranteed that the computation will terminate, either successfully or with an error.
+- Add option 'truncateZero' to chicViewpointBackgroundModel: This removes all zero values from the distributions before fitting to counter overdispersion.
+- Add option 'decimalPlaces' to chicViewpoint to adjust the decimal places in the output for all floating values. Helpful for really small p-values.
+- Add option 'truncateZeroPvalues' to chicSignificantInteractions to set all p-values which are 0 to 1 so that they are ignored.
+- Add option 'truncateZeroPvalues' to chicPlotViewpoint to set all p-values which are 0 to 1 so that they do not disturb the presentation of real p-values.
+
 Release 3.4.2
 -------------
 **7 March 2020**

diff --git a/hicexplorer/_version.py b/hicexplorer/_version.py
@@ -2,4 +2,4 @@
 # This file is originally generated from Git information by running 'setup.py
 # version'. Distribution tarballs contain a pre-generated copy of this file.
 
-__version__ = '3.4.2'
+__version__ = '3.4.3'
diff --git a/hicexplorer/chicAggregateStatistic.py b/hicexplorer/chicAggregateStatistic.py
@@ -169,49 +169,53 @@ def write(pOutFileName, pHeader, pNeighborhoods, pInteractionLines):
 def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pViewpointObj, pQueue=None):
     outfile_names = []
     target_regions_intervaltree = None
-    if pArgs.batchMode and len(pTargetList) == 1:
-        target_regions = utilities.readBed(pTargetList[0])
-        hicmatrix = hm.hiCMatrix()
-        target_regions_intervaltree = hicmatrix.intervalListToIntervalTree(target_regions)[0]
-
-    for i, interactionFile in enumerate(pInteractionFilesList):
-        for sample in interactionFile:
-            if pArgs.interactionFileFolder != '.':
-                absolute_sample_path = pArgs.interactionFileFolder + '/' + sample
-            else:
-                absolute_sample_path = sample
-            header, interaction_data, interaction_file_data = pViewpointObj.readInteractionFileForAggregateStatistics(
-                absolute_sample_path)
-            log.debug('len(pTargetList) {}'.format(len(pTargetList)))
-            if pArgs.batchMode and len(pTargetList) > 1:
-                if pArgs.targetFileFolder != '.':
-                    target_file = pArgs.targetFileFolder + '/' + pTargetList[i]
+    try:
+        if pArgs.batchMode and len(pTargetList) == 1:
+            target_regions = utilities.readBed(pTargetList[0])
+            hicmatrix = hm.hiCMatrix()
+            target_regions_intervaltree = hicmatrix.intervalListToIntervalTree(target_regions)[0]
+
+        for i, interactionFile in enumerate(pInteractionFilesList):
+            for sample in interactionFile:
+                if pArgs.interactionFileFolder != '.':
+                    absolute_sample_path = pArgs.interactionFileFolder + '/' + sample
+                else:
+                    absolute_sample_path = sample
+                header, interaction_data, interaction_file_data = pViewpointObj.readInteractionFileForAggregateStatistics(
+                    absolute_sample_path)
+                log.debug('len(pTargetList) {}'.format(len(pTargetList)))
+                if pArgs.batchMode and len(pTargetList) > 1:
+                    if pArgs.targetFileFolder != '.':
+                        target_file = pArgs.targetFileFolder + '/' + pTargetList[i]
+                    else:
+                        target_file = pTargetList[i]
+                elif pArgs.batchMode and len(pTargetList) == 1:
+                    target_file = None
                 else:
                     target_file = pTargetList[i]
-            elif pArgs.batchMode and len(pTargetList) == 1:
-                target_file = None
-            else:
-                target_file = pTargetList[i]
 
-            accepted_scores = filter_scores_target_list(interaction_file_data, pTargetList=target_file, pTargetIntervalTree=target_regions_intervaltree)
+                accepted_scores = filter_scores_target_list(interaction_file_data, pTargetList=target_file, pTargetIntervalTree=target_regions_intervaltree)
 
-            if len(accepted_scores) == 0:
-                # do not call 'break' or 'continue'
-                # with this an empty file is written and no track of 'no significant interactions' detected files needs to be recorded.
-                if pArgs.batchMode:
-                    with open('errorLog.txt', 'a+') as errorlog:
-                        errorlog.write('Failed for: {} and {}.\n'.format(interactionFile[0], interactionFile[1]))
-                else:
-                    log.info('No target regions found')
-            outFileName = '.'.join(sample.split('/')[-1].split('.')[:-1]) + '_' + pArgs.outFileNameSuffix
-
-            if pArgs.batchMode:
-                outfile_names.append(outFileName)
-            if pArgs.outputFolder != '.':
-                outFileName = pArgs.outputFolder + '/' + outFileName
+                if len(accepted_scores) == 0:
+                    # do not call 'break' or 'continue'
+                    # with this an empty file is written and no track of 'no significant interactions' detected files needs to be recorded.
+                    if pArgs.batchMode:
+                        with open('errorLog.txt', 'a+') as errorlog:
+                            errorlog.write('Failed for: {} and {}.\n'.format(interactionFile[0], interactionFile[1]))
+                    else:
+                        log.info('No target regions found')
+                outFileName = '.'.join(sample.split('/')[-1].split('.')[:-1]) + '_' + pArgs.outFileNameSuffix
 
-            write(outFileName, header, accepted_scores,
-                  interaction_file_data)
+                if pArgs.batchMode:
+                    outfile_names.append(outFileName)
+                if pArgs.outputFolder != '.':
+                    outFileName = pArgs.outputFolder + '/' + outFileName
+
+                write(outFileName, header, accepted_scores,
+                      interaction_file_data)
+    except Exception as exp:
+        pQueue.put('Fail: ' + str(exp))
+        return
     if pQueue is None:
         return
     pQueue.put(outfile_names)
@@ -225,6 +229,8 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
     queue = [None] * pArgs.threads
     process = [None] * pArgs.threads
     thread_done = [False] * pArgs.threads
+    fail_flag = False
+    fail_message = ''
     for i in range(pArgs.threads):
 
         if i < pArgs.threads - 1:
@@ -250,6 +256,9 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
         for i in range(pArgs.threads):
             if queue[i] is not None and not queue[i].empty():
                 background_data_thread = queue[i].get()
+                if 'Fail:' in background_data_thread:
+                    fail_flag = True
+                    fail_message = background_data_thread[6:]
                 outfile_names[i] = background_data_thread
                 queue[i] = None
                 process[i].join()
@@ -261,7 +270,9 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
             if not thread:
                 all_data_collected = False
         time.sleep(1)
-
+    if fail_flag:
+        log.error(fail_message)
+        exit(1)
     outfile_names = [item for sublist in outfile_names for item in sublist]
     return outfile_names
 

diff --git a/hicexplorer/chicDifferentialTest.py b/hicexplorer/chicDifferentialTest.py
@@ -208,77 +208,83 @@ def writeResult(pOutFileName, pData, pHeaderOld, pHeaderNew, pAlpha, pTest):
 
 def run_statistical_tests(pInteractionFilesList, pArgs, pQueue=None):
     rejected_names = []
-    for interactionFile in pInteractionFilesList:
+    try:
+        for interactionFile in pInteractionFilesList:
 
-        sample_prefix = interactionFile[0].split(
-            '/')[-1].split('_')[0] + '_' + interactionFile[1].split('/')[-1].split('_')[0]
+            sample_prefix = interactionFile[0].split(
+                '/')[-1].split('_')[0] + '_' + interactionFile[1].split('/')[-1].split('_')[0]
 
-        region_prefix = '_'.join(
-            interactionFile[0].split('/')[-1].split('_')[1:6])
+            region_prefix = '_'.join(
+                interactionFile[0].split('/')[-1].split('_')[1:6])
 
-        outFileName = sample_prefix + '_' + region_prefix
-        rejected_name_output_file = outFileName + '_H0_rejected.txt'
+            outFileName = sample_prefix + '_' + region_prefix
+            rejected_name_output_file = outFileName + '_H0_rejected.txt'
 
-        if pArgs.outputFolder != '.':
-            outFileName_accepted = pArgs.outputFolder + \
-                '/' + outFileName + '_H0_accepted.txt'
-            outFileName_rejected = pArgs.outputFolder + \
-                '/' + outFileName + '_H0_rejected.txt'
-            outFileName = pArgs.outputFolder + '/' + outFileName + '_results.txt'
-        else:
-            outFileName_accepted = outFileName + '_H0_accepted.txt'
-            outFileName_rejected = outFileName + '_H0_rejected.txt'
-            outFileName = outFileName + '_results.txt'
-
-        if pArgs.interactionFileFolder != '.':
-            absolute_sample_path1 = pArgs.interactionFileFolder + '/' + interactionFile[0]
-            absolute_sample_path2 = pArgs.interactionFileFolder + '/' + interactionFile[1]
-
-        else:
-            absolute_sample_path1 = interactionFile[0]
-            absolute_sample_path2 = interactionFile[1]
+            if pArgs.outputFolder != '.':
+                outFileName_accepted = pArgs.outputFolder + \
+                    '/' + outFileName + '_H0_accepted.txt'
+                outFileName_rejected = pArgs.outputFolder + \
+                    '/' + outFileName + '_H0_rejected.txt'
+                outFileName = pArgs.outputFolder + '/' + outFileName + '_results.txt'
+            else:
+                outFileName_accepted = outFileName + '_H0_accepted.txt'
+                outFileName_rejected = outFileName + '_H0_rejected.txt'
+                outFileName = outFileName + '_results.txt'
 
-        header1, line_content1, data1 = readInteractionFile(absolute_sample_path1)
-        header2, line_content2, data2 = readInteractionFile(absolute_sample_path2)
+            if pArgs.interactionFileFolder != '.':
+                absolute_sample_path1 = pArgs.interactionFileFolder + '/' + interactionFile[0]
+                absolute_sample_path2 = pArgs.interactionFileFolder + '/' + interactionFile[1]
 
-        if len(line_content1) == 0 or len(line_content2) == 0:
-            writeResult(outFileName, None, header1, header2,
+            else:
+                absolute_sample_path1 = interactionFile[0]
+                absolute_sample_path2 = interactionFile[1]
+
+            header1, line_content1, data1 = readInteractionFile(absolute_sample_path1)
+            header2, line_content2, data2 = readInteractionFile(absolute_sample_path2)
+
+            if len(line_content1) == 0 or len(line_content2) == 0:
+                writeResult(outFileName, None, header1, header2,
+                            pArgs.alpha, pArgs.statisticTest)
+                writeResult(outFileName_accepted, None, header1, header2,
+                            pArgs.alpha, pArgs.statisticTest)
+                writeResult(outFileName_rejected, None, header1, header2,
+                            pArgs.alpha, pArgs.statisticTest)
+                rejected_names.append(rejected_name_output_file)
+                continue
+            if pArgs.statisticTest == 'chi2':
+                test_result, accepted, rejected = chisquare_test(
+                    data1, data2, pArgs.alpha)
+            elif pArgs.statisticTest == 'fisher':
+                test_result, accepted, rejected = fisher_exact_test(
+                    data1, data2, pArgs.alpha)
+
+            write_out_lines = []
+            for i, result in enumerate(test_result):
+                write_out_lines.append(
+                    [line_content1[i], line_content2[i], result, data1[i], data2[i]])
+
+            write_out_lines_accepted = []
+            for result in accepted:
+                write_out_lines_accepted.append(
+                    [line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])
+
+            write_out_lines_rejected = []
+            for result in rejected:
+                write_out_lines_rejected.append(
+                    [line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])
+
+            writeResult(outFileName, write_out_lines, header1, header2,
                         pArgs.alpha, pArgs.statisticTest)
-            writeResult(outFileName_accepted, None, header1, header2,
+            writeResult(outFileName_accepted, write_out_lines_accepted, header1, header2,
                         pArgs.alpha, pArgs.statisticTest)
-            writeResult(outFileName_rejected, None, header1, header2,
+            writeResult(outFileName_rejected, write_out_lines_rejected, header1, header2,
                         pArgs.alpha, pArgs.statisticTest)
             rejected_names.append(rejected_name_output_file)
-            continue
-        if pArgs.statisticTest == 'chi2':
-            test_result, accepted, rejected = chisquare_test(
-                data1, data2, pArgs.alpha)
-        elif pArgs.statisticTest == 'fisher':
-            test_result, accepted, rejected = fisher_exact_test(
-                data1, data2, pArgs.alpha)
-
-        write_out_lines = []
-        for i, result in enumerate(test_result):
-            write_out_lines.append(
-                [line_content1[i], line_content2[i], result, data1[i], data2[i]])
-
-        write_out_lines_accepted = []
-        for result in accepted:
-            write_out_lines_accepted.append(
-                [line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])
-
-        write_out_lines_rejected = []
-        for result in rejected:
-            write_out_lines_rejected.append(
-                [line_content1[result[0]], line_content2[result[0]], result[1], data1[result[0]], data2[result[0]]])
-
-        writeResult(outFileName, write_out_lines, header1, header2,
-                    pArgs.alpha, pArgs.statisticTest)
-        writeResult(outFileName_accepted, write_out_lines_accepted, header1, header2,
-                    pArgs.alpha, pArgs.statisticTest)
-        writeResult(outFileName_rejected, write_out_lines_rejected, header1, header2,
-                    pArgs.alpha, pArgs.statisticTest)
-        rejected_names.append(rejected_name_output_file)
+
+    except Exception as exp:
+        pQueue.put('Fail: ' + str(exp))
+        return
+
     if pQueue is None:
         return
     pQueue.put(rejected_names)
@@ -313,6 +319,8 @@ def main(args=None):
                     (args.interactionFile[i], args.interactionFile[i + 1]))
                 i += 2
 
+    fail_flag = False
+    fail_message = ''
     if args.batchMode:
         rejected_file_names = [None] * args.threads
         interactionFilesPerThread = len(interactionFileList) // args.threads
@@ -342,6 +350,9 @@ def main(args=None):
             for i in range(args.threads):
                 if queue[i] is not None and not queue[i].empty():
                     background_data_thread = queue[i].get()
+                    if 'Fail:' in background_data_thread:
+                        fail_flag = True
+                        fail_message = background_data_thread[6:]
                     rejected_file_names[i] = background_data_thread
                     queue[i] = None
                     process[i].join()
@@ -353,6 +364,9 @@ def main(args=None):
                 if not thread:
                     all_data_collected = False
             time.sleep(1)
+        if fail_flag:
+            log.error(fail_message)
+            exit(1)
     else:
         run_statistical_tests(interactionFileList, args)