diff --git a/workflow_peakcaller/scripts/multipoisson_peak_calling.py b/workflow_peakcaller/scripts/multipoisson_peak_calling.py index d454e9a..de30169 100644 --- a/workflow_peakcaller/scripts/multipoisson_peak_calling.py +++ b/workflow_peakcaller/scripts/multipoisson_peak_calling.py @@ -43,6 +43,7 @@ def poisson_filter(edit_c_df): prefix = 'subregion_' region_sites['{}fraction'.format(prefix)] = (region_sites['{}conversions'.format(prefix)]-region_sites['edited_bases'])/(region_sites['{}coverage'.format(prefix)] - region_sites['target_bases']) + region_sites.replace([np.inf, -np.inf], np.nan, inplace=True) region_sites = region_sites.fillna(0) depth_windows = [0, 10, 20, 30, 40, 50, 1000000] @@ -129,4 +130,4 @@ def output_blank(): else: write('\nNo values, outputting empty file\n') - output_blank() \ No newline at end of file + output_blank() diff --git a/workflow_peakcaller/scripts/score_peaks.py b/workflow_peakcaller/scripts/score_peaks.py index 5d78724..ef710e2 100644 --- a/workflow_peakcaller/scripts/score_peaks.py +++ b/workflow_peakcaller/scripts/score_peaks.py @@ -2,7 +2,7 @@ import os import pandas as pd import sys -import pandas as pd +import numpy as np from scipy.stats import nbinom parser = argparse.ArgumentParser(description='Add statistical score to peaks') @@ -10,24 +10,21 @@ parser.add_argument('peaks_with_scores', type=str) parser.add_argument('all_windows_with_fdr') - - args = parser.parse_args() peaks_with_edit_fraction = args.peaks_with_edit_fraction peaks_with_scores = args.peaks_with_scores all_windows_with_fdr = args.all_windows_with_fdr all_windows = pd.read_csv(all_windows_with_fdr, sep='\t') +all_windows.replace([np.inf, -np.inf], np.nan, inplace=True) +all_windows = all_windows.fillna(0) background_rate = all_windows.subregion_fraction.mean() - print("Input file specified: {}".format(peaks_with_edit_fraction)) print("Input file specified: {}".format(peaks_with_scores)) - print("Background rate for negative binomial peak scoring calculated as: {}".format(background_rate)) - def get_confidence(conversions, coverage, frac=background_rate): # # Determine nbinom CDF # (nbinom.cdf(nbr_failures,nbr_successes,prob_success))