In [1]:
# Use statistical models to derive protease rules

In [2]:
import glob
import os
import sys

# Make the sibling ``src`` directory importable (presumably where the
# ``protease_rules`` module imported below lives — verify against the repo
# layout). The membership guard keeps this cell idempotent: re-running it
# does not keep appending duplicate entries to ``sys.path``.
SRC_DIR = "../src/"
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [3]:
from protease_rules import ProteaseRuleLearner, LearnerConfig, RuleConfig, NScanConfig

In [4]:
# Discover the experimental conditions: each sub-directory of the
# non-specific runs folder is treated as one condition, named after the
# directory.
#   * os.path.basename instead of split("/") so the name is extracted
#     correctly on platforms whose path separator is not "/".
#   * os.path.isdir drops stray files, because the processing loop expects
#     "<runs dir>/<condition>/peptide.tsv" to exist for every condition.
#   * sorted() makes the processing order deterministic; glob's own ordering
#     depends on the file system.
conditions = sorted(
    os.path.basename(run_dir)
    for run_dir in glob.glob("../datasets/nonspecific_runs/*")
    if os.path.isdir(run_dir)
)
conditions

['nep2', 'pepsin']

In [5]:
# For every discovered condition: configure a rule learner, run the full
# analysis chain on that condition's FragPipe peptide table, then write the
# results to a per-condition output directory and report the written paths.
for condition in conditions:
    # Settings are rebuilt on each iteration so that every learner gets its
    # own configuration objects.
    rule_settings = RuleConfig(
        thr_balanced=1.8,
        thr_broad=1.2,
        use_pvals=True,
        proline_guard=True,
        fdr_alpha=0.05,
    )
    nscan_settings = NScanConfig(
        min_starts_with_prev=25,
        min_enrichment=1.5,
        fdr_alpha=1e-3,
    )
    learner_settings = LearnerConfig(
        bg_scope="identified",
        peptide_col="Peptide",
        rule_cfg=rule_settings,
        nscan_cfg=nscan_settings,
    )

    # Fluent pipeline; each step is invoked on the result of the previous one.
    PRL = (
        ProteaseRuleLearner.from_paths(
            df_i_path="../datasets/sequences/AF25003_AF25004.csv",
            fragpipe_path=f"../datasets/nonspecific_runs/{condition}/peptide.tsv",
            cfg=learner_settings,
        )
        .build_observations(multi_map="all")
        .build_boundary_tables()
        .derive_c_rule()  # uses cfg.rule_cfg by default
        .run_nscan()  # uses cfg.nscan_cfg
        .compute_missed_cleavages()
        .set_missed_cleavage_percentile(95.0)
    )

    # Persist the outputs under "results_<condition>" and echo the returned
    # label -> path mapping next to the condition name.
    out_paths = PRL.save_results(out_dir=f"results_{condition}", condition_label=condition)
    print(condition, out_paths)

nep2 {'rules_bundle_json': 'results_nep2/rules_bundle.json', 'boundary_C_csv': 'results_nep2/tables/nep2_boundary_C.csv', 'boundary_N_csv': 'results_nep2/tables/nep2_boundary_N.csv', 'nscan_summary_csv': 'results_nep2/tables/nep2_Nscan_summary.csv', 'missed_cleavages_csv': 'results_nep2/tables/nep2_missed_cleavages.csv', 'c_enrichment_png': 'results_nep2/figs/nep2_C_P1_enrichment.png', 'n_scan_png': 'results_nep2/figs/nep2_N_scan.png', 'p1_composition_png': 'results_nep2/figs/nep2_P1_composition.png'}
pepsin {'rules_bundle_json': 'results_pepsin/rules_bundle.json', 'boundary_C_csv': 'results_pepsin/tables/pepsin_boundary_C.csv', 'boundary_N_csv': 'results_pepsin/tables/pepsin_boundary_N.csv', 'nscan_summary_csv': 'results_pepsin/tables/pepsin_Nscan_summary.csv', 'missed_cleavages_csv': 'results_pepsin/tables/pepsin_missed_cleavages.csv', 'c_enrichment_png': 'results_pepsin/figs/pepsin_C_P1_enrichment.png', 'n_scan_png': 'results_pepsin/figs/pepsin_N_scan.png', 'p1_composition_png': 're