In [1]:
%matplotlib inline
import pandas as pd
from pipeline import run_pipeline 

  from .autonotebook import tqdm as notebook_tqdm
  from rdkit.Chem import MCS


In [2]:
# --- Broad 800K compounds: join model predictions with structure metadata ---
# Predictions table first, then the tab-separated public-structures table.
broad800k = pd.read_csv('../out/model_preds_on_frags_and_cpds/NG/broad800K_melis_predictions_with_FINALbayHO11152022_11_16_2022.csv')
metadata = pd.read_csv('../data/static_datasets/PublicStructures.txt', sep='\t')

# Left join keeps every predicted compound even when no metadata row matches
# its SMILES; a SMILES matching several metadata rows is then collapsed back
# to its first row, and only the four columns used downstream are retained.
broad800k = (
    broad800k
    .merge(metadata, how='left', left_on='smiles', right_on='SMILES')
    .drop_duplicates('smiles')
    .loc[:, ['smiles', 'Name', 'hit', 'hit_epi_unc']]
)

# Persist the annotated table; this is the file passed as `compound_path`
# to the first run_pipeline call.
# NOTE(review): the DataFrame index is written as well (no index=False), so
# the CSV gains an unnamed leading column -- confirm downstream readers are
# fine with that before changing it.
broad800k.to_csv('../out/model_preds_on_frags_and_cpds/NG/broad800K_melis_predictions_with_FINALbayHO11152022_with_metadata_11_16_2022.csv')

# --- Antibiotic ("abx") scaffold definitions --------------------------------
# Pattern strings handed to run_pipeline via `fragment_remove_patterns`.
# NOTE(review): presumably parsed as SMILES/SMARTS substructures by the
# pipeline -- confirm in pipeline.run_pipeline.
nitrofuran = 'O=[N+](O)c1ccco1'
sulfonamide = 'NS(=O)=O'
quinolone = 'O=c1cc[nH]c2ccccc12'

In [3]:
# Round 1 (result dir `12_NG_rd1`): Enamine fragment predictions vs. the
# metadata-annotated Broad 800K compound predictions.
# All argument semantics are defined by pipeline.run_pipeline; the groupings
# below only follow the parameter names.
rd1_params = {
    # -- input tables and output directory --
    'fragment_path': '../out/model_preds_on_frags_and_cpds/NG/enamine_frags_18mil_with_FINALbayHO11152022_melis_predictions_11_18_2022.csv',
    'compound_path': '../out/model_preds_on_frags_and_cpds/NG/broad800K_melis_predictions_with_FINALbayHO11152022_with_metadata_11_16_2022.csv',
    'result_path': '../out/fragment_algorithm_pipeline_runs/12_NG_rd1/',

    # -- column names in those tables --
    'fragment_smi_col': 'smiles',
    'compound_smi_col': 'smiles',
    'fragment_hit_col': 'hit',
    'compound_hit_col': 'hit',
    'cpd_name_col': 'Name',

    # -- score cutoffs --
    'fragment_score': 0.2,
    'compound_score': 0.3,

    # -- structural filters --
    'fragment_remove_pains_brenk': 'both',
    'compound_remove_pains_brenk': 'both',
    'fragment_druglikeness_filter': [],
    'compound_druglikeness_filter': [],
    'fragment_require_more_than_coh': True,
    # known antibiotic scaffolds defined in the data-prep cell
    'fragment_remove_patterns': [nitrofuran, sulfonamide, quinolone],
    'frags_cannot_disrupt_rings': True,
    'fragment_length_threshold': 0,

    # -- display / analogue / toxicity settings --
    'display_inline_candidates': False,
    'analogues_pval_diff_thresh': 0,
    'analogues_absolute_diff_thresh': 0.05,
    'toxicity_threshold_if_present': 0.5,
    'toxicity_threshold_require_presence': False,

    # -- reference set: known antibiotics --
    'abx_path': '../data/static_datasets/04052022_CLEANED_v5_antibiotics_across_many_classes.csv',
    'abx_smiles_col': 'Smiles',
    'abx_name_col': 'Name',
    'cpd_sim_to_abx': 0.5,

    # -- reference set: training data --
    'train_set_path': '../data/training_data/NG/FULL_10_26_2022.csv',
    'train_set_smiles_col': 'SMILES',
    'train_set_name_col': 'Name',
    'train_set_hit_col': 'hit',
    'train_set_just_actives': True,
    'train_set_thresh': 0.5,
    'train_set_greater_than': True,
    'cpd_sim_to_train_set': 0.5,

    # -- purchasability table --
    'purch_path': '../data/static_datasets/Broad_800K_purchasable.xlsx',
    'purch_name_col': 'BROADID',
    'purch_name_needs_split': True,

    # -- previously tested compounds (same file as the training set) --
    'tested_before_path': '../data/training_data/NG/FULL_10_26_2022.csv',
    'tested_before_name_col': 'Name',
    'tested_before_name_needs_split': False,
}

run_pipeline(**rd1_params)

In [None]:
# Round 2 (result dir `13_NG_rd2`): same Enamine fragment predictions, run
# against the extended screening set.
# All argument semantics are defined by pipeline.run_pipeline; the groupings
# below only follow the parameter names.
rd2_params = {
    # -- input tables and output directory --
    'fragment_path': '../out/model_preds_on_frags_and_cpds/NG/enamine_frags_18mil_with_FINALbayHO11152022_melis_predictions_11_18_2022.csv',
    'compound_path': '../out/model_preds_on_frags_and_cpds/NG/extended_screen_set_with_FINALbayHO11152022_melis_predictions_11_22_2022.csv',
    'result_path': '../out/fragment_algorithm_pipeline_runs/13_NG_rd2/',

    # -- column names in those tables --
    'fragment_smi_col': 'smiles',
    'compound_smi_col': 'SMILES',
    'fragment_hit_col': 'hit',
    'compound_hit_col': 'hit',
    # NOTE(review): compounds are named by their SMILES column here, while
    # `tested_before_name_col` below is 'Name' -- confirm the tested-before
    # lookup still matches as intended.
    'cpd_name_col': 'SMILES',

    # -- score cutoffs --
    'fragment_score': 0.25,
    'compound_score': 0.5,

    # -- structural filters --
    'fragment_remove_pains_brenk': 'both',
    'compound_remove_pains_brenk': 'both',
    'fragment_druglikeness_filter': [],
    'compound_druglikeness_filter': [],
    'fragment_require_more_than_coh': True,
    # known antibiotic scaffolds defined in the data-prep cell
    'fragment_remove_patterns': [nitrofuran, sulfonamide, quinolone],
    'frags_cannot_disrupt_rings': True,
    'fragment_length_threshold': 0,

    # -- display / analogue / toxicity settings --
    'display_inline_candidates': False,
    'analogues_pval_diff_thresh': 0,
    'analogues_absolute_diff_thresh': 0.05,
    'toxicity_threshold_if_present': 0.5,
    'toxicity_threshold_require_presence': False,

    # -- reference set: known antibiotics --
    'abx_path': '../data/static_datasets/04052022_CLEANED_v5_antibiotics_across_many_classes.csv',
    'abx_smiles_col': 'Smiles',
    'abx_name_col': 'Name',
    'cpd_sim_to_abx': 0.5,

    # -- reference set: training data --
    'train_set_path': '../data/training_data/NG/FULL_10_26_2022.csv',
    'train_set_smiles_col': 'SMILES',
    'train_set_name_col': 'Name',
    'train_set_hit_col': 'hit',
    'train_set_just_actives': True,
    'train_set_thresh': 0.5,
    'train_set_greater_than': True,
    'cpd_sim_to_train_set': 0.5,

    # -- previously tested compounds (same file as the training set) --
    # No purch_* arguments are passed in this run.
    'tested_before_path': '../data/training_data/NG/FULL_10_26_2022.csv',
    'tested_before_name_col': 'Name',
    'tested_before_name_needs_split': False,
}

run_pipeline(**rd2_params)