In [2]:
# standard imports
import numpy as np
import pandas as pd
import random
from datetime import datetime

# import custom modules
import sys

# Extend the module search path with the parent directory *before* importing
# project_config; the statement order matters. The ".." entry is relative, so
# it is resolved against the kernel's current working directory.
# NOTE(review): presumably project_config.py lives one level above this
# notebook — confirm, and launch the kernel from the notebook's own folder.
sys.path.append("..")
import project_config

## Map Fragments to Model Scores
```bash
python SyntheMol/inaki_map_fragments_to_model_scores.py \
    --fragment_path "Data/synthesis_data/2021q3-4_Enamine_REAL_reagents_SMILES_no_salts.csv" \
    --model_paths "Data/property_predictors/chemprop_bbb_predictor" "Data/property_predictors/chemprop_drd2_predictor" \
    --save_path "Data/property_predictors/fragments_to_model_scores.json" \
    --model_type chemprop \
    --fingerprint_type rdkit
```

## Generate Molecules Using MCTS
Chemprop with RDKit
```bash
python SyntheMol/tree_search.py \
    --model_paths "Data/property_predictors/chemprop_bbb_predictor" "Data/property_predictors/chemprop_drd2_predictor" \
    --fragment_path "Data/synthesis_data/2021q3-4_Enamine_REAL_reagents_SMILES_no_salts.csv" \
    --reaction_to_reagents_path "Data/synthesis_data/reaction_to_reagents_REAL_space.json" \
    --fragment_to_model_score_path "Data/property_predictors/fragments_to_model_scores.json" \
    --save_dir "Data/generations_nada/mcts_chemprop_rdkit" \
    --search_type mcts \
    --model_type chemprop \
    --fingerprint_type rdkit \
    --n_rollout 20000 \
    --fragment_diversity \
    --max_reactions 1
```

Chemprop with RDKit + ADMET
```bash
python SyntheMol/inaki_tree_search.py \
    --model_path "Data/property_predictors/chemprop_combined_predictor" \
    --fragment_path "Data/synthesis_data/2021q3-4_Enamine_REAL_reagents_SMILES_no_salts.csv" \
    --reaction_to_reagents_path "Data/synthesis_data/reaction_to_reagents_REAL_space.json" \
    --save_dir "Data/generations_iclr/mcts_chemprop_rdkit" \
    --search_type mcts \
    --model_type chemprop \
    --fingerprint_type rdkit \
    --n_rollout 2000 \
    --fragment_diversity \
    --max_reactions 1
```

Random
```bash
python SyntheMol/tree_search.py \
    --fragment_path "Data/synthesis_data/2021q3-4_Enamine_REAL_reagents_SMILES_no_salts.csv" \
    --reaction_to_reagents_path "Data/synthesis_data/reaction_to_reagents_REAL_space.json" \
    --save_dir "Data/generations_nada/random" \
    --search_type random \
    --n_rollout 20000 \
    --max_reactions 1
```

Random + ADMET
```bash
python SyntheMol/tree_search.py \
    --fragment_path "Data/synthesis_data/2021q3-4_Enamine_REAL_reagents_SMILES_no_salts.csv" \
    --reaction_to_reagents_path "Data/synthesis_data/reaction_to_reagents_REAL_space.json" \
    --save_dir "Data/generations_iclr/random" \
    --search_type random \
    --n_rollout 20000 \
    --max_reactions 1
```

## Make Predictions on Random Molecules
Chemprop with RDKit on random molecules
```bash
python SyntheMol/inaki_predict_model.py \
    --data_path "Data/generations_nada/random/molecules.csv" \
    --model_paths "Data/property_predictors/chemprop_bbb_predictor" "Data/property_predictors/chemprop_drd2_predictor" \
    --model_type chemprop \
    --fingerprint_type rdkit \
    --average_preds
```

Chemprop with RDKit on random molecules + ADMET
```bash
python SyntheMol/inaki_predict_model.py \
    --data_path "Data/generations_iclr/random/molecules.csv" \
    --model_path "Data/property_predictors/chemprop_combined_predictor" \
    --model_type chemprop \
    --fingerprint_type rdkit \
    --average_preds
```

In [None]:
# Make predictions on random molecules

## Assess Generated Molecules
1. Fragment counts
2. Maximum similarity within generated molecules
3. Maximum similarity with training data (BBB and DRD2)
4. Reaction counts (**TODO:** either remove this plot or label each bar with the name of the reaction it represents)
5. Score distribution with random generation
6. Score distribution with guided generation

```bash
#!/bin/bash

# Pass 1: assess each generation run (guided MCTS and random) against the
# DRD2 and B3DB reference files. No --score_column is passed, so this relies
# on the assessment script's own default (assumed to exist — confirm).
# Results go to a shared assessment folder, one sub-directory per run.
for NAME in mcts_chemprop_rdkit random; do
    python SyntheMol/inaki_assess_generated_molecules.py \
        --data_path "Data/generations_nada/${NAME}/molecules.csv" \
        --save_dir "Data/generations_nada/assesment/${NAME}" \
        --reference_paths "Data/DRD2/DRD2_binding_data.csv" "Data/B3DB/B3DB_classification_preprocessed.csv"
done

# Pass 2: same runs and reference files, but scored explicitly on the
# "mult_chemprop_rdkit_ensemble_preds" column. Results are written inside
# each run's own directory rather than the shared assessment folder.
for NAME in mcts_chemprop_rdkit random; do
    python SyntheMol/inaki_assess_generated_molecules.py \
        --data_path "Data/generations_nada/${NAME}/molecules.csv" \
        --save_dir "Data/generations_nada/${NAME}/new_assesment_mult" \
        --reference_paths "Data/DRD2/DRD2_binding_data.csv" "Data/B3DB/B3DB_classification_preprocessed.csv" \
        --score_column "mult_chemprop_rdkit_ensemble_preds"
done
```

```bash
#!/bin/bash

# Run the assessment once per (generation run, score column) pair.
# The outer loop walks the generation runs; the inner loop walks the
# prediction columns to score on. Each pair gets its own output directory
# named after the column, under the run's folder.
for NAME in mcts_chemprop_rdkit random; do
    for COLUMN in chemprop_rdkit_ensemble_preds chemprop_rdkit_model_bbb_model_1_preds chemprop_rdkit_model_bbb_model_0_preds chemprop_rdkit_model_drd2_model_0_preds chemprop_rdkit_model_drd2_model_1_preds chemprop_rdkit_model_admet_preds; do
        python SyntheMol/inaki_assess_generated_molecules.py \
            --data_path "Data/generations_iclr_post/${NAME}/molecules.csv" \
            --save_dir "Data/generations_iclr_post/${NAME}/${COLUMN}" \
            --reference_paths "Data/DRD2/DRD2_binding_data.csv" "Data/B3DB/B3DB_classification_preprocessed.csv" \
            --score_column "${COLUMN}"
    done
done
```

```bash
#!/bin/bash

# Assess the random generation under Data/generations_iclr_mult once per
# score column. The outer loop currently lists a single run ("random"); it is
# kept as a loop so further run names can be appended to the list.
for NAME in random; do
    for COLUMN in chemprop_rdkit_model_mult_comb_preds chemprop_rdkit_model_bbb_model_0_preds chemprop_rdkit_model_drd2_model_0_preds chemprop_rdkit_model_admet_preds; do
        python SyntheMol/inaki_assess_generated_molecules.py \
            --data_path "Data/generations_iclr_mult/${NAME}/molecules.csv" \
            --save_dir "Data/generations_iclr_mult/${NAME}/${COLUMN}" \
            --reference_paths "Data/DRD2/DRD2_binding_data.csv" "Data/B3DB/B3DB_classification_preprocessed.csv" \
            --score_column "${COLUMN}"
    done
done
```