In [5]:
from janus import JANUS, utils
from rdkit import Chem, RDLogger
from rdkit.Chem import AllChem, RDConfig, Descriptors
RDLogger.DisableLog("rdApp.*")

import torch
import selfies

def fitness_function(smi: str) -> float:
    """Return the fitness of a single SMILES string.

    The fitness is the logP value that ``Descriptors.MolLogP`` reports for
    the molecule parsed from ``smi``.

    NOTE(review): ``Chem.MolFromSmiles`` is documented to return ``None``
    for SMILES it cannot parse; that value is handed to ``MolLogP``
    unchanged, so this presumably relies on the caller passing only valid
    SMILES -- confirm.
    """
    molecule = Chem.MolFromSmiles(smi)
    log_p = Descriptors.MolLogP(molecule)
    return log_p

def custom_filter(smi: str):
    """Tell whether a SMILES string PASSES the filter.

    Returns True when ``smi`` is non-empty and at most 81 characters long,
    and False otherwise.
    """
    # Length-based filter: len() is never negative, so "not empty and not
    # longer than 81" collapses into a single chained comparison.
    smiles_length = len(smi)
    return 0 < smiles_length <= 81

# Call freeze_support() from torch.multiprocessing before the JANUS agent is
# created and run.
# NOTE(review): presumably this mirrors the standard library's
# multiprocessing.freeze_support(), which is only relevant for frozen
# Windows executables -- confirm whether it is needed in this environment.
torch.multiprocessing.freeze_support()

# Keyword arguments that are unpacked into the JANUS constructor below
# (``**params_dict``).
# NOTE(review): the original comment describes these values as the defaults --
# confirm against the installed JANUS version.
params_dict = {
    # Number of iterations (generations) that JANUS runs for
    "generations": 200,

    # The number of molecules for which fitness calculations are done;
    # exploration and exploitation each have their own population
    "generation_size": 5000,

    # Number of molecules that are exchanged between the exploration and
    # exploitation populations
    "num_exchanges": 5,

    # Callable filtering function (None defaults to no filtering)
    "custom_filter": custom_filter,

    # Fragments from the starting population are used to extend the alphabet
    # for mutations
    "use_fragments": True,

    # Option to use a classifier as selection bias
    "use_classifier": True,
}

# Set your SELFIES constraints (below used for manuscript).
# The overrides are applied to a copy so that ``default_constraints`` keeps the
# values originally fetched from selfies instead of being silently mutated
# through an alias.
default_constraints = selfies.get_semantic_constraints()
new_constraints = dict(default_constraints)
# NOTE(review): presumably these cap the bonding capacity of sulfur at 2 and
# phosphorus at 3 -- confirm against the selfies documentation.
new_constraints['S'] = 2
new_constraints['P'] = 3
selfies.set_semantic_constraints(new_constraints)  # update constraints

# Build the JANUS agent from the fitness function, the starting population
# file and the parameters collected in params_dict, then run it.
agent = JANUS(
    work_dir="RESULTS",  # where the results are saved
    fitness_function=fitness_function,  # user-defined fitness for given smiles
    start_population="../Data/sample_start_smiles.txt",  # starting smiles file
    **params_dict,
)

agent.run()

    Unique and valid fragments generated: 107308
On generation 0/200


  prob_ = 1.0 / (3.0 ** ((F_50_val - fitness) / (F_50_val - F_25_val)) + 1)


    (Explr) Top Fitness: 15.811199999999957
    (Explr) Top Smile: CCCCCCCCCCCCCCCCOP(=O)(O)Oc1ccc(C=Cc2ccc(OP(=O)(O)OCCCCCCCCCCCCCCCC)cc2)cc1
    (Local) Top Fitness: 18.758599999999984
    (Local) Top Smile: CCCCCCCCCCCCCCCCOOCc1cc(I)ccc1C=C(C=CP=O)C=Cc1ccc(OP(COO)OCCCCCCCCCCCCCCCC)cc1
On generation 1/200
    Training classifier neural net...
No GPU available, defaulting to CPU.
        Epoch:0 Loss:0.7787207961082458
                Validation loss: 0.7838078141212463
        Epoch:1000 Loss:0.13090628385543823
                Validation loss: 0.13141220808029175
        Early stopping at epoch: 985       loss: 0.13061510026454926
    Obtaining Predictions
No GPU available, defaulting to CPU.
Number of batches:  109
        Predicting Batch: 0/109
        Predicting Batch: 1/109
        Predicting Batch: 2/109
        Predicting Batch: 3/109
        Predicting Batch: 4/109
        Predicting Batch: 5/109
        Predicting Batch: 6/109
        Predicting Batch: 7/109
        Predict


KeyboardInterrupt

