In [None]:
from IPython.core.display import HTML
def css_styling():
    """Read the notebook's custom stylesheet and wrap it for display.

    Reads ``./styles/custom.css`` (resolved against the kernel's current
    working directory) and returns its raw contents inside an IPython
    ``HTML`` display object.

    Returns:
        HTML: display object built from the text of the stylesheet file.

    Raises:
        OSError: if the stylesheet file cannot be opened or read.
    """
    # The context manager closes the file handle as soon as the text is read,
    # instead of leaving that to the garbage collector.
    with open("./styles/custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)
# Last expression of the cell, so the returned HTML object is rendered as output.
css_styling()

In [None]:
# %openad clear sessions
# Add the two toolkits this notebook switches between later on
# (`set context ds4sd` in Step 1, `set context rxn` in Step 6).
%openad add toolkit ds4sd
%openad add toolkit rxn
# Print the toolkit listing so the reader can check both entries are present.
%openad list toolkits
# Select `bam` as the LLM setting
# (presumably the backend behind the `tell me about ...` help command used in Step 6 - TODO confirm).
%openad set llm bam
# Silence every Python warning for the rest of the kernel session.
import warnings
warnings.filterwarnings('ignore')

## Prerequisites

You must have Jupyterlab-rise installed and enabled

Use the slide show icon at top of notebook to run

Run the cells above before running

![](./media/AD_Banner.jpg)
<a id="top"></a>

# Demonstration : Source PFAS Molecules and find Alternatives

### - Use IBM Deep Search to search for PFAS molecules
### - Use IBM OpenAD's open-source property generation to generate additional properties
### - Merge and collate molecule data with OpenAD toolkit
### - Generate similar molecules with IBM open-source Regression Transformer 
### - With Deep Search determine if generated molecules are mentioned in a patent
### - Take one of the molecules and use IBM RXN Retrosynthesis commands to generate a path to synthesis


### <span style="background: blue; color: white">Step 1 Use IBM Deep Search to search for PFAS molecules </span>

In [None]:
# Make ds4sd the active toolkit context before issuing the search.
%openad set context ds4sd
# Search the 'PubChem' collection for any of the listed PFAS names (OR-combined query).
%openad search collection 'PubChem' for 'PFOA OR PFOS OR PFHxS OR PFNA OR HFPO-DA'


### <span style="background: blue; color: white"> Step 2: Use IBM's OpenAD open-source property generation to generate additional properties</span>

Load molecules into an OpenAD molecule set and initialize the list of additional properties to generate

In [None]:
# Fetch the previous result as a dataframe
# (presumably the output of the search in the preceding cell - TODO confirm)
df_data = %openadd result as dataframe

# Load that dataframe into the OpenAD molecule working set
%openad load molecules using dataframe df_data

# Unique SMILES strings from the 'SMILES' column; set() removes duplicates,
# so the order of a_list is not guaranteed to match the dataframe
a_list = list(set(df_data['SMILES'].to_list()))

# Names of the additional properties to generate for each molecule
properties = ['is_scaffold', 'bertz', 'tpsa', 'logp', 'qed', 'plogp', 'penalized_logp', 'lipinski', 'sas', 'esol']

Generate and merge the additional properties

In [None]:
# Generate SMILES properties
properties = %openadd prop get molecule property {properties} for {a_list} 
%openad merge molecules data using dataframe properties

<span style="background: blue; color: white">Let's Examine the available Molecules</span>

In [None]:
# Display the molecule working set.
# (The earlier mols2grid-based view was replaced by OpenAD's own GUI.)
%openad show molecules

Select several molecules before running the next cell.

In [None]:
# Trash: Mols2grid code -- no longer needed
# mol_view.get_selection()

<span style="background: blue; color: white"> Drilling in on the details of a molecule </span>

In [None]:
# Show the details of a single molecule, identified by name.
%openad display molecule 'Perfluorononanoic acid'

# Alternative (disabled): display the molecule in an iframe, which is a bit easier to digest:
# %openad show molecule 'Perfluorononanoic acid'

### <span style="background: blue; color: white"> Step 3: Generate Similar Molecules with IBM's open-source Regression Transformer </span>

In [None]:
# Export the molecule working set into a variable
# (presumably a pandas DataFrame, given the .to_dict("records") call below)
mol_list = %openadd export molecules
    
# Collects one generation result per source molecule, in iteration order
datasets = []
for row in mol_list.to_dict("records"):
    MY_SMILES= row['canonical_smiles']
    # The molecule's own esol value becomes the property goal for generation
    esol= float(row['esol'])
    # Passed to the generator through the sampling_wrapper argument below
    MY_PARAMS = { "fraction_to_mask": 0.1 , "property_goal": { "<esol>": esol} }
    display("Generating Molecules for "+MY_SMILES+" with soluability:"+str(row['esol']) )
    # $MY_SMILES and $MY_PARAMS are expanded from the Python variables above;
    # the trailing backslash continues the magic command onto the next line.
    result = %openadd gen generate with RegressionTransformerMolecules data for $MY_SMILES sample 10 \
    using(algorithm_version=solubility  search=sample temperature=1.5 tolerance=60.0 sampling_wrapper = "$MY_PARAMS" )
    display(result)
    datasets.append(result)

### <span style="background: blue; color: white">Step 4: With IBM Deep Search determine if generated molecules are mentioned in a patent</span>

In [None]:
x = 0
patent_count=0
patents_to_search=[]
patented_molecules=[]
searched_list=[]

# For all the molecules in the data set search for those with patents
for result in datasets:  
    for mol in result['0'].to_list():
        # remove duplicates
        if mol in searched_list:
            continue
        else:
            searched_list.append(mol)
        # Execute Patent Search    
        x = %openadd search for patents containing molecule '{mol}'
        
        # If has patents append to list
        if isinstance(x,DataFrame):
            patents_to_search.extend(x["PATENT ID"].to_list())
            patented_molecules.append(mol)
            print(f'patents for molecule {mol}:\n  {x["PATENT ID"].to_list()}')


### <span style="background: blue; color: white">Step 5: Add the Patented molecules to our list and generate properties for them </span>

In [None]:
# Generate the full property list for every molecule that had a patent hit
properties_all = ['molecular_weight', 'number_of_aromatic_rings', 'number_of_h_acceptors', 'number_of_atoms','number_of_rings', 'number_of_rotatable_bonds', 'number_of_large_rings', 'number_of_heterocycles', 'number_of_stereocenters','is_scaffold', 'bertz', 'tpsa', 'logp', 'qed', 'plogp', 'penalized_logp', 'lipinski', 'sas', 'esol']
new_props = %openadd prop get molecule property {properties_all} for {patented_molecules} 

# Add each molecule with a patent to our molecule set
# (`Force` presumably skips the interactive confirmation - TODO confirm)
for x in patented_molecules:
    %openad add molecule {x} Force

# Merge the generated properties into the molecule set
%openad merge molecules data using dataframe new_props

### <span style="background: blue; color: white">Step 6: Let's Examine one of the Patented Molecules and Generate Retrosynthesis paths for it</span>

 Use the Interactive Help to find out how to create the molecule using the IBM RXN Predict Retrosynthesis capability 

In [None]:
# Ask OpenAD's interactive help, in free text, for the syntax and parameters of `predict retrosynthesis`.
%openad tell me about the command predict retrosynthesis providing syntax and list all available parameters

<span style="background: blue; color: white"> Run IBM RXN Retrosynthesis </span>

In [None]:
#set The RXN toolkit active
%openad set context rxn

#select the last molecule in the List
molecule = patented_molecules[-1]

%openad predict retrosynthesis  '{molecule}'
%openad enrich molecules with analysis

<span style="background: blue; color: white">Now let's take a look at what we know about the molecule</span>

In [None]:
# Show the molecule selected in the previous cell (`molecule`), after the retrosynthesis and enrichment commands have run.
%openad display molecule '{molecule}'