# **Notebook for running and editing the Website**

# Install packages

In [3]:
# One-time environment setup for the website.
# %pip (rather than !pip) installs into the environment of the running kernel,
# and -q keeps the installer logs out of the notebook output.
# streamlit: web app framework, rdkit: cheminformatics toolkit,
# pubchempy: PubChem lookups, py3Dmol: 3D molecule viewer, mordred: descriptors.
%pip install -q streamlit
%pip install -q rdkit
%pip install -q pubchempy
%pip install -q py3Dmol
%pip install -q mordred
# localtunnel is the npm package used at the end of the notebook to expose the local server.
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K
up to date, audited 23 packages in 903ms
[1G[0K⠴[1G[0K
[1G[0K⠴[1G[0K3 packages are looking for funding
[1G[0K⠴[1G[0K  run `npm fund` for details
[1G[0K⠴[1G[0K
2 [33m[1mmoderate[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
Collecting mordred
  Downloading mordred-1.2.0.tar.gz (128 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.8/128.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting networkx==2.* (from mordred)
  Downloading networkx-2.8.8-py3-none-any.whl.metadata (5.1 kB)
Downloading networkx-2.8.8-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: mordred
  Building wheel for mordred (se

# Write Scripts for the Website

In [13]:
%%writefile descriptor.py
from mordred import Calculator, descriptors
from rdkit.Chem import AllChem
from rdkit import Chem

def get_descriptors(smile):
  '''
  Calculate the selected Mordred descriptors for one molecule given as SMILES.

  The SMILES string is parsed into an RDKit mol object, explicit hydrogens are
  added, a conformer is embedded, and the relevant descriptors (those named in
  ``descriptor_names`` below) are calculated with the mordred python library.
  Mordred descriptors: https://mordred-descriptor.github.io/documentation/master/descriptors.html

  Parameters:
    smile: SMILES string describing the molecule.

  Returns:
    The table produced by ``Calculator.pandas`` for the single molecule.

  Raises:
    ValueError: if ``smile`` is not a non-empty string, or if RDKit cannot
      parse it into a molecule.
  '''
  # Reject missing input up front instead of computing descriptors for an empty molecule.
  if not isinstance(smile, str) or not smile.strip():
    raise ValueError("A non-empty SMILES string is required")

  mol = AllChem.MolFromSmiles(smile)
  # A failed parse would otherwise surface as an obscure error inside AddHs.
  if mol is None:
    raise ValueError(f"Could not parse SMILES string: {smile!r}")

  mol = Chem.AddHs(mol)
  # NOTE(review): the calculator below is created with ignore_3D=True, so this
  # conformer is presumably never read — confirm before removing the call.
  AllChem.EmbedMolecule(mol)

  # Names of the descriptors to keep; a set gives constant-time membership checks.
  descriptor_names = {'nAcid', 'nBase', 'nAromAtom', 'nAtom', 'nSpiro', 'nBridgehead', 'nHetero', 'nB',
                      'nN', 'nO', 'nS', 'nP', 'nF', 'nCl', 'nBr', 'nI', 'nX', 'ATS0Z', 'ATS0p', 'AATS0dv',
                      'AATS0d', 'AATS0Z', 'AATS0v', 'AATS0i', 'ATSC1dv', 'ATSC2dv', 'ATSC3dv', 'ATSC4dv',
                      'ATSC5dv', 'ATSC6dv', 'ATSC7dv', 'ATSC8dv', 'ATSC2d', 'ATSC3d', 'ATSC4d', 'ATSC5d',
                      'ATSC6d', 'ATSC7d', 'ATSC8d', 'ATSC1Z', 'ATSC2Z', 'ATSC3Z', 'ATSC5Z', 'ATSC6Z',
                      'ATSC7Z', 'ATSC8Z', 'ATSC1v', 'ATSC2v', 'ATSC3v', 'ATSC4v', 'ATSC5v', 'ATSC6v',
                      'ATSC7v', 'ATSC8v', 'ATSC0p', 'ATSC2p', 'ATSC3p', 'ATSC4p', 'ATSC5p', 'ATSC6p',
                      'ATSC7p', 'ATSC1i', 'ATSC2i', 'ATSC3i', 'ATSC4i', 'ATSC5i', 'ATSC6i', 'ATSC7i',
                      'ATSC8i', 'AATSC0dv', 'AATSC0Z', 'AATSC0v', 'AATSC0i', 'BalabanJ', 'nBondsD',
                      'nBondsT', 'C2SP1', 'C1SP2', 'C3SP2', 'C1SP3', 'C3SP3', 'C4SP3', 'FCSP3',
                      'Xch-3d', 'Xch-5d', 'Xc-4d', 'Xc-5d', 'Xc-4dv', 'Xc-6dv', 'NssssB', 'NsCH3',
                      'NdCH2', 'NtCH', 'NdsCH', 'NsssCH', 'NddC', 'NaaaC', 'NssssC', 'NsNH3', 'NsNH2',
                      'NdNH', 'NssNH', 'NaaNH', 'NsssNH', 'NdsN', 'NaaN', 'NsssN', 'NddsN', 'NaasN',
                      'NssssN', 'NsOH', 'NssO', 'NaaO', 'NsssSiH', 'NssssSi', 'NsssP', 'NsssssP', 'NsSH',
                      'NdS', 'NssS', 'NaaS', 'NdssS', 'NddssS', 'NssssGe', 'NsssAs', 'NsssdAs', 'NdSe',
                      'NssSe', 'NaaSe', 'NdssSe', 'NssssSn', 'SsssB', 'SsssCH', 'SdssC', 'SaasC', 'fMF',
                      'nHBDon', 'IC0', 'IC1', 'MIC0', 'Lipinski', 'GhoseFilter', 'FilterItLogS', 'PEOE_VSA2',
                      'PEOE_VSA3', 'PEOE_VSA4', 'PEOE_VSA5', 'PEOE_VSA6', 'PEOE_VSA7', 'PEOE_VSA8', 'PEOE_VSA9',
                      'PEOE_VSA10', 'PEOE_VSA11', 'PEOE_VSA12', 'PEOE_VSA13', 'SMR_VSA6', 'SMR_VSA9', 'SlogP_VSA1',
                      'SlogP_VSA3', 'SlogP_VSA4', 'SlogP_VSA7', 'SlogP_VSA10', 'EState_VSA2', 'EState_VSA3',
                      'EState_VSA4', 'EState_VSA5', 'EState_VSA6', 'EState_VSA7', 'EState_VSA8', 'EState_VSA9',
                      'VSA_EState1', 'VSA_EState7', 'VSA_EState9', 'n4Ring', 'n5Ring', 'n7Ring', 'n8Ring', 'n9Ring',
                      'n10Ring', 'n11Ring', 'n12Ring', 'nG12Ring', 'nHRing', 'n6HRing', 'n8HRing', 'n12HRing', 'n3aRing',
                      'n4aRing', 'n5aRing', 'n7aRing', 'nG12aRing', 'naHRing', 'n6aHRing', 'nARing', 'n5ARing', 'n5AHRing',
                      'nFRing', 'n6FRing', 'n7FRing', 'n8FRing', 'n9FRing', 'n10FRing', 'n11FRing', 'n12FRing', 'nG12FRing',
                      'n7FHRing', 'n10FHRing', 'nG12FHRing', 'n9FaRing', 'n12FaRing', 'nG12FaRing', 'nG12FaHRing', 'nFARing',
                      'n9FARing', 'n10FARing', 'nRot', 'JGI1', 'JGI2', 'JGI3', 'JGI4', 'JGI5', 'JGI6', 'JGI7', 'JGI8', 'JGI9', 'JGI10'}

  calc = Calculator(descriptors, ignore_3D=True)  # register all descriptors
  # Narrow the registered descriptors down to the names selected above.
  calc.descriptors = [d for d in calc.descriptors if str(d) in descriptor_names]
  all_desc = calc.pandas([mol])

  return all_desc

Overwriting descriptor.py


In [14]:
%%writefile app.py
# normal imports
import streamlit as st
import requests

# custom file holding the method to calculate descriptors
from descriptor import get_descriptors

# chemistry imports
from rdkit import Chem
from rdkit.Chem import Draw
import pubchempy as pcp
from rdkit.Chem import AllChem
import py3Dmol

# SMILES string of the compound currently shown; stays empty until a lookup succeeds.
smile = ''
name = st.text_input("Enter a compound name")

# Skip the lookup while the input box is empty (e.g. on first page load), so no
# "not listed" message is shown for a name the user never typed.
if name.strip():
    try:
        compounds = pcp.get_compounds(name, 'name')
    except Exception as err:
        # A failed lookup is reported as such rather than as "not listed".
        # Catching Exception (not a bare except) lets BaseException-based
        # signals such as KeyboardInterrupt propagate.
        st.write("Could not look up", name, "in the database:", str(err))
    else:
        # Use the first match; an empty result or a missing SMILES counts as "not listed".
        found_smile = compounds[0].isomeric_smiles if compounds else None
        if found_smile:
            smile = found_smile
            st.write("Your SMILES string is", smile)
        else:
            st.write(name, "is not listed in the database")


# Makes rdkit mol object (only when there is a SMILES string to parse)
chemical = Chem.MolFromSmiles(smile) if smile else None
if chemical is not None:
    # Makes python PIL object so we can display molecule
    img = Draw.MolToImage(chemical)
    st.image(img, caption="Molecule Image")


if st.button("Generate descriptors"):
    if chemical is None:
        # Nothing valid to describe yet; avoid calling the calculator on empty input.
        st.write("Look up a valid compound before generating descriptors")
    else:
        molecule_embedding = get_descriptors(smile)
        st.write(molecule_embedding)


Overwriting app.py


# Run the website
The following commands host the website and provide its URL.

In [15]:
# IMPORTANT: the address printed by this cell is the "tunnel password".
# wget runs quietly (-q) and writes the response from ipv4.icanhazip.com to stdout (-O -).
# NOTE(review): presumably the localtunnel page asks for this value before it
# shows the site — confirm.
!wget -q -O - ipv4.icanhazip.com

34.106.235.131


In [16]:
# Start the Streamlit server in the background, sending stdout and stderr to logs.txt.
# app.py is referenced relative to the working directory, which is where %%writefile put it.
# The port is set explicitly so it matches the one handed to localtunnel.
# "> file 2>&1" is used instead of "&>" because the latter is a bash-only shorthand.
!streamlit run app.py --server.port 8501 > logs.txt 2>&1 &

In [17]:
# Expose local port 8501 through localtunnel; the command prints the public URL.
# NOTE(review): 8501 has to match the port the Streamlit server listens on
# (presumably Streamlit's default — confirm).
# The cell keeps running while the tunnel is open; interrupt it to close the tunnel.
!npx localtunnel --port 8501

[1G[0K⠙[1G[0Kyour url is: https://lemon-banks-stay.loca.lt
^C
