# Nucleic Acid - Ligand Database (NALDB) RNA Ligands Extraction

CONTENT
1. Packages & Libraries
2. Custom Functions
  - `NALDBFetchComp(APBD1)`
  - `pubmedSearch(pubmed_id)`
  - `PubChemCID(SMILE)`
  - `RNACheck(title, abstract, mesh_terms)`
  - `PubChemData(pubchem_cid)`
  - `PubChemSDF(sdf_URL)`

3. Extracting & Processing Data
  1. Section I - Nucleic Acid Aptamer Binding Ligands
  2. Section II - Nucleic Acid Special Structure Binding Ligand
  3. Section III - G-quadruplex RNA Binding Ligands
  4. Section IV - Double-stranded RNA Binding Ligands

## Packages & Libraries

In [None]:
! pip install pubchempy
! pip install biopython



In [None]:
# For web scraping & REST API
import re
import requests
from bs4 import BeautifulSoup

# Accessing Bio/ Chem databases
from Bio import Entrez
from Bio import Medline
import pubchempy as pcp

# Data processing
import numpy as np
import pandas as pd

# Miscellaneous
import os
import time
from tqdm import tqdm
from datetime import datetime

In [None]:
start=datetime.now()

## Custom Functions

### NALDBFetchComp(APBD1)

In [None]:
def NALDBFetchComp(comp_id):
  """
  Scrape the NALDB HTML page of one compound and return its key fields.

  Args:
    comp_id: NALDB compound ID used to build the page URL (e.g., "APBD10").

  Returns:
    Tuple ``(naldb_id, pubmed_ID, ligand_name, target_name, canonical_SMILES)``.
    ``naldb_id`` is the text of the page <title>; the other values are read
    from fixed positions in the list of <td> cells of the page.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    requests.Timeout: if the server does not answer within 30 seconds.
    IndexError: if the page has fewer <td> cells than expected.
  """
  # A timeout keeps one stalled request from hanging the whole crawl
  page = requests.get(
      f"http://bsbe.iiti.ac.in/bsbe/naldb/{comp_id}.php", timeout=30)

  # Fail early on an error status instead of parsing an error page
  page.raise_for_status()

  soup = BeautifulSoup(page.content, 'html.parser')

  # Extract content from <td> tags; the indices below rely on the page layout
  td_html_tag = soup.select('td')

  # 'NALDB_ID'
  naldb_id = soup.title.text

  # 'pubmed_ID'
  pubmed_ID = td_html_tag[9].text.strip()

  # 'ligand_name'
  ligand_name = td_html_tag[4].text.strip()

  # 'target_name'
  target_name = td_html_tag[2].text.strip()

  # 'canonical_SMILES'
  # NOTE(review): unlike the other fields this one is not stripped; kept as is
  canonical_SMILES = td_html_tag[11].text

  return (naldb_id, pubmed_ID, ligand_name, target_name, canonical_SMILES)

### pubmedSearch(pubmed_id)

In [None]:
def pubmedSearch(pubmed_id):
  """
  Fetch the MEDLINE record of a PubMed entry.

  Args:
    pubmed_id: PubMed ID passed to ``Entrez.efetch``.

  Returns:
    The first record produced by ``Medline.parse`` for the fetched text.
    Fields are read from it with ``.get`` and a MEDLINE tag
    (e.g., 'SI', 'AU', 'TI', 'AB', 'SO', 'MH').

  Raises:
    IndexError: if the response contains no record.
  """
  handle = Entrez.efetch(
      db="pubmed", 
      id=pubmed_id, 
      rettype="medline", 
      retmode="text")

  # Parse everything before closing so the connection is always released
  try:
    records = list(Medline.parse(handle))
  finally:
    handle.close()

  if not records:
    raise IndexError(f"No MEDLINE record returned for PubMed ID {pubmed_id!r}")

  return records[0]

### PubChemCID(SMILE)

In [None]:
def PubChemCID(SMILE):
  """
  Look up the PubChem CID of a compound from its SMILES string
  through the pubchempy (pcp) python library.

  Args:
    SMILE: SMILES string of the compound.

  Returns:
    The CID of the first returned compound as a string of digits, or
    "IDENTICAL_COMPOUND_ABSENT" when the search returns no compound or
    the first compound carries no CID.
  """
  compounds = pcp.get_compounds(SMILE, 'smiles')

  # Guard the empty result list so indexing cannot raise IndexError
  if not compounds:
    return "IDENTICAL_COMPOUND_ABSENT"

  # Read the CID attribute directly instead of parsing the object's repr
  cid = compounds[0].cid

  if cid:
    return str(cid)
  return "IDENTICAL_COMPOUND_ABSENT"

### RNACheck(title, abstract, mesh_terms)

In [None]:
def RNACheck(title, abstract, mesh_terms):
  """
  Tell whether a PubMed entry looks RNA-related.

  Returns True when the word 'RNA' (case-sensitive) occurs in the title,
  in the abstract or in any MeSH term, or when 'ribosome' / 'poly(a)'
  (case-insensitive) occurs in any MeSH term. Returns False otherwise.
  """
  # Scan every MeSH term; one matching term is enough to flag the entry
  mesh_hit = False
  for mesh_entry in mesh_terms:
    lowered_entry = mesh_entry.lower()
    if "RNA" in mesh_entry:
      mesh_hit = True
    if "ribosome" in lowered_entry:
      mesh_hit = True
    if "poly(a)" in lowered_entry:
      mesh_hit = True

  # Title and abstract are only searched for the exact word 'RNA'
  abstract_hit = "RNA" in abstract
  title_hit = "RNA" in title

  return mesh_hit or abstract_hit or title_hit

### PubChemData(pubchem_cid)

In [None]:
def PubChemData(pubchem_cid):
  """
  Collect PubChem information about one compound, looked up by its
  PubChem CID through the pubchempy (pcp) python library.

  The returned dict holds, in this order: the CID itself, five properties
  read from the pubchempy compound, and the PubChem REST URLs of the
  compound's PNG image and SDF file.
  """
  record = pcp.Compound.from_cid(pubchem_cid)

  return {
      "pubchem_id": pubchem_cid,
      "molecular_formula": record.molecular_formula,
      "molecular_weight": record.molecular_weight,
      "isomeric_smiles": record.isomeric_smiles,
      "iupac_name": record.iupac_name,
      "xlogp": record.xlogp,
      "image_url": f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{pubchem_cid}/PNG",
      'SDF_file': f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{pubchem_cid}/SDF",
  }

### LigandSearchPDB(SMILE)

In [None]:
def LigandSearchPDB(SMILE):
  """
  Search the RCSB PDB for entries holding a ligand whose chemical graph
  exactly matches the given SMILES.

  Args:
    SMILE: SMILES descriptor of the ligand.

  Returns:
    List with the 'identifier' of every item in the response's
    'result_set'; an empty list when the response carries no content.

  Raises:
    requests.HTTPError: if the service answers with a 4xx/5xx status.
    requests.Timeout: if the service does not answer within 30 seconds.
  """
  search_request = {
    "query": {
      "type": "terminal",
      "service": "chemical",
      "parameters": {
        "value": SMILE,
        "type": "descriptor",
        "descriptor_type": "SMILES",
        "match_type": "graph-exact"
      }
    },
    "return_type": "entry"
  }

  # NOTE(review): this is the v1 endpoint - confirm it is still served
  search_url = "https://search.rcsb.org/rcsbsearch/v1/query?json="

  response = requests.post(url=search_url, json=search_request, timeout=30)
  response.raise_for_status()

  # A search without hits comes back with an empty body (HTTP 204),
  # which cannot be decoded as JSON
  if response.status_code == 204 or not response.content:
    return []

  search_results = response.json()

  return [hit.get('identifier') for hit in search_results.get('result_set', [])]

### PubChemSDF( )


In [None]:
"""
from tqdm import tqdm
import requests

response = requests.get(download_url, stream=True)

with open(f"{cid_extracted}.sdf", "wb") as handle:
    for data in tqdm(response.iter_content()):
        handle.write(data)

"""        

'\nfrom tqdm import tqdm\nimport requests\n\nresponse = requests.get(download_url, stream=True)\n\nwith open(f"{cid_extracted}.sdf", "wb") as handle:\n    for data in tqdm(response.iter_content()):\n        handle.write(data)\n\n'

In [None]:
# ! cat /content/238.sdf

### Splitting Double Ids

In [None]:
"""double_ids = list()
double_ids_split = list()
for ids in np.unique(dataframe['pubmed_ID']):
  if len(ids.split('&'))>1:
    double_ids.append(ids)
    id1 = str(ids.split('&')[0].strip())
    double_ids_split.append(id1)
    id2 = str(ids.split('&')[1].strip())
    double_ids_split.append(id2)

ids = "12656603 &   20167243"


columns_to_fetch = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]

df = dataframe.iloc[:, columns_to_fetch][dataframe['pubmed_ID']==ids]
df.insert(loc=1, column='pubmed_ID', value=id1)
dataframe.append(df)

#drop_row_index = dataframe[dataframe['pubmed_ID']==ids].index

#dataframe.drop(drop_row_index, inplace = True)    """

'double_ids = list()\ndouble_ids_split = list()\nfor ids in np.unique(dataframe[\'pubmed_ID\']):\n  if len(ids.split(\'&\'))>1:\n    double_ids.append(ids)\n    id1 = str(ids.split(\'&\')[0].strip())\n    double_ids_split.append(id1)\n    id2 = str(ids.split(\'&\')[1].strip())\n    double_ids_split.append(id2)\n\nids = "12656603 &   20167243"\n\n\ncolumns_to_fetch = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]\n\ndf = dataframe.iloc[:, columns_to_fetch][dataframe[\'pubmed_ID\']==ids]\ndf.insert(loc=1, column=\'pubmed_ID\', value=id1)\ndataframe.append(df)\n\n#drop_row_index = dataframe[dataframe[\'pubmed_ID\']==ids].index\n\n#dataframe.drop(drop_row_index, inplace = True)    '

## Extracting Data

### Section I - Nucleic Acid Aptamer Binding Ligands Data

#### Extracting NALDB Section I Entries

In [None]:
# Ligand IDs APBD1 ... APBD483 of the entries in NALDB's
# Nucleic Acid Aptamer Binding Ligands Data section
naldb_section_one_IDs = [f"APBD{serial}" for serial in range(1, 484)]

# Headers shared by the pandas dataframe and the exported sheets
column_names = (
    'naldb_ID',
    'pubmed_ID',
    'ligand_name',
    'target_name',
    'canonical_SMILES',
)

In [None]:
# Iterating over all the entries of the 
# NALDB's Nucleic Acid Aptamer Binding Ligands Data section
# and storing the values of each ligand entry.
# The loop variable is named comp_id so the builtin id() is not shadowed.
code_pause = 0
naldb_section_one_data = list()
for comp_id in tqdm(naldb_section_one_IDs):
  naldb_section_one_data.append(NALDBFetchComp(comp_id))

  # Once 20 requests were counted, wait a second and restart the count
  if code_pause>=20:
    time.sleep(1)
    code_pause=0

  code_pause+=1

100%|██████████| 483/483 [04:24<00:00,  1.83it/s]


In [None]:
# Creating dataframe from fetched naldb 
# 'Nucleic acid aptamer binding ligands' entries
section_one_naldb_all = pd.DataFrame(naldb_section_one_data, columns=column_names)

In [None]:
section_one_naldb_all.head(5)

Unnamed: 0,naldb_ID,pubmed_ID,ligand_name,target_name,canonical_SMILES
0,APBD1,24168267,ATP,SSA-1,NC1=NC=NC2=C1N=CN2C1OC(COP(O)(=O)OP(O)(=O)OP(O...
1,APBD2,23971905,ABA,Aptamer 2,C\C(\C=C\[C@@]1(O)C(C)=CC(=O)CC1(C)C)=C\C(O)=O
2,APBD3,23971905,ABA,aptamer 9,C\C(\C=C\[C@@]1(O)C(C)=CC(=O)CC1(C)C)=C\C(O)=O
3,APBD4,23830440,opiµM alkaloid codeine (3-methylmorphine),HL7-14,[H][C@@]12OC3=C(OC)C=CC4=C3C11CCN(C)[C@H](C4)[...
4,APBD5,23734784,CPT1,CMA-70,[H]NC(=O)COCC(=O)NCCOCCOCCOCCOCCOCCOCCOCCOCCOC...


#### Extracting RNA Entries


In [None]:
Entrez.email = "akishirsath@gmail.com"

In [None]:
count=0
RNA_related = dict()
DNA_related = dict()

# Several NALDB entries cite the same article and the results are keyed by
# PubMed ID, so each distinct ID is fetched only once (first-seen order kept)
unique_pubmed_ids = list(dict.fromkeys(section_one_naldb_all['pubmed_ID']))

for pubmed_id in tqdm(unique_pubmed_ids):

  pubmed_data = pubmedSearch(pubmed_id)

  # Fields missing from the record fall back to the string "NA"
  article = {
      'PDB_ID'   : pubmed_data.get('SI', "NA"),   # PDB ID if available
      'Title'    : pubmed_data.get('TI', "NA"),
      'Authors'  : pubmed_data.get('AU', "NA"),
      'Abstract' : pubmed_data.get('AB', "NA"),
      'Source'   : pubmed_data.get('SO', "NA")    # research article source
  }

  # MeSH terms; an empty list (rather than a string) when the field is absent
  mesh_terms = pubmed_data.get('MH', [])

  RNA_check = RNACheck(article['Title'], article['Abstract'], mesh_terms)

  # Entries not flagged by RNACheck are collected in DNA_related
  if RNA_check:
    RNA_related[pubmed_id] = article
  else:
    DNA_related[pubmed_id] = article

  # Once 10 requests were counted, wait a second and restart the count
  if count>=10:
    time.sleep(1)
    count=0
  count+=1

100%|██████████| 483/483 [04:36<00:00,  1.75it/s]


In [None]:
# Keeping only the NALDB entries whose PubMed article 
# was flagged as RNA-related
section_one_naldb_RNA = section_one_naldb_all[section_one_naldb_all['pubmed_ID'].isin(list(RNA_related.keys()))]

In [None]:
# Reset the index so that the second dataframe lines up row by row when concatenated
section_one_naldb_RNA = section_one_naldb_RNA.reset_index(drop=True)

In [None]:
section_one_naldb_RNA.head(5)

Unnamed: 0,naldb_ID,pubmed_ID,ligand_name,target_name,canonical_SMILES
0,APBD39,25699094,Cholesterol,nucleolin aptamer,[H][C@@]1(CC[C@@]2([H])[C@]3([H])CC=C4C[C@@]([...
1,APBD40,25699094,Chlorin e6 (Ce6),aptamer TD05,[H][C@@]1(C)\C2=C\C3=C(C)C(C=C)=C(N3)\C=C3/N=C...
2,APBD54,20166743,TMPyP4,AS1411 aptamer,C[N+]1=CC=C(C=C1)C1=C2\C=CC(=N2)\C(=C2/N\C(\C=...
3,APBD114,10926496,Malachite green (MG),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=CC=C1)=C1C=CC(C=C1)=...
4,APBD115,10926496,Crystal violet (CV),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=C(C=C1)N(C)C)=C1C=CC...


#### Adding PDB & DOI columns

In [None]:
# For every RNA-related entry build a (PDB, Resource) pair, where Resource
# is "title; authors; source" of the cited article
pubmed_section_one_data = list()
for pubmed_id in tqdm(section_one_naldb_RNA['pubmed_ID']):
  article = RNA_related[pubmed_id]

  title = article['Title']
  doi = article['Source']

  # 'Authors' is the plain string "NA" when the record had no author field;
  # joining that string directly would split it into "N, A"
  authors = article['Authors']
  if not isinstance(authors, str):
    authors = ", ".join(authors)

  resource = "; ".join((title, authors, doi))
  pdb = article['PDB_ID']

  pubmed_section_one_data.append((pdb, resource))

100%|██████████| 334/334 [00:00<00:00, 123296.74it/s]


In [None]:
# Creating dataframe from the PubMed details (PDB IDs and resource text)
# collected for the RNA-related entries
section_one_pubmed_RNA = pd.DataFrame(pubmed_section_one_data, columns=["PDB", "Resource"])

In [None]:
# Place the DataFrames side by side
section_one_RNA = pd.concat([section_one_naldb_RNA, section_one_pubmed_RNA], axis=1)

In [None]:
section_one_RNA.head(5)

Unnamed: 0,naldb_ID,pubmed_ID,ligand_name,target_name,canonical_SMILES,PDB,Resource
0,APBD39,25699094,Cholesterol,nucleolin aptamer,[H][C@@]1(CC[C@@]2([H])[C@]3([H])CC=C4C[C@@]([...,,Aptamers: active targeting ligands for cancer ...
1,APBD40,25699094,Chlorin e6 (Ce6),aptamer TD05,[H][C@@]1(C)\C2=C\C3=C(C)C(C=C)=C(N3)\C=C3/N=C...,,Aptamers: active targeting ligands for cancer ...
2,APBD54,20166743,TMPyP4,AS1411 aptamer,C[N+]1=CC=C(C=C1)C1=C2\C=CC(=N2)\C(=C2/N\C(\C=...,,Aptamer-based tumor-targeted drug delivery for...
3,APBD114,10926496,Malachite green (MG),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=CC=C1)=C1C=CC(C=C1)=...,[PDB/1F1T],2.8 A crystal structure of the malachite green...
4,APBD115,10926496,Crystal violet (CV),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=C(C=C1)N(C)C)=C1C=CC...,[PDB/1F1T],2.8 A crystal structure of the malachite green...


#### Fetching the CID corresponding to each SMILES

In [None]:
# Many rows share the same ligand, so every distinct SMILES is sent to
# PubChem only once and repeated rows reuse the stored CID
cid_by_smiles = dict()
cids_list = list()
for SMILE in tqdm(section_one_RNA['canonical_SMILES']):
  if SMILE not in cid_by_smiles:
    cid_by_smiles[SMILE] = PubChemCID(SMILE)
  cids_list.append(cid_by_smiles[SMILE])

100%|██████████| 334/334 [03:01<00:00,  1.84it/s]


In [None]:
section_one_RNA["CID"]=cids_list

In [None]:
section_one_RNA.head()

Unnamed: 0,naldb_ID,pubmed_ID,ligand_name,target_name,canonical_SMILES,PDB,Resource,CID
0,APBD39,25699094,Cholesterol,nucleolin aptamer,[H][C@@]1(CC[C@@]2([H])[C@]3([H])CC=C4C[C@@]([...,,Aptamers: active targeting ligands for cancer ...,5997
1,APBD40,25699094,Chlorin e6 (Ce6),aptamer TD05,[H][C@@]1(C)\C2=C\C3=C(C)C(C=C)=C(N3)\C=C3/N=C...,,Aptamers: active targeting ligands for cancer ...,IDENTICAL_COMPOUND_ABSENT
2,APBD54,20166743,TMPyP4,AS1411 aptamer,C[N+]1=CC=C(C=C1)C1=C2\C=CC(=N2)\C(=C2/N\C(\C=...,,Aptamer-based tumor-targeted drug delivery for...,135398505
3,APBD114,10926496,Malachite green (MG),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=CC=C1)=C1C=CC(C=C1)=...,[PDB/1F1T],2.8 A crystal structure of the malachite green...,11295
4,APBD115,10926496,Crystal violet (CV),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=C(C=C1)N(C)C)=C1C=CC...,[PDB/1F1T],2.8 A crystal structure of the malachite green...,3468


#### Fetching PubChem data corresponding to each CID

In [None]:
# PubChemData fields kept for the sheet, named explicitly instead of
# relying on the order of the returned dict's values
pubchem_fields = ("molecular_formula", "molecular_weight", "isomeric_smiles",
                  "iupac_name", "xlogp", "image_url", "SDF_file")

pubchem_data = list()

# CIDs repeat across rows; each one is requested from PubChem only once
pubchem_by_cid = dict()

for cid in tqdm(section_one_RNA['CID']):
  if cid == "IDENTICAL_COMPOUND_ABSENT":
    # No CID was found for this ligand: fill every field with 'NA'
    pubchem_data.append(('NA',) * len(pubchem_fields))
    continue

  if cid not in pubchem_by_cid:
    data = PubChemData(cid)
    pubchem_by_cid[cid] = tuple(data[field] for field in pubchem_fields)
    # Wait a second after each real request
    time.sleep(1)

  pubchem_data.append(pubchem_by_cid[cid])


100%|██████████| 334/334 [04:53<00:00,  1.14it/s]


In [None]:
section_one_pubchem_RNA = pd.DataFrame(pubchem_data, 
                                   columns=["molecular_formula", 
                                            "molecular_weight", 
                                            "isomeric_smiles", 
                                            "iupac_name", 
                                            "xlogp", 
                                            "image_url", 
                                            "SDF_file"])

#### Combining all the data into one sheet (Dataframe)

In [None]:
# Reset the index so that the second dataframe lines up row by row when concatenated
section_one_RNA = section_one_RNA.reset_index(drop=True)

In [None]:
# Place the DataFrames side by side
section_one_RNA = pd.concat([section_one_RNA, section_one_pubchem_RNA], axis=1)
section_one_RNA = section_one_RNA.reset_index(drop=True)

In [None]:
section_one_RNA.head()

Unnamed: 0,naldb_ID,pubmed_ID,ligand_name,target_name,canonical_SMILES,PDB,Resource,CID,molecular_formula,molecular_weight,isomeric_smiles,iupac_name,xlogp,image_url,SDF_file
0,APBD39,25699094,Cholesterol,nucleolin aptamer,[H][C@@]1(CC[C@@]2([H])[C@]3([H])CC=C4C[C@@]([...,,Aptamers: active targeting ligands for cancer ...,5997,C27H46O,386.7,C[C@H](CCCC(C)C)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3...,"(3S,8S,9S,10R,13R,14S,17R)-10,13-dimethyl-17-[...",8.7,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...
1,APBD40,25699094,Chlorin e6 (Ce6),aptamer TD05,[H][C@@]1(C)\C2=C\C3=C(C)C(C=C)=C(N3)\C=C3/N=C...,,Aptamers: active targeting ligands for cancer ...,IDENTICAL_COMPOUND_ABSENT,,,,,,,
2,APBD54,20166743,TMPyP4,AS1411 aptamer,C[N+]1=CC=C(C=C1)C1=C2\C=CC(=N2)\C(=C2/N\C(\C=...,,Aptamer-based tumor-targeted drug delivery for...,135398505,C44H38N8+4,678.8,C[N+]1=CC=C(C=C1)C2=C3C=CC(=C(C4=NC(=C(C5=CC=C...,"5,10,15,20-tetrakis(1-methylpyridin-1-ium-4-yl...",6.7,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...
3,APBD114,10926496,Malachite green (MG),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=CC=C1)=C1C=CC(C=C1)=...,[PDB/1F1T],2.8 A crystal structure of the malachite green...,11295,C23H25N2+,329.5,CN(C)C1=CC=C(C=C1)C(=C2C=CC(=[N+](C)C)C=C2)C3=...,[4-[[4-(dimethylamino)phenyl]-phenylmethyliden...,4.8,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...
4,APBD115,10926496,Crystal violet (CV),malachite green RNA aptamer (MGA),CN(C)C1=CC=C(C=C1)C(C1=CC=C(C=C1)N(C)C)=C1C=CC...,[PDB/1F1T],2.8 A crystal structure of the malachite green...,3468,C25H30N3+,372.5,CN(C)C1=CC=C(C=C1)C(=C2C=CC(=[N+](C)C)C=C2)C3=...,[4-[bis[4-(dimethylamino)phenyl]methylidene]cy...,4.9,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...,https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp...


In [None]:
section_one_RNA.to_csv("Section_One_RNA_Ligands_Automated_Fetched_Data.csv")

section_one_RNA.to_excel("Section_All_RNA_Ligands_Automated_Fetched_Data.xlsx", sheet_name="Section-I")

In [None]:
! mkdir Excel-RNA-Ligands-Automated-Data
! mkdir CSV-RNA-Ligands-Automated-Data

mkdir: cannot create directory ‘Excel-RNA-Ligands-Automated-Data’: File exists
mkdir: cannot create directory ‘CSV-RNA-Ligands-Automated-Data’: File exists


In [None]:
! mv /content/Section_One_RNA_Ligands_Automated_Fetched_Data.csv /content/CSV-RNA-Ligands-Automated-Data
! mv /content/Section_All_RNA_Ligands_Automated_Fetched_Data.xlsx /content/Excel-RNA-Ligands-Automated-Data

In [None]:
! cp -r /content/CSV-RNA-Ligands-Automated-Data /content/drive/MyDrive/Colab\ Notebooks/RNA-LIGAND-DATABASE
! cp -r /content/Excel-RNA-Ligands-Automated-Data /content/drive/MyDrive/Colab\ Notebooks/RNA-LIGAND-DATABASE

#### Downloading SDF files

In [None]:
! mkdir SDF-RNA-Ligands-Automated-Data-Non-Redundant

mkdir: cannot create directory ‘SDF-RNA-Ligands-Automated-Data-Non-Redundant’: File exists


In [None]:
path = "/content/SDF-RNA-Ligands-Automated-Data-Non-Redundant"

In [None]:
non_redundant_pubchem = np.unique(section_one_RNA['CID'])

In [None]:
# Download one SDF file per distinct CID into `path`
failed_cids = list()

for cid in tqdm(non_redundant_pubchem):

  # Rows without a PubChem match have nothing to download
  if cid == "IDENTICAL_COMPOUND_ABSENT":
    continue

  clean_cid = cid.strip()
  download_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{clean_cid}/SDF"

  with requests.get(download_url, stream=True, timeout=60) as response:

    # Do not save an error page under an .sdf name; note the CID instead
    if not response.ok:
      failed_cids.append(clean_cid)
      continue

    with open(f"{path}/{clean_cid}.sdf", "wb") as handle:
      # iter_content() defaults to 1-byte chunks; use a real buffer size
      for data in response.iter_content(chunk_size=8192):
        handle.write(data)

if failed_cids:
  print(f"SDF download failed for CIDs: {failed_cids}")

100%|██████████| 73/73 [00:08<00:00,  8.23it/s]


In [None]:
sdf_files = len(os.listdir(path))
print(f"{sdf_files} SDF files downloaded")

72 SDF files downloaded


In [None]:
! cp -r /content/SDF-RNA-Ligands-Automated-Data-Non-Redundant /content/drive/MyDrive/Colab\ Notebooks/RNA-LIGAND-DATABASE

In [None]:
end=datetime.now()

print(f"Time took to run whole notebook: {end-start}")

Time took to run whole notebook: 0:17:06.950717


In [None]:
# Storing entries which are related to DNA for manual checking
section_one_naldb_DNA = section_one_naldb_all[section_one_naldb_all['pubmed_ID'].isin(list(DNA_related.keys()))]

section_one_naldb_DNA.to_csv("section_one_naldb_DNA.csv")
!cp -r /content/section_one_naldb_DNA.csv /content/drive/MyDrive/Colab\ Notebooks/RNA-LIGAND-DATABASE/Miscellaneous

section_one_naldb_DNA.to_csv("section_one_naldb_DNA.xlsx")
!cp -r /content/section_one_naldb_DNA.csv /content/drive/MyDrive/Colab\ Notebooks/RNA-LIGAND-DATABASE/Miscellaneousa

### Section II - Nucleic Acid Special Structure Binding Ligand