# **Utilising SMILES Embeddings**

## Import Libraries

In [23]:
import xml.etree.ElementTree as ET
import pandas as pd
from rdkit import Chem
from rdkit.Chem import MolToSmiles
from transformers import RobertaTokenizer, RobertaModel
import torch
import numpy as np
from tqdm import tqdm
import faiss

## Parse DrugBank

In [24]:
def parse_drugbank_xml(xml_path):
    """Extract (drug name, SMILES) pairs from a DrugBank XML export.

    Args:
        xml_path: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        DataFrame with columns ``drug_name`` and ``smiles``. Drugs without a
        non-empty name or without a non-empty calculated SMILES are omitted.
    """
    ns = {'db': 'http://www.drugbank.ca'}  # DrugBank namespace
    root = ET.parse(xml_path).getroot()

    drugs = []

    for drug in root.findall('db:drug', ns):
        # findtext yields the default for a missing element and '' for an
        # empty one (e.g. <name/>), so .strip() can never be called on None.
        name = (drug.findtext('db:name', default='', namespaces=ns) or '').strip()

        # Look for SMILES under <calculated-properties>
        smiles = None
        properties = drug.find('db:calculated-properties', ns)
        if properties is not None:
            for prop in properties.findall('db:property', ns):
                if prop.findtext('db:kind', namespaces=ns) != 'SMILES':
                    continue
                value = (prop.findtext('db:value', default='', namespaces=ns) or '').strip()
                if value:
                    smiles = value
                    break

        if name and smiles:
            drugs.append((name, smiles))

    return pd.DataFrame(drugs, columns=['drug_name', 'smiles'])

In [25]:
# Parse the DrugBank export into a (drug_name, smiles) table.
# The path is relative to the notebook's working directory.
df_drugs = parse_drugbank_xml("../Dataset/full database.xml")

In [26]:
# Preview the first rows of the parsed table.
df_drugs.head()

Unnamed: 0,drug_name,smiles
0,Bivalirudin,CC[C@H](C)[C@H](NC(=O)[C@H](CCC(O)=O)NC(=O)[C@...
1,Leuprolide,CCNC(=O)[C@@H]1CCCN1C(=O)[C@H](CCCNC(N)=N)NC(=...
2,Goserelin,CC(C)C[C@H](NC(=O)[C@@H](COC(C)(C)C)NC(=O)[C@H...
3,Gramicidin D,CC(C)C[C@@H](NC(=O)CNC(=O)[C@@H](NC=O)C(C)C)C(...
4,Desmopressin,NC(=O)CC[C@@H]1NC(=O)[C@H](CC2=CC=CC=C2)NC(=O)...


## Load Tokenizer + Model

In [27]:
# Load the tokenizer and encoder from the same pretrained ChemBERTa checkpoint.
tokenizer = RobertaTokenizer.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
model = RobertaModel.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
# Put the model in evaluation mode; it is only used for inference below.
model.eval()

def canonicalize_smiles(smiles):
    """Return RDKit's canonical SMILES for ``smiles``, or None when parsing yields no molecule."""
    parsed = Chem.MolFromSmiles(smiles)
    if not parsed:
        return None
    return MolToSmiles(parsed, canonical=True)

def get_embedding(smiles):
    """Embed one SMILES string as the mean of the model's final-layer token states.

    The string is tokenized with the module-level ``tokenizer`` and encoded
    with the module-level ``model`` under ``torch.no_grad()``; the token axis
    is averaged and the result is returned as a NumPy array.

    NOTE(review): the tokenizer is called without truncation, so very long
    inputs may raise inside the model — confirm how callers should treat that.
    """
    encoded = tokenizer(smiles, return_tensors="pt")
    with torch.no_grad():
        model_output = model(**encoded)
    token_states = model_output.last_hidden_state
    pooled = token_states.mean(dim=1)
    return pooled.squeeze().numpy()

In [28]:
embeddings = []
valid_names = []
# (drug_name, reason) for every row that produced no embedding, so dropped
# drugs can be inspected instead of disappearing silently.
skipped_drugs = []

for _, row in tqdm(df_drugs.iterrows(), total=len(df_drugs)):
    canon_smiles = canonicalize_smiles(row['smiles'])
    if not canon_smiles:
        skipped_drugs.append((row['drug_name'], "SMILES could not be canonicalized"))
        continue
    try:
        emb = get_embedding(canon_smiles)
    except Exception as e:
        # Best effort: one failing molecule must not abort the whole run,
        # but the failure is recorded rather than swallowed.
        skipped_drugs.append((row['drug_name'], f"embedding failed: {e}"))
        continue
    # Appended together so embeddings[i] always belongs to valid_names[i].
    embeddings.append(emb)
    valid_names.append(row['drug_name'])

print(f"Embedded {len(embeddings)} of {len(df_drugs)} drugs; skipped {len(skipped_drugs)}")

 14%|█▍        | 1688/11925 [00:25<02:27, 69.43it/s][14:26:27] Explicit valence for atom # 13 Cl, 5, is greater than permitted
 15%|█▍        | 1759/11925 [00:26<02:20, 72.49it/s][14:26:28] SMILES Parse Error: syntax error while parsing: OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]
[14:26:28] SMILES Parse Error: check for mistakes around position 84:
[14:26:28] C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O
[14:26:28] ~~~~~~~~~~~~~~~~~~~~^
[14:26:28] SMILES Parse Error: Failed parsing SMILES 'OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]' for input: 'OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]'
 20%|█▉        | 2378/11925 [00:35<02

In [29]:
# Stack the per-drug vectors row-wise; row i corresponds to valid_names[i].
embedding_matrix = np.vstack(embeddings)

## Faiss

In [30]:
# Build a flat L2 index sized to the embedding width and add every drug vector.
dimension = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(dimension)
# NOTE(review): faiss expects float32 input — presumably satisfied by the model output; confirm.
index.add(embedding_matrix)

In [31]:
topk = 4  # requested neighbours: 3 alternatives + 1 slot for the query drug itself
n_alternatives = topk - 1  # alternatives kept per drug once the query itself is dropped

results = {}

# A single batched search replaces one Python-level search call per drug.
_, neighbour_indices = index.search(embedding_matrix, topk)

for i, row_indices in enumerate(neighbour_indices):
    similar_names = [
        valid_names[idx]
        for idx in row_indices
        # Drop the query itself, and the -1 padding faiss emits when the index
        # holds fewer than topk vectors (valid_names[-1] would otherwise
        # silently resolve to the last drug).
        if idx != i and idx >= 0
    ]
    results[valid_names[i]] = similar_names[:n_alternatives]

## Final Output

In [32]:
# One row per drug that has at least three alternatives; the first three are
# spread across the alternative_1..alternative_3 columns.
alt_df = pd.DataFrame([
    dict(zip(
        ('drug', 'alternative_1', 'alternative_2', 'alternative_3'),
        (drug, *alts[:3]),
    ))
    for drug, alts in results.items()
    if not len(alts) < 3
])

In [33]:
# Display the table of alternatives per drug.
alt_df 

Unnamed: 0,drug,alternative_1,alternative_2,alternative_3
0,Bivalirudin,Semaglutide,Avexitide,PP-F11N lutetium Lu-177
1,Leuprolide,Buserelin,Deslorelin,Nerofe
2,Goserelin,Nafarelin,Triptorelin,Ganirelix
3,Gramicidin D,Nerofe,Echinomycin,Reltecimod
4,Desmopressin,Lypressin,Selepressin,Ozarelix
...,...,...,...,...
11908,Alogabat,4-(6-CYCLOHEXYLMETHOXY-9H-PURIN-2-YLAMINO)--BE...,Mizolastine,N-cyclopropyl-4-methyl-3-{2-[(2-morpholin-4-yl...
11909,Ropsacitinib,Regadenoson,Vistusertib,Golidocitinib
11910,taletrectinib,RU90395,Cadazolid,Carotegrast methyl
11911,Tolebrutinib,Tirabrutinib,Ibrutinib,Edralbrutinib


## **Validation Framework**

### Import Libraries

In [34]:
import json
import os
import re
import time
from typing import List, Dict, Tuple, Optional
from urllib.parse import quote

import google.generativeai as genai
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

### Validation Framework

In [35]:
class PatentValidationFramework:
    """Check embedding-derived drug alternatives against patent-derived function.

    For each compound the pipeline looks up PubChem CIDs, fetches patent IDs
    linked to those CIDs, scrapes the patents from Google Patents, and asks a
    Gemini model for short functional descriptors. Query/alternative pairs
    whose descriptors are both non-empty are then scored for functional
    similarity by a second Gemini call.
    """

    # Upper bound (seconds) for a single HTTP request so a stalled server
    # cannot hang the pipeline. Defined on the class so it is always available.
    request_timeout = 60.0

    def __init__(self, df: pd.DataFrame, api_key: str, query_col: str = "query",
                 alternatives_cols: Optional[List[str]] = None):
        """
        Initialize the framework with a DataFrame containing queries and alternatives.
        
        Args:
            df: DataFrame with drug queries and alternatives
            api_key: Google AI API key
            query_col: Column name containing the query compound
            alternatives_cols: List of column names containing alternative compounds;
                defaults to every column other than ``query_col``
        """
        self.df = df
        self.query_col = query_col
        self.alternatives_cols = alternatives_cols if alternatives_cols else [col for col in df.columns if col != query_col]
        
        # Configure Gemini API
        genai.configure(api_key=api_key)
        
        # Select models
        self.descriptor_model = genai.GenerativeModel('gemini-2.0-flash-lite')
        self.similarity_model = genai.GenerativeModel('gemini-2.0-flash-lite')
        
        # Results storage
        self.patent_data = {}
        self.functional_descriptors = {}
        self.similarity_results = []  # list of per-pair dicts, filled by run_pipeline
        self.query_results = {}
        self.alternative_results = {}
        
        # API rate limiting
        self.pubchem_delay = 0.5  # seconds between PubChem API calls
        self.scholar_delay = 2.0  # seconds between Google Patents requests
        self.gemini_delay = 2.5   # seconds between Gemini API calls
        
    def get_pubchem_cid(self, compound_name: str) -> List[str]:
        """
        Convert compound name to PubChem CID.
        
        Args:
            compound_name: Name of the compound
            
        Returns:
            List of PubChem CIDs as strings; empty on any lookup failure
        """
        try:
            # Percent-encode the name so characters such as '/', '?', '#' or
            # '%' cannot change the structure of the request URL.
            encoded_name = quote(str(compound_name), safe="")
            url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{encoded_name}/cids/JSON"
            response = requests.get(url, timeout=self.request_timeout)
            response.raise_for_status()
            data = response.json()
            
            if "IdentifierList" in data and "CID" in data["IdentifierList"]:
                return [str(cid) for cid in data["IdentifierList"]["CID"]]
            return []
        except Exception as e:
            print(f"Error retrieving CID for {compound_name}: {e}")
            return []
        finally:
            # Always pause, even after a failure, to respect the rate limit.
            time.sleep(self.pubchem_delay)
    
    def get_patent_ids(self, cid: str, max_patents: int = 10) -> List[str]:
        """
        Get patent IDs associated with a PubChem CID.
        
        Args:
            cid: PubChem Compound ID
            max_patents: Maximum number of patents to retrieve
            
        Returns:
            List of patent IDs; empty on any lookup failure
        """
        try:
            url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/xrefs/PatentID/JSON"
            response = requests.get(url, timeout=self.request_timeout)
            response.raise_for_status()
            data = response.json()
            
            information = data.get("InformationList", {}).get("Information") or []
            if information and "PatentID" in information[0]:
                return information[0]["PatentID"][:max_patents]
            return []
        except Exception as e:
            print(f"Error retrieving patent IDs for CID {cid}: {e}")
            return []
        finally:
            time.sleep(self.pubchem_delay)
    
    def scrape_patent_info(self, patent_id: str) -> Dict[str, str]:
        """
        Scrape patent information from Google Patents.
        
        Args:
            patent_id: Patent identifier
            
        Returns:
            Dictionary with patent title, abstract, and description; all values
            are empty strings when the page cannot be fetched
        """
        try:
            # The patent URL uses the identifier without hyphens.
            patent_id = patent_id.replace("-", "")
            
            url = f"https://patents.google.com/patent/{patent_id}"
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=self.request_timeout)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # Extract title, abstract, and description
            # Actual implementation would need to match Google Patents HTML structure
            title_elem = soup.find("span", {"itemprop": "title"})
            title = title_elem.text.strip() if title_elem else ""
            
            abstract_elem = soup.find("div", {"class": "abstract"})
            abstract = abstract_elem.text.strip() if abstract_elem else ""
            
            description_elem = soup.find("div", {"class": "description"})
            description = description_elem.text.strip() if description_elem else ""
            
            return {
                "title": title,
                "abstract": abstract,
                "description": description[:5000]  # Limit description length
            }
        except Exception as e:
            print(f"Error scraping patent info for {patent_id}: {e}")
            return {"title": "", "abstract": "", "description": ""}
        finally:
            time.sleep(self.scholar_delay)
    
    def generate_functional_descriptors(self, patent_info: Dict[str, str], compound_name: str) -> List[str]:
        """
        Generate functional descriptors using Gemini.
        
        Args:
            patent_info: Dictionary containing patent title, abstract, and description
            compound_name: Name of the compound
            
        Returns:
            List of unique, non-empty functional descriptors (at most three
            words each) in the order the model produced them; empty on failure
        """
        prompt = f"""
        You are a pharmaceutical expert analyzing patent information for the compound {compound_name}.
        
        Patent Title: {patent_info['title']}
        Patent Abstract: {patent_info['abstract']}
        Patent Description: {patent_info['description'][:2000]}...
        
        Based solely on the patent information above, provide 1-3 brief functional descriptors 
        (1-3 words each) for the compound {compound_name}. Focus on its therapeutic function, 
        mechanism of action, or treatment target. Be concise and specific.
        
        Format your response as a comma-separated list without explanations or additional text.
        """
        
        try:
            response = self.descriptor_model.generate_content(prompt)
            descriptors_text = response.text.strip()
            
            # Clean up response to extract just the comma-separated list
            cleaned_text = re.sub(r'^[\s\S]*?([\w\s-]+(?:,\s*[\w\s-]+)*)[\s\S]*$', r'\1', descriptors_text)
            descriptors = [d.strip() for d in cleaned_text.split(',')]
            
            # Clean up descriptors
            cleaned_descriptors = []
            for d in descriptors:
                # Remove any unwanted characters and enforce length limits
                d = re.sub(r'[^\w\s-]', '', d).strip()
                if not d:
                    # An empty response, "a,,b" or a trailing comma would
                    # otherwise yield an empty-string descriptor.
                    continue
                if len(d.split()) <= 3 and d not in cleaned_descriptors:
                    cleaned_descriptors.append(d)
            
            return cleaned_descriptors
        except Exception as e:
            print(f"Error generating descriptors for {compound_name}: {e}")
            return []
        finally:
            # Pause after every call, including failed ones, so errors such as
            # rate limiting are not followed by an immediate retry burst.
            time.sleep(self.gemini_delay)
    
    def determine_functional_similarity(self, query_descriptors: List[str], 
                                       alt_descriptors: List[str],
                                       query_name: str,
                                       alt_name: str) -> Dict:
        """
        Determine functional similarity using Gemini.
        
        Args:
            query_descriptors: List of descriptors for the query compound
            alt_descriptors: List of descriptors for the alternative compound
            query_name: Name of the query compound
            alt_name: Name of the alternative compound
            
        Returns:
            Dictionary with similarity assessment; on any failure a
            "not similar" placeholder with score 0 is returned
        """
        prompt = f"""
        You are analyzing the functional similarity between two pharmaceutical compounds:
        
        Query compound: {query_name}
        Functional descriptors: {', '.join(query_descriptors)}
        
        Alternative compound: {alt_name}
        Functional descriptors: {', '.join(alt_descriptors)}
        
        Based on these functional descriptors, determine whether these compounds have similar functionality.
        Consider mechanism of action, therapeutic targets, and clinical applications.
        
        Provide your assessment as a JSON with the following structure:
        {{
            "is_similar": true/false,
            "similarity_score": [0-100],
            "explanation": "Brief explanation of your reasoning",
            "shared_functions": ["list", "of", "shared", "functions"]
        }}
        
        Only provide the JSON output, nothing else.
        """
        
        try:
            response = self.similarity_model.generate_content(prompt)
            similarity_assessment = response.text.strip()
            
            # Extract the JSON part if there's any text around it
            json_match = re.search(r'{.*}', similarity_assessment, re.DOTALL)
            if json_match:
                similarity_assessment = json_match.group(0)
            
            # Convert to Python dictionary
            try:
                return json.loads(similarity_assessment)
            except json.JSONDecodeError:
                print(f"Error parsing JSON from similarity assessment: {similarity_assessment}")
                return {
                    "is_similar": False,
                    "similarity_score": 0,
                    "explanation": "Error processing response",
                    "shared_functions": []
                }
                
        except Exception as e:
            print(f"Error determining similarity between {query_name} and {alt_name}: {e}")
            return {
                "is_similar": False,
                "similarity_score": 0,
                "explanation": f"Error: {str(e)}",
                "shared_functions": []
            }
        finally:
            # Rate-limit every call, whichever branch returned.
            time.sleep(self.gemini_delay)
    
    def process_compound(self, compound_name: str) -> Dict:
        """
        Process a single compound through the entire pipeline.
        
        Args:
            compound_name: Name of the compound
            
        Returns:
            Dictionary with keys "name", "cids", "patents" and "descriptors"
        """
        results = {"name": compound_name, "cids": [], "patents": [], "descriptors": []}
        
        # Step 1: Get PubChem CIDs
        cids = self.get_pubchem_cid(compound_name)
        results["cids"] = cids
        
        if not cids:
            print(f"No CIDs found for {compound_name}")
            return results
        
        # Step 2: Get patent IDs (up to 10 per CID)
        all_patent_ids = []
        for cid in cids[:3]:  # Limit to first 3 CIDs to avoid excessive API calls
            all_patent_ids.extend(self.get_patent_ids(cid))
        
        # Deduplicate and limit to 10 total. dict.fromkeys keeps first-seen
        # order, so the selected patents are the same on every run.
        unique_patent_ids = list(dict.fromkeys(all_patent_ids))[:10]
        results["patents"] = unique_patent_ids
        
        if not unique_patent_ids:
            print(f"No patents found for {compound_name}")
            return results
        
        # Step 3: Scrape patent info and generate descriptors
        all_descriptors = []
        for patent_id in unique_patent_ids[:3]:  # Limit to first 3 patents
            patent_info = self.scrape_patent_info(patent_id)
            if any(patent_info.values()):  # If we got any useful info
                descriptors = self.generate_functional_descriptors(patent_info, compound_name)
                all_descriptors.extend(descriptors)
        
        # Deduplicate descriptors, again preserving first-seen order
        results["descriptors"] = list(dict.fromkeys(all_descriptors))
        
        return results
    
    def run_pipeline(self, sample_size: Optional[int] = None) -> Dict:
        """
        Run the full validation pipeline on all compounds in the DataFrame.
        
        Args:
            sample_size: Optional number of leading rows to process (for testing);
                None processes every row
            
        Returns:
            Dictionary with "query_results", "alternative_results",
            "similarity_results" and "summary_df"
        """
        # Process the dataframe
        df_to_process = self.df.head(sample_size) if sample_size is not None else self.df
        
        # A compound can be a query in one row and an alternative in another;
        # the lookups are slow, so each name is processed only once.
        compound_cache: Dict = {}
        
        def process_once(name):
            if name not in compound_cache:
                compound_cache[name] = self.process_compound(name)
            return compound_cache[name]
        
        # Process query compounds
        print("Processing query compounds...")
        query_results = {}
        for _, row in tqdm(df_to_process.iterrows(), total=len(df_to_process)):
            query_name = row[self.query_col]
            if query_name not in query_results:
                query_results[query_name] = process_once(query_name)
        
        self.query_results = query_results
        
        # Process alternative compounds
        print("Processing alternative compounds...")
        alt_results = {}
        for _, row in tqdm(df_to_process.iterrows(), total=len(df_to_process)):
            for alt_col in self.alternatives_cols:
                alt_name = row[alt_col]
                if pd.notna(alt_name) and alt_name not in alt_results:
                    alt_results[alt_name] = process_once(alt_name)
        
        self.alternative_results = alt_results
        
        # Determine functional similarity
        print("Determining functional similarity...")
        similarity_results = []
        for _, row in tqdm(df_to_process.iterrows(), total=len(df_to_process)):
            query_name = row[self.query_col]
            query_descriptors = query_results.get(query_name, {}).get("descriptors", [])
            
            for alt_col in self.alternatives_cols:
                alt_name = row[alt_col]
                if pd.isna(alt_name):
                    continue
                alt_descriptors = alt_results.get(alt_name, {}).get("descriptors", [])
                
                # Pairs lacking descriptors on either side are not scored.
                if not (query_descriptors and alt_descriptors):
                    continue
                
                similarity = self.determine_functional_similarity(
                    query_descriptors, alt_descriptors, query_name, alt_name
                )
                
                similarity_results.append({
                    "query": query_name,
                    "alternative": alt_name,
                    "is_similar": similarity.get("is_similar", False),
                    "similarity_score": similarity.get("similarity_score", 0),
                    "explanation": similarity.get("explanation", ""),
                    "shared_functions": similarity.get("shared_functions", [])
                })
        
        self.similarity_results = similarity_results
        
        # Create a summary DataFrame
        summary_df = pd.DataFrame(similarity_results)
        
        # Return all results
        return {
            "query_results": query_results,
            "alternative_results": alt_results,
            "similarity_results": similarity_results,
            "summary_df": summary_df
        }
    
    @staticmethod
    def _compound_frame(compound_results: Dict) -> pd.DataFrame:
        """Flatten per-compound result dicts into one row each, joining list fields with commas."""
        return pd.DataFrame([
            {"name": name, "cids": ",".join(data["cids"]), 
             "patents": ",".join(data["patents"]), 
             "descriptors": ",".join(data["descriptors"])}
            for name, data in compound_results.items()
        ])
    
    def save_results(self, output_dir: str = "validation_results"):
        """Save query, alternative and similarity results as CSV files in ``output_dir``."""
        os.makedirs(output_dir, exist_ok=True)
        
        # Convert nested dictionaries to DataFrames
        query_df = self._compound_frame(self.query_results)
        alt_df = self._compound_frame(self.alternative_results)
        sim_df = pd.DataFrame(self.similarity_results)
        
        # Save to CSV
        query_df.to_csv(os.path.join(output_dir, "query_compounds.csv"), index=False)
        alt_df.to_csv(os.path.join(output_dir, "alternative_compounds.csv"), index=False)
        sim_df.to_csv(os.path.join(output_dir, "similarity_results.csv"), index=False)
        
        print(f"Results saved to {output_dir}/")

def main():
    """Run the patent validation pipeline on the first 300 rows of ``alt_df``.

    The Google AI API key is read from the ``GOOGLE_API_KEY`` environment
    variable and is never stored in the notebook.

    Raises:
        RuntimeError: if ``GOOGLE_API_KEY`` is unset or empty.
    """
    # SECURITY: a key used to be hard-coded on this line. Treat that key as
    # compromised and rotate it; credentials must come from the environment.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GOOGLE_API_KEY environment variable is not set; "
            "export it before running the validation pipeline."
        )
    
    df = alt_df.copy()
    
    # Initialize validation framework
    framework = PatentValidationFramework(df, api_key=api_key, query_col="drug")
    
    # Run validation pipeline (with a small sample for testing)
    results = framework.run_pipeline(sample_size=300)
    
    # Print summary
    summary_df = results["summary_df"]
    print(f"Total compounds analyzed: {len(summary_df)}")
    if summary_df.empty:
        # With no scored pair the frame has no columns, so the lookups below
        # would raise KeyError.
        print("No query/alternative pair had descriptors on both sides; nothing to summarise.")
    else:
        print(f"Functionally similar compounds: {summary_df['is_similar'].sum()}")
        # Scores come from model-generated JSON; coerce anything non-numeric to NaN.
        scores = pd.to_numeric(summary_df['similarity_score'], errors='coerce')
        print(f"Average similarity score: {scores.mean():.2f}")
    
    # Save results
    framework.save_results()

if __name__ == "__main__":
    main()

Processing query compounds...


  0%|          | 0/300 [00:00<?, ?it/s]

Error retrieving patent IDs for CID 16129704: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/16129704/xrefs/PatentID/JSON


  0%|          | 1/300 [00:33<2:48:58, 33.91s/it]

No patents found for Bivalirudin
Error retrieving patent IDs for CID 657181: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/657181/xrefs/PatentID/JSON


  1%|          | 2/300 [01:05<2:41:56, 32.61s/it]

No patents found for Leuprolide
Error retrieving patent IDs for CID 5311128: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5311128/xrefs/PatentID/JSON


  1%|          | 3/300 [01:38<2:41:26, 32.61s/it]

No patents found for Goserelin


  1%|▏         | 4/300 [02:23<3:04:41, 37.44s/it]

Error retrieving patent IDs for CID 16051933: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/16051933/xrefs/PatentID/JSON


  3%|▎         | 8/300 [06:55<5:39:39, 69.79s/it]

Error retrieving patent IDs for CID 5284373: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5284373/xrefs/PatentID/JSON
Error retrieving patent IDs for CID 5280754: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5280754/xrefs/PatentID/JSON


  3%|▎         | 9/300 [08:19<6:01:11, 74.47s/it]

Error retrieving patent IDs for CID 16131215: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/16131215/xrefs/PatentID/JSON


  3%|▎         | 10/300 [08:51<4:56:18, 61.31s/it]

No patents found for Abarelix
Error retrieving patent IDs for CID 1051: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1051/xrefs/PatentID/JSON


  4%|▎         | 11/300 [09:23<4:11:38, 52.25s/it]

No patents found for Pyridoxal phosphate


  4%|▍         | 13/300 [10:29<3:23:45, 42.60s/it]

Error retrieving patent IDs for CID 6274: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6274/xrefs/PatentID/JSON


  5%|▍         | 14/300 [11:01<3:08:43, 39.59s/it]

No patents found for Histidine
Error retrieving patent IDs for CID 34756: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/34756/xrefs/PatentID/JSON


  5%|▌         | 16/300 [13:13<3:58:24, 50.37s/it]

Error retrieving patent IDs for CID 6140: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6140/xrefs/PatentID/JSON


  6%|▌         | 17/300 [13:45<3:31:57, 44.94s/it]

No patents found for Phenylalanine
Error retrieving patent IDs for CID 171548: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/171548/xrefs/PatentID/JSON


  6%|▌         | 18/300 [14:18<3:13:26, 41.16s/it]

No patents found for Biotin


  6%|▋         | 19/300 [14:59<3:13:03, 41.22s/it]

Error scraping patent info for US2008262029A1: 404 Client Error: Not Found for url: https://patents.google.com/patent/US2008262029A1


  7%|▋         | 21/300 [16:17<3:08:23, 40.51s/it]

Error retrieving patent IDs for CID 54670067: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/54670067/xrefs/PatentID/JSON


  7%|▋         | 22/300 [16:49<2:55:37, 37.90s/it]

No patents found for Ascorbic acid
Error retrieving patent IDs for CID 1103: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1103/xrefs/PatentID/JSON


  8%|▊         | 23/300 [17:21<2:47:00, 36.18s/it]

No patents found for Spermine
Error retrieving patent IDs for CID 5960: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5960/xrefs/PatentID/JSON


  8%|▊         | 24/300 [17:52<2:40:18, 34.85s/it]

No patents found for Aspartic acid


  9%|▊         | 26/300 [19:11<2:49:10, 37.04s/it]

Error retrieving patent IDs for CID 6083: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6083/xrefs/PatentID/JSON


  9%|▉         | 27/300 [19:43<2:42:06, 35.63s/it]

No patents found for Adenosine phosphate
Error retrieving patent IDs for CID 5280934: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5280934/xrefs/PatentID/JSON


  9%|▉         | 28/300 [20:16<2:37:56, 34.84s/it]

No patents found for alpha-Linolenic acid
Error retrieving patent IDs for CID 5951: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5951/xrefs/PatentID/JSON


 10%|▉         | 29/300 [20:49<2:33:58, 34.09s/it]

No patents found for Serine
Error retrieving patent IDs for CID 6137: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6137/xrefs/PatentID/JSON
Error retrieving patent IDs for CID 876: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/876/xrefs/PatentID/JSON


 10%|█         | 30/300 [22:24<3:56:09, 52.48s/it]

Error retrieving patent IDs for CID 6057: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6057/xrefs/PatentID/JSON


 10%|█         | 31/300 [22:56<3:27:43, 46.33s/it]

No patents found for Tyrosine
Error retrieving patent IDs for CID 5280453: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5280453/xrefs/PatentID/JSON


 11%|█         | 32/300 [23:30<3:09:40, 42.47s/it]

No patents found for Calcitriol
Error retrieving patent IDs for CID 5281243: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5281243/xrefs/PatentID/JSON


 11%|█         | 33/300 [24:01<2:54:28, 39.21s/it]

No patents found for Lutein
Error retrieving patent IDs for CID 67678: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/67678/xrefs/PatentID/JSON
Error retrieving patent IDs for CID 595: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/595/xrefs/PatentID/JSON
Error retrieving patent IDs for CID 3036261: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/3036261/xrefs/PatentID/JSON


 11%|█▏        | 34/300 [25:35<4:05:48, 55.45s/it]

No patents found for Cystine
Error retrieving patent IDs for CID 1110: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1110/xrefs/PatentID/JSON


 12%|█▏        | 35/300 [26:07<3:33:48, 48.41s/it]

No patents found for Succinic acid
Error retrieving patent IDs for CID 493570: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/493570/xrefs/PatentID/JSON


 12%|█▏        | 36/300 [26:39<3:11:19, 43.48s/it]

No patents found for Riboflavin
Error retrieving patent IDs for CID 1738118: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1738118/xrefs/PatentID/JSON


 12%|█▏        | 37/300 [27:10<2:55:09, 39.96s/it]

No patents found for N-Acetylglucosamine
Error retrieving patent IDs for CID 33032: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/33032/xrefs/PatentID/JSON


 13%|█▎        | 38/300 [27:43<2:44:34, 37.69s/it]

No patents found for Glutamic acid
Error retrieving patent IDs for CID 124886: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/124886/xrefs/PatentID/JSON


 13%|█▎        | 39/300 [28:14<2:35:58, 35.86s/it]

No patents found for Glutathione
Error retrieving CID for Phosphatidyl serine: 404 Client Error: PUGREST.NotFound for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Phosphatidyl%20serine/cids/JSON


 13%|█▎        | 40/300 [28:17<1:52:10, 25.89s/it]

No CIDs found for Phosphatidyl serine
Error retrieving patent IDs for CID 750: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/750/xrefs/PatentID/JSON


 14%|█▎        | 41/300 [28:49<2:00:00, 27.80s/it]

No patents found for Glycine


 14%|█▍        | 42/300 [29:30<2:16:59, 31.86s/it]

Error retrieving patent IDs for CID 1050: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1050/xrefs/PatentID/JSON


 14%|█▍        | 43/300 [30:02<2:16:15, 31.81s/it]

No patents found for Pyridoxal
Error retrieving patent IDs for CID 586: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/586/xrefs/PatentID/JSON


 15%|█▍        | 44/300 [30:34<2:15:51, 31.84s/it]

No patents found for Creatine
Error retrieving patent IDs for CID 6106: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6106/xrefs/PatentID/JSON


 15%|█▌        | 45/300 [31:06<2:15:32, 31.89s/it]

No patents found for Leucine
Error retrieving patent IDs for CID 6305: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6305/xrefs/PatentID/JSON


 15%|█▌        | 46/300 [31:39<2:15:58, 32.12s/it]

No patents found for Tryptophan


 16%|█▌        | 47/300 [32:26<2:34:41, 36.69s/it]

Error retrieving patent IDs for CID 1130: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1130/xrefs/PatentID/JSON


 16%|█▌        | 48/300 [32:58<2:28:40, 35.40s/it]

No patents found for Thiamine
Error retrieving patent IDs for CID 5280793: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5280793/xrefs/PatentID/JSON


 16%|█▋        | 49/300 [33:31<2:24:04, 34.44s/it]

No patents found for Ergocalciferol


 17%|█▋        | 50/300 [34:10<2:29:36, 35.91s/it]

Error retrieving patent IDs for CID 9750: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/9750/xrefs/PatentID/JSON


 17%|█▋        | 51/300 [34:42<2:23:48, 34.65s/it]

No patents found for Citrulline
Error retrieving patent IDs for CID 6288: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6288/xrefs/PatentID/JSON


 17%|█▋        | 52/300 [35:13<2:19:29, 33.75s/it]

No patents found for Threonine
Error retrieving patent IDs for CID 439153: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/439153/xrefs/PatentID/JSON


 18%|█▊        | 53/300 [35:45<2:16:17, 33.11s/it]

No patents found for NADH
Error retrieving patent IDs for CID 135398658: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/135398658/xrefs/PatentID/JSON


 18%|█▊        | 54/300 [36:17<2:14:02, 32.69s/it]

No patents found for Folic acid
Error retrieving patent IDs for CID 446284: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/446284/xrefs/PatentID/JSON


 18%|█▊        | 55/300 [36:48<2:12:13, 32.38s/it]

No patents found for Icosapent
Error retrieving patent IDs for CID 5950: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5950/xrefs/PatentID/JSON
Error retrieving patent IDs for CID 71080: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/71080/xrefs/PatentID/JSON


 19%|█▊        | 56/300 [38:15<3:17:29, 48.56s/it]

Error retrieving patent IDs for CID 6287: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6287/xrefs/PatentID/JSON


 19%|█▉        | 57/300 [38:47<2:56:42, 43.63s/it]

No patents found for Valine
Error retrieving patent IDs for CID 445354: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/445354/xrefs/PatentID/JSON


 19%|█▉        | 58/300 [39:19<2:42:25, 40.27s/it]

No patents found for Vitamin A
Error retrieving patent IDs for CID 14985: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/14985/xrefs/PatentID/JSON


 20%|█▉        | 59/300 [39:51<2:31:20, 37.68s/it]

No patents found for Vitamin E
Error retrieving patent IDs for CID 1054: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1054/xrefs/PatentID/JSON


 20%|██        | 60/300 [40:23<2:23:32, 35.89s/it]

No patents found for Pyridoxine
Error retrieving patent IDs for CID 6112: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6112/xrefs/PatentID/JSON


 20%|██        | 61/300 [40:54<2:18:01, 34.65s/it]

No patents found for Lipoic acid
Error retrieving patent IDs for CID 6306: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6306/xrefs/PatentID/JSON


 21%|██        | 62/300 [41:26<2:13:53, 33.75s/it]

No patents found for Isoleucine
Error retrieving patent IDs for CID 134601: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/134601/xrefs/PatentID/JSON


 21%|██        | 63/300 [41:58<2:10:48, 33.12s/it]

No patents found for Aspartame
Error retrieving patent IDs for CID 5280795: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5280795/xrefs/PatentID/JSON


 21%|██▏       | 64/300 [42:39<2:20:02, 35.60s/it]

Error retrieving patent IDs for CID 4055: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/4055/xrefs/PatentID/JSON


 22%|██▏       | 65/300 [43:12<2:16:17, 34.80s/it]

No patents found for Menadione
Error retrieving patent IDs for CID 5957: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5957/xrefs/PatentID/JSON


 22%|██▏       | 66/300 [43:44<2:12:19, 33.93s/it]

No patents found for ATP
Error retrieving patent IDs for CID 145742: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/145742/xrefs/PatentID/JSON


 22%|██▏       | 67/300 [44:15<2:09:01, 33.22s/it]

No patents found for Proline


 23%|██▎       | 68/300 [45:00<2:21:38, 36.63s/it]

Error retrieving patent IDs for CID 6267: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6267/xrefs/PatentID/JSON


 23%|██▎       | 69/300 [45:32<2:15:58, 35.32s/it]

No patents found for Asparagine
Error retrieving patent IDs for CID 54687: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/54687/xrefs/PatentID/JSON


 23%|██▎       | 70/300 [46:04<2:11:34, 34.32s/it]

No patents found for Pravastatin


 24%|██▎       | 71/300 [47:23<3:02:00, 47.69s/it]

Error retrieving patent IDs for CID 60846: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/60846/xrefs/PatentID/JSON


 24%|██▍       | 72/300 [47:56<2:44:14, 43.22s/it]

No patents found for Valsartan
Error retrieving patent IDs for CID 5362129: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5362129/xrefs/PatentID/JSON


 24%|██▍       | 73/300 [48:29<2:31:28, 40.04s/it]

No patents found for Ramipril
Error retrieving patent IDs for CID 71398: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/71398/xrefs/PatentID/JSON


 25%|██▍       | 74/300 [49:00<2:21:10, 37.48s/it]

No patents found for Masoprocol
Error retrieving patent IDs for CID 82153: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/82153/xrefs/PatentID/JSON


 25%|██▌       | 75/300 [49:32<2:14:22, 35.84s/it]

No patents found for Flunisolide
Error retrieving patent IDs for CID 2284: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/2284/xrefs/PatentID/JSON


 25%|██▌       | 76/300 [50:05<2:10:05, 34.85s/it]

No patents found for Baclofen
Error retrieving patent IDs for CID 3007: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/3007/xrefs/PatentID/JSON


 26%|██▌       | 77/300 [50:36<2:05:51, 33.86s/it]

No patents found for Amphetamine


 26%|██▌       | 78/300 [51:03<1:57:53, 31.86s/it]

Error retrieving patent IDs for CID 89594: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/89594/xrefs/PatentID/JSON


 26%|██▋       | 79/300 [51:35<1:57:18, 31.85s/it]

No patents found for Nicotine


 27%|██▋       | 80/300 [52:12<2:02:06, 33.30s/it]

Error retrieving patent IDs for CID 3958: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/3958/xrefs/PatentID/JSON


 27%|██▋       | 81/300 [52:44<1:59:55, 32.85s/it]

No patents found for Lorazepam


 28%|██▊       | 84/300 [54:37<2:06:30, 35.14s/it]

Error retrieving patent IDs for CID 34359: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/34359/xrefs/PatentID/JSON


 28%|██▊       | 85/300 [55:43<2:39:48, 44.60s/it]

Error retrieving patent IDs for CID 4771: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/4771/xrefs/PatentID/JSON


 29%|██▊       | 86/300 [56:16<2:25:48, 40.88s/it]

No patents found for Phentermine


 29%|██▉       | 87/300 [56:42<2:09:33, 36.50s/it]

Error retrieving patent IDs for CID 33741: 504 Server Error: PUGREST.Timeout for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/33741/xrefs/PatentID/JSON


 29%|██▉       | 88/300 [57:14<2:04:39, 35.28s/it]

No patents found for Tramadol


 29%|██▉       | 88/300 [9:04:40<21:52:10, 371.37s/it]


KeyboardInterrupt: 