In [1]:
from Bio import Entrez
import re

def search_pubmed(query, retmax=150):
    """Run a relevance-sorted PubMed ESearch for ``query``.

    Sets ``Entrez.email`` as a side effect before issuing the request.

    Args:
        query: Search term sent to ESearch as ``term``.
        retmax: Maximum number of records to request (default 150).

    Returns:
        The object produced by ``Entrez.read`` for the ESearch response, or
        ``None`` if the request or the parsing raised an exception (the
        error is printed rather than re-raised).
    """
    Entrez.email = 'pg54434@uminho.pt'
    try:
        handle = Entrez.esearch(db='pubmed',
                                sort='relevance',
                                retmax=retmax,
                                retmode='xml',
                                term=query)
        try:
            return Entrez.read(handle)
        finally:
            # Close the handle even when parsing fails, so it is not leaked.
            handle.close()
    except Exception as e:
        # Best-effort boundary: callers test the result for truthiness.
        print(f"Erro ao buscar no PubMed: {e}")
        return None

def fetch_abstract(pubmed_id):
    """Fetch and parse the PubMed XML record for a single article.

    This function does not set ``Entrez.email`` itself.

    Args:
        pubmed_id: PubMed identifier passed to EFetch as ``id``.

    Returns:
        The object produced by ``Entrez.read`` for the EFetch response, or
        ``None`` if the request or the parsing raised an exception (the
        error is printed rather than re-raised).
    """
    try:
        handle = Entrez.efetch(db='pubmed', id=pubmed_id, retmode='xml')
        try:
            return Entrez.read(handle)
        finally:
            # Close the handle even when parsing fails, so it is not leaked.
            handle.close()
    except Exception as e:
        # Best-effort boundary: callers test the result for truthiness.
        print(f"Erro ao buscar o resumo do artigo {pubmed_id}: {e}")
        return None

def _collect_keyword_phrases(abstract, keywords, stop_lookahead):
    """Return case-insensitively de-duplicated captures for each keyword.

    For every keyword, captures the keyword plus the shortest following text
    that ends where ``stop_lookahead`` matches. Results keep first-seen order.
    """
    phrases = []
    seen = set()  # lowercased captures already collected
    for keyword in keywords:
        # re.escape keeps a keyword literal even if it ever contains
        # regex metacharacters.
        pattern = rf"(\b{re.escape(keyword)}\b.*?)(?={stop_lookahead})"
        for match in re.findall(pattern, abstract, re.IGNORECASE):
            key = match.lower()
            if key not in seen:
                seen.add(key)
                phrases.append(match)
    return phrases


def extract_conditions_and_characteristics(article):
    """Extract keyword-based phrases from an article's abstract.

    Args:
        article: Parsed EFetch record; the abstract is read from
            ``article['PubmedArticle'][0]['MedlineCitation']['Article']
            ['Abstract']['AbstractText']`` and its parts are joined with
            single spaces.

    Returns:
        A ``(conditions, characteristics)`` tuple of lists of captured
        strings. Both lists are empty when the abstract path is missing
        (absent key or empty ``PubmedArticle`` list).

    NOTE: the patterns run with ``re.IGNORECASE``, so ``[A-Z]`` in the
    lookahead matches any letter; each capture therefore ends at the start of
    the next word or number (or at end of text).
    """
    try:
        abstract_list = article['PubmedArticle'][0]['MedlineCitation']['Article']['Abstract']['AbstractText']
    except (KeyError, IndexError):
        # No abstract available (IndexError: the PubmedArticle list is empty).
        return [], []
    abstract = ' '.join(abstract_list)

    condition_keywords = ["PEG", "precipitant", "buffer", "pH", "temperature", "salt", "concentration", "detergent"]
    characteristic_keywords = [
        "monoclonal antibody", "hydrophobicity", "folding", "Fab fragment", "Fc region", "stability", "solubility",
        "variable region", "constant region", "heavy chain",
        "light chain", "amino acid sequence", "intermolecular forces",
        "hydrophilicity", "isoelectric point", "pI",
    ]

    conditions = _collect_keyword_phrases(
        abstract, condition_keywords, r"\b[A-Z]|\b\d|\bPEG\b|\bpH\b|$")
    characteristics = _collect_keyword_phrases(
        abstract, characteristic_keywords, r"\b[A-Z]|\b\d|$")

    return conditions, characteristics

def main():
    """Prompt for keywords, search PubMed, and print details per article.

    For each returned PubMed ID the article is fetched and its title, link,
    abstract (when present) and the extracted condition/characteristic
    phrases are printed. Articles that cannot be fetched are skipped
    silently; articles whose record lacks the expected structure are
    reported and skipped.
    """
    query = input("Digite as palavras-chave para buscar no PubMed: ")
    results = search_pubmed(query, retmax=150)

    if not results:
        print("Nenhum resultado encontrado ou ocorreu um erro na busca.")
        return

    pubmed_ids = results['IdList']
    print(f"Encontrados {len(pubmed_ids)} artigos.")

    for pubmed_id in pubmed_ids:
        article = fetch_abstract(pubmed_id)
        if not article:
            continue

        try:
            citation = article['PubmedArticle'][0]['MedlineCitation']['Article']
            article_title = citation['ArticleTitle']
        except (KeyError, IndexError) as e:
            # IndexError: the PubmedArticle list is empty for this record;
            # report it and carry on instead of aborting the whole run.
            print(f"Erro ao extrair dados do artigo {pubmed_id}: {e}")
            continue

        article_url = f"https://pubmed.ncbi.nlm.nih.gov/{pubmed_id}"
        print("\nTítulo:", article_title)
        print("Link:", article_url)

        try:
            abstract = ' '.join(citation['Abstract']['AbstractText'])
            print("Resumo:", abstract)
        except KeyError:
            print("Resumo não disponível.")

        conditions, characteristics = extract_conditions_and_characteristics(article)
        if conditions:
            print("Condições de cristalização:", ', '.join(conditions))
        if characteristics:
            print("Características dos anticorpos:", ', '.join(characteristics))

# Run the interactive search only when executed as a script, not on import.
if __name__ == "__main__":
    main()


Digite as palavras-chave para buscar no PubMed:  Antibody Crystallization


Encontrados 150 artigos.

Título: A workflow for the development of template-assisted membrane crystallization downstream processing for monoclonal antibody purification.
Link: https://pubmed.ncbi.nlm.nih.gov/37697106
Resumo: Monoclonal antibodies (mAbs) are commonly used biologic drugs for the treatment of diseases such as rheumatoid arthritis, multiple sclerosis, COVID-19 and various cancers. They are produced in Chinese hamster ovary cell lines and are purified via a number of complex and expensive chromatography-based steps, operated in batch mode, that rely heavily on protein A resin. The major drawback of conventional procedures is the high cost of the adsorption media and the extensive use of chemicals for the regeneration of the chromatographic columns, with an environmental cost. We have shown that conventional protein A chromatography can be replaced with a single crystallization step and gram-scale production can be achieved in continuous flow using the template-assisted mem