In [None]:
#| hide
%load_ext autoreload
%autoreload 2

# Search

> Functionality to search PubMed and retrieve article data from it

In [None]:
#| default_exp search

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from Bio import Entrez
import os
from datetime import datetime, timedelta, date
from fastcore.all import *
from typing import Union, Optional, Any
from pydantic import BaseModel, ValidationError, validator, root_validator

In [None]:
#| export
from pubmed_lib.data import *
from pubmed_lib.result import *
from pubmed_lib.parser import *

In [None]:
#| hide
from dotenv import load_dotenv, find_dotenv

In [None]:
#| hide
# Load variables from a .env file (located with find_dotenv) into the process environment.
load_dotenv(find_dotenv())
# Read the credentials from the environment; each is None when its variable is unset.
email = os.environ.get('EMAIL')
api_key = os.environ.get('API_KEY')

In [None]:
#| exports

class Search(BaseModel):
    """
    Search class to wrap a PubMed search and its results
    """
    search_tag:Optional[str] = None #Tag to specify the search field; must be a key of SEARCH_TAGS. Default: Title/Abstract
    retmax:int = 200 #Maximum number of results to be retrieved
    retmode:str ='xml' #Format of the returned data, e.g. xml
    sort:str='relevance' #Way to sort the results
    mindate:Optional[int] = None #Initial date to search from, year
    maxdate:Optional[int] = None #Final date to search to, year
    idlist:Optional[List[int]] = None #Optional list of PubMed ids
    email:Optional[str] = None #Contact email handed to Entrez
    api_key:Optional[str] = None #API key handed to Entrez

    @root_validator()
    def validate_email(cls,values:dict )->dict:
        """
        Resolve `email` and `api_key` through `get_from_dict_or_env` and store them back in `values`.

        NOTE(review): `get_from_dict_or_env` is defined outside this cell; presumably it prefers the
        value given to the model and falls back to the EMAIL / API_KEY environment variables - confirm.
        """
        values["email"] = get_from_dict_or_env(values, "email", "EMAIL")
        values['api_key'] = get_from_dict_or_env(values, 'api_key', 'API_KEY')
        return values

    @validator('search_tag', always=True)
    def validate_search_tag(cls, v):
        """
        Translate the user-facing tag name into the value stored in SEARCH_TAGS.

        Raises ValueError when the tag is not a key of SEARCH_TAGS.
        """
        # `always=True` makes this validator run for the default (None) as well,
        # so an omitted tag falls back to 'Title/Abstract'.
        tag = v or 'Title/Abstract'
        if tag not in SEARCH_TAGS:
            # list(...) keeps the `dict_keys(...)` repr out of the error message.
            raise ValueError(f'Search tag needs to be one of {list(SEARCH_TAGS)}')
        return SEARCH_TAGS[tag]
    
     

In [None]:
#| exports

@patch
def search(
    self:Search,
    query: str, #Query to be searched in pubmed
):
    """
    Run `query` against PubMed with `Entrez.esearch` and return the `IdList` of the parsed response.
    """
    # Entrez reads its credentials from module attributes, so set them before the request.
    Entrez.email = self.email
    Entrez.api_key = self.api_key
    # The validated search tag is appended directly to the user query.
    term = query+self.search_tag
    handle = Entrez.esearch(db='pubmed',
                    sort=self.sort,
                    retmax=self.retmax,
                    retmode=self.retmode,
                    term=term,
                    mindate=self.mindate,
                    maxdate=self.maxdate)
    try:
        found = Entrez.read(handle)
    finally:
        # Release the underlying HTTP response even when parsing fails.
        handle.close()
    return found['IdList']

In [None]:
# Build a Search limited to 4 results (`retmax`); every other field keeps its default.
search = Search(retmax=4)

In [None]:
# Run the query; `Search.search` returns the `IdList` entry of the parsed esearch response.
idlist = search.search('divisome')

In [None]:
# Display the ids returned by the search.
idlist

['33536648', '32424210', '33220539', '27387519']

In [None]:
#| export
@patch
def fetch_details(
    self:Search,
    idlist:List[int], #list of pubmed ids to be retrieved
    ):
    """
    Receive a list of PubMed ids (ints or strings) and return the `PubmedArticle` entries
    of the parsed `Entrez.efetch` response. An empty `idlist` returns an empty list without
    contacting the service.
    """
    if not idlist:
        # Nothing to fetch; avoid sending a request with a blank `id` parameter.
        return []
    # Set the credentials here too, so this method also works when `search` was not called first.
    Entrez.email = self.email
    Entrez.api_key = self.api_key
    # str() lets the ids be ints (as annotated) or the strings returned by `search`.
    ids = ','.join(str(pmid) for pmid in idlist)
    handle = Entrez.efetch(db='pubmed',
                           retmode=self.retmode,
                           id=ids)
    try:
        fetched = Entrez.read(handle)
    finally:
        # Release the underlying HTTP response even when parsing fails.
        handle.close()
    return fetched['PubmedArticle']

In [None]:
#| exports
@patch
def results(
    self:Search,
    query:str, #Term to be queried in pubmed
)->list:
    """
    Search PubMed for `query`, fetch the details of every hit and return a list with one
    `Result` per article. Returns an empty list when the search finds nothing.
    """
    id_list = self.search(query)
    if not id_list:
        # No hits: skip the fetch request entirely.
        return []
    # Each raw article is flattened by `parse_paperinfo` and then validated into a `Result`.
    return [
        Result.parse_obj(parse_paperinfo(article))
        for article in self.fetch_details(id_list)
    ]


In [None]:
# Search and fetch in one call; `Search.results` returns a list of parsed `Result` objects.
articles = search.results('bifunctional degraders')

In [None]:
# Inspect the first parsed article.
articles[0]

Result(pubmed='34917243', pmc='PMC8667060', doi='10.1021/acsmedchemlett.1c00580', pii=None, abstract='', autorlist=[Autor(Fname='Ram W', Lname='Sabnis', name='Ram W Sabnis', initials='Rw', emails='', affiliations='Smith, Gambrell & Russell Llp, 1230 Peachtree Street Ne, Suite 3100, Atlanta, Georgia 30309, United States.', identifier='0000-0001-7289-0581')], title='BRD9 Bifunctional Degraders for Treating Cancer.', journal='ACS medicinal chemistry letters', published=datetime.date(2021, 11, 6), mayorKeys=[], mayorMesh=[], minorMesh=[])

In [None]:
# Inspect the second parsed article.
articles[1]

Result(pubmed='34544752', pmc='PMC8831444', doi='10.1158/2159-8290.CD-20-1726', pii='2159-8290.CD-20-1726', abstract='Cyclin-dependent kinases 4 and 6 (CDK4/6) represent a major therapeutic vulnerability for breast cancer. The kinases are clinically targeted via ATP competitive inhibitors (CDK4/6i); however, drug resistance commonly emerges over time. To understand CDK4/6i resistance, we surveyed over 1,300 breast cancers and identified several genetic alterations (e.g., <i>FAT1</i>, <i>PTEN</i>, or <i>ARID1A</i> loss) converging on upregulation of CDK6. Mechanistically, we demonstrate CDK6 causes resistance by inducing and binding CDK inhibitor INK4 proteins (e.g., p18<sup>INK4C</sup>). <i>In vitro</i> binding and kinase assays together with physical modeling reveal that the p18<sup>INK4C</sup>-cyclin D-CDK6 complex occludes CDK4/6i binding while only weakly suppressing ATP binding. Suppression of INK4 expression or its binding to CDK6 restores CDK4/6i sensitivity. To overcome this co

In [None]:
# Render every article as a short plain-text record: publication date, title and abstract.
docs = [
    f"Published: {article.published}\nTitle: {article.title}\nAbstract: {article.abstract}"
    for article in articles
]

In [None]:
# Print each record with a plain loop; a comprehension used only for its side effects
# builds a throwaway list of None that the notebook then echoes as the cell output.
for doc in docs:
    print(doc)

Published: 2021-11-06
Title: BRD9 Bifunctional Degraders for Treating Cancer.
Summary: 
Published: 2021-09-20
Title: INK4 Tumor Suppressor Proteins Mediate Resistance to CDK4/6 Kinase Inhibitors.
Summary: Cyclin-dependent kinases 4 and 6 (CDK4/6) represent a major therapeutic vulnerability for breast cancer. The kinases are clinically targeted via ATP competitive inhibitors (CDK4/6i); however, drug resistance commonly emerges over time. To understand CDK4/6i resistance, we surveyed over 1,300 breast cancers and identified several genetic alterations (e.g., <i>FAT1</i>, <i>PTEN</i>, or <i>ARID1A</i> loss) converging on upregulation of CDK6. Mechanistically, we demonstrate CDK6 causes resistance by inducing and binding CDK inhibitor INK4 proteins (e.g., p18<sup>INK4C</sup>). <i>In vitro</i> binding and kinase assays together with physical modeling reveal that the p18<sup>INK4C</sup>-cyclin D-CDK6 complex occludes CDK4/6i binding while only weakly suppressing ATP binding. Suppression of I

[None, None, None, None]

In [None]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells to the library modules - confirm against the nbdev docs).
import nbdev; nbdev.nbdev_export()