# Biopython store entire query to mongoDB

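This notebook runs a PubMed query through Biopython's Entrez module and prepares the complete result (summaries, abstracts and full-text link-outs) for storage in a MongoDB database.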

In [92]:
from Bio import Entrez
import json
from bs4 import BeautifulSoup as bs
import lxml
import json

# Functions from API

In [93]:
def entrez_search_pubmed(query, records_per_query=10, email="XXX@YYY.com"):
    """Search PubMed through NCBI ESearch and return the parsed response.

    Args:
        query: Search term, sent to ESearch as ``term``.
        records_per_query: Maximum number of UIDs to request; forwarded to
            ESearch as ``retmax`` (previously this argument was ignored).
        email: Contact address assigned to ``Entrez.email`` before the call.

    Returns:
        The record produced by ``Entrez.read``. The matching UIDs are
        available under its ``"IdList"`` key.
    """
    from Bio import Entrez
    Entrez.email = email
    handle = Entrez.esearch(
        db="pubmed",
        term=query,
        idtype="acc",
        retmax=records_per_query,
    )
    try:
        return Entrez.read(handle)
    finally:
        # Release the connection even when parsing the response fails.
        handle.close()

In [94]:
def entrez_fetch_list_summary(uid_list, email):
    """Fetch the ESummary record of every PubMed UID in *uid_list*.

    One ESummary request is issued per UID.

    Args:
        uid_list: Iterable of PubMed UIDs.
        email: Contact address assigned to ``Entrez.email`` before the calls.

    Returns:
        A list of ``{'uid': ..., 'summary': ...}`` dicts, one per UID and in
        the same order as *uid_list*; ``summary`` is the value returned by
        ``Entrez.read`` for that UID.
    """
    from Bio import Entrez
    Entrez.email = email
    summaries = []
    for uid in uid_list:
        handle = Entrez.esummary(db="pubmed", id=uid)
        try:
            summaries.append({'uid': uid, 'summary': Entrez.read(handle)})
        finally:
            # Close every response handle, including when parsing raises.
            handle.close()
    return summaries

In [95]:
def flatten_abstract(abstract_xml):
    """Concatenate every ``abstracttext`` element of an abstract into one string.

    Args:
        abstract_xml: Parsed element exposing ``find_all``; each element it
            returns for ``'abstracttext'`` must provide ``get`` and ``text``.

    Returns:
        The section texts joined in document order. A section that carries a
        ``label`` attribute is prefixed with ``" <label>: "``; unlabelled
        sections are appended with no separator. Returns ``''`` when there
        are no sections.
    """
    pieces = []
    for section in abstract_xml.find_all('abstracttext'):
        label = section.get('label')
        if label is not None:
            pieces.append(" " + label + ": ")
        pieces.append(section.text)
    return "".join(pieces)

In [96]:
def entrez_fetch_abstracts(uid, email):
    """Fetch PubMed records with EFetch and return their flattened abstracts.

    Args:
        uid: UID or collection of UIDs, passed to ``Entrez.efetch`` as ``id``.
        email: Contact address assigned to ``Entrez.email`` before the call.

    Returns:
        A list with one string per ``abstract`` element found in the
        response, each produced by ``flatten_abstract``.

    NOTE(review): only ``abstract`` elements are collected, so a record
    without an abstract presumably contributes no entry and the list can be
    shorter than the number of requested UIDs — confirm before pairing the
    result with the UID list by position.
    """
    from Bio import Entrez
    from bs4 import BeautifulSoup as bs
    Entrez.email = email
    handle = Entrez.efetch(db="pubmed", id=uid, rettype='Medline', retmode='xml')
    try:
        payload = handle.read()
    finally:
        # Release the connection even when reading the response fails.
        handle.close()
    document = bs(payload, "lxml")
    return [flatten_abstract(abstract) for abstract in document.find_all('abstract')]

In [97]:
def entrez_construct_abstract_dict(uids,email):
    """Pair each UID with an abstract returned by ``entrez_fetch_abstracts``.

    Args:
        uids: Sequence of PubMed UIDs; forwarded unchanged to the fetch call.
        email: Contact address forwarded to the fetch call.

    Returns:
        A list of ``{'uid': ..., 'abstract': ...}`` dicts built by walking
        *uids* and the fetched abstracts in lockstep.

    NOTE(review): pairing is purely positional and ``zip`` stops at the
    shorter input, so this assumes one abstract per UID in the same order —
    confirm against ``entrez_fetch_abstracts``.
    """
    abstracts = entrez_fetch_abstracts(uids, email)
    paired = []
    for uid, abstract in zip(uids, abstracts):
        paired.append({'uid': uid, 'abstract': abstract})
    return paired

In [98]:
def entrez_fetch_full_text_linkout(uid_list):
    """Look up the provider (full-text) link-outs of PubMed UIDs via ELink.

    Sends a single ``cmd=prlinks`` request for all UIDs and reads the
    ``idurllist`` of the first linkset in the JSON response.

    Args:
        uid_list: List of PubMed UIDs given as strings.

    Returns:
        A list of ``{'uid': ..., 'objurls': ...}`` dicts, one per entry of
        the response's ``idurllist``. An empty *uid_list* yields ``[]``
        without contacting the service.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    if not uid_list:
        # Nothing to look up, so skip the network round-trip entirely.
        return []
    import requests
    response = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi",
        # Let requests build and escape the query string.
        params={
            "dbfrom": "pubmed",
            "id": ",".join(uid_list),
            "cmd": "prlinks",
            "retmode": "json",
        },
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    response.raise_for_status()
    id_url_list = response.json()['linksets'][0]['idurllist']
    return [{'uid': entry['id'], 'objurls': entry['objurls']} for entry in id_url_list]

## Procedure to fetch

In [99]:
# Search parameters: the PubMed search term and the contact e-mail for Entrez.
query_body = dict(query="kruse eiken vestergaard", email="XXX@YYY.com")
## Query search
# The dict keys match the function's keyword arguments, so unpack it directly.
results_query = entrez_search_pubmed(**query_body)
# Keep the list of matching PubMed UIDs for the follow-up requests.
uids = results_query['IdList']
uids

['29569152', '28197643', '27848006', '26679436', '26659068', '26223424', '25466529']

In [100]:
## Summary
# Fetch one summary per UID, reusing the contact e-mail stored in query_body
# instead of repeating the literal.
results_summary = entrez_fetch_list_summary(uids, query_body['email'])
results_summary[0]

{'uid': '29569152',
 'summary': [{'Item': [], 'Id': '29569152', 'PubDate': '2018 Jun', 'EPubDate': '2018 Mar 22', 'Source': 'Osteoporos Int', 'AuthorList': ['Kruse C', 'Goemaere S', 'De Buyser S', 'Lapauw B', 'Eiken P', 'Vestergaard P'], 'LastAuthor': 'Vestergaard P', 'Title': 'Predicting mortality and incident immobility in older Belgian men by characteristics related to sarcopenia and frailty.', 'Volume': '29', 'Issue': '6', 'Pages': '1437-1445', 'LangList': ['English'], 'NlmUniqueID': '9100105', 'ISSN': '0937-941X', 'ESSN': '1433-2965', 'PubTypeList': ['Journal Article'], 'RecordStatus': 'PubMed - indexed for MEDLINE', 'PubStatus': 'ppublish+epublish', 'ArticleIds': {'pubmed': ['29569152'], 'medline': [], 'doi': '10.1007/s00198-018-4467-z', 'pii': '10.1007/s00198-018-4467-z', 'rid': '29569152', 'eid': '29569152'}, 'DOI': '10.1007/s00198-018-4467-z', 'History': {'pubmed': ['2018/03/24 06:00'], 'medline': ['2019/10/24 06:00'], 'received': '2017/07/16 00:00', 'accepted': '2018/02/25 00

In [101]:
## Abstract
# Fetch the abstracts for all UIDs, reusing the contact e-mail stored in
# query_body instead of repeating the literal.
results_abstracts = entrez_construct_abstract_dict(uids, query_body['email'])
results_abstracts[0]

{'uid': '29569152',
 'abstract': 'There is an increasing awareness of sarcopenia in older people. We applied machine learning principles to predict mortality and incident immobility in older Belgian men through sarcopenia and frailty characteristics. Mortality could be predicted with good accuracy. Serum 25-hydroxyvitamin D and bone mineral density scores were the most important predictors. INTRODUCTION: Machine learning principles were used to predict 5-year mortality and 3-year incident severe immobility in a population of older men by frailty and sarcopenia characteristics. METHODS: Using prospective data from 1997 on 264 older Belgian men (n\u2009=\u2009152 predictors), 29 statistical models were developed and tuned on 75% of data points then validated on the remaining 25%. The model with the highest test area under the curve (AUC) was chosen as the best. From these, ranked predictor importance was extracted. RESULTS: Five-year mortality could be predicted with good accuracy (test 

In [102]:
## Elink
# Look up the provider link-outs for all UIDs in a single request.
results_elink = entrez_fetch_full_text_linkout(uids)
# Show the first entry as a sample of the returned structure.
results_elink[0]

{'uid': '29569152',
 'objurls': [{'url': {'value': 'https://doi.org/10.1007/s00198-018-4467-z'},
   'iconurl': {'lng': 'EN',
    'value': '//www.ncbi.nlm.nih.gov/corehtml/query/egifs/http:--production.springer.de-OnlineResources-Logos-springerlink.gif'},
   'subjecttypes': ['publishers/providers'],
   'categories': ['Full Text Sources'],
   'attributes': ['full-text online',
    'publisher of information in url',
    'subscription/membership/fee required'],
   'provider': {'name': 'Springer',
    'nameabbr': 'Springer',
    'id': '3055',
    'url': {'lng': 'EN', 'value': 'http://www.springeronline.com/'}}}]}

# Store to mongoDB

In [111]:
from pymongo import MongoClient
from loguru import logger
# Alternative host name, left disabled
# (presumably for a containerised setup — TODO confirm).
#client = MongoClient('mongodb', 27017)
# Connect to the MongoDB server at 127.0.0.1, port 27017.
client = MongoClient('127.0.0.1', 27017)
# Log the names of the databases visible to this client.
logger.info(client.list_database_names())

2021-05-12 11:43:48.071 | INFO     | __main__:<module>:5 - ['admin', 'config', 'local']


In [None]:
def entrez_mongodb_create_dictionaries(uid):
    """Open a MongoDB connection and log the available database names.

    NOTE(review): this function looks unfinished — ``uid`` is accepted but
    not used yet, and nothing is returned.

    Args:
        uid: Currently unused.
    """
    from pymongo import MongoClient
    from loguru import logger
    # Alternative host name, left disabled
    # (presumably for a containerised setup — TODO confirm).
    #client = MongoClient('mongodb', 27017)
    client = MongoClient('127.0.0.1', 27017)
    logger.info(client.list_database_names())
