In [4]:
from Bio.Blast import NCBIXML 
from Bio.Blast import NCBIWWW
import requests, sys, json
import re
from Bio import SeqIO
from Bio import Entrez
from Bio import Medline
import pandas as pd
import numpy as np
from tqdm import tqdm
from Bio import SearchIO
from Bio.SwissProt import KeyWList
from Bio import SwissProt
from Bio import ExPASy
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq
from Bio import AlignIO
from Bio.Align.Applications import ClustalwCommandline
from Bio.Align import AlignInfo
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
from Bio import Phylo

# Literature Analysis

## Automated searches with Biopython

There are two possible strategies for searching for articles.
The first searches the database globally by keyword; the second is more targeted and works from a pre-selected set of articles.

In [2]:
# First strategy: keyword search on PubMed, then fetch and print the matches.

database = "PubMed"  # for a literature search the database is PubMed
word = "DDX18"
res = "5"  # maximum number of articles to retrieve (passed as retmax)
email = "guilherme.lobo@ua.pt"

Entrez.email = email

# Step 1: esearch returns the IDs of the articles matching the search term.
handle = Entrez.esearch(db=database, term=word, retmax=res)
try:
    record = Entrez.read(handle)
finally:
    handle.close()  # release the connection even if parsing fails
idlist = record['IdList']

# Step 2: efetch the matching records in MEDLINE text format
# (for this specific case the database must be PubMed / "Medline").
# Medline.parse is lazy, so the records are materialised into a list
# before the handle is closed.
records = []
if idlist:  # efetch needs at least one ID, so skip it when nothing was found
    handle = Entrez.efetch(db=database, id=idlist, rettype="medline", retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()
else:
    print("No articles found for:", word)

# Other MEDLINE tags, e.g. "PMID" or "AB" (abstract), can be printed the same way.
for article in records:
    print("title:", article.get("TI", "-"))
    print("authors:", article.get("AU", "-"))
    print("source:", article.get("SO", "-"))
    print("")

title: Detection of Selection Signatures in Anqing Six-End-White Pigs Based on Resequencing Data.
authors: ['Chen Y', 'Wu X', 'Wang J', 'Hou Y', 'Liu Y', 'Wang B', 'Hu X', 'Zheng X', 'Zhang X', 'Ding Y', 'Yin Z']
source: Genes (Basel). 2022 Dec 8;13(12):2310. doi: 10.3390/genes13122310.

title: DDX18 prevents R-loop-induced DNA damage and genome instability via PARP-1.
authors: ['Lin WL', 'Chen JK', 'Wen X', 'He W', 'Zarceno GA', 'Chen Y', 'Chen S', 'Paull TT', 'Liu HW']
source: Cell Rep. 2022 Jul 19;40(3):111089. doi: 10.1016/j.celrep.2022.111089.

title: Identification of Important Modules and Hub Gene in Chronic Kidney Disease Based on WGCNA.
authors: ['Wang J', 'Yin Y', 'Lu Q', 'Zhao YR', 'Hu YJ', 'Hu YZ', 'Wang ZY']
source: J Immunol Res. 2022 May 4;2022:4615292. doi: 10.1155/2022/4615292. eCollection 2022.

title: The RNA-Binding Protein DDX18 Promotes Gastric Cancer by Affecting the Maturation of MicroRNA-21.
authors: ['Zhang Y', 'Yu F', 'Ni B', 'Li Q', 'Bae SW', 'Choi JH', 'Yan

In [3]:
# Same strategy as the DDX18 search, but for type 2 diabetes.

database = "PubMed"
word = "Type 2 Diabetes"
res = "5"  # maximum number of articles to retrieve (passed as retmax)
email = "guilherme.lobo@ua.pt"

Entrez.email = email

# Step 1: esearch returns the IDs of the articles matching the search term.
handle = Entrez.esearch(db=database, term=word, retmax=res)
try:
    record = Entrez.read(handle)
finally:
    handle.close()  # release the connection even if parsing fails
idlist = record['IdList']

# Step 2: efetch the matching records in MEDLINE text format.
# Medline.parse is lazy, so the records are materialised into a list
# before the handle is closed.
records = []
if idlist:  # efetch needs at least one ID, so skip it when nothing was found
    handle = Entrez.efetch(db=database, id=idlist, rettype="medline", retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()
else:
    print("No articles found for:", word)

for article in records:
    print("title:", article.get("TI", "-"))
    print("authors:", article.get("AU", "-"))
    print("source:", article.get("SO", "-"))
    print("")

title: Association of metformin exposure with low risks of frailty and adverse outcomes in patients with diabetes.
authors: ['Liu P', 'Pan Y', 'Song Y', 'Zhou Y', 'Zhang W', 'Li X', 'Li J', 'Li Y', 'Ma L']
source: Eur J Med Res. 2023 Feb 3;28(1):65. doi: 10.1186/s40001-023-01017-6.

title: Dietary habits after a physical activity mHealth intervention: a randomized controlled trial.
authors: ['Sjoblom L', 'Bonn SE', 'Alexandrou C', 'Dahlgren A', 'Eke H', 'Trolle Lagerros Y']
source: BMC Nutr. 2023 Feb 2;9(1):23. doi: 10.1186/s40795-023-00682-4.

title: Association of triglycerides to high-density lipoprotein cholesterol ratio to identify future prediabetes and type 2 diabetes mellitus: over one-decade follow-up in the Iranian population.
authors: ['Tohidi M', 'Asgari S', 'Chary A', 'Safiee S', 'Azizi F', 'Hadaegh F']
source: Diabetol Metab Syndr. 2023 Feb 2;15(1):13. doi: 10.1186/s13098-023-00988-0.

title: Association of BMAL1 clock gene polymorphisms with fasting glucose in children.


In [3]:
# Second strategy: build a table from a pre-selected set of articles that was
# exported from PubMed in MEDLINE format (the articles used in the literature
# analysis).

# Output column -> MEDLINE tag. To include more fields, add entries such as
# "Abstract": "AB", "Authors": "AU" or "Source": "SO".
fields = {"PMID": "PMID", "Title": "TI"}

alldata = []
with open("Article_DDX18.txt", encoding="utf-8") as f:
    pmids = Medline.parse(f)
    for pmid in tqdm(pmids):
        # A record behaves like a dict; .get() supplies "-" for a missing tag
        # without a bare `except` that would also hide unrelated errors.
        alldata.append({column: pmid.get(tag, "-") for column, tag in fields.items()})

# Label the rows "1".."N" according to the number of records actually read,
# so the cell keeps working when the file does not hold exactly five articles.
row_labels = [str(position) for position in range(1, len(alldata) + 1)]
df = pd.DataFrame(alldata, columns=list(fields), index=row_labels)
print(df)

# To persist the table: df.to_csv("articles_ddx18.csv", index=False)

5it [00:00, 2717.93it/s]

       PMID                                              Title
1  33489896  The RNA-Binding Protein DDX18 Promotes Gastric...
2  18351129                  Cellular studies of MrDb (DDX18).
3  34603195  Non-Coding RNA as Biomarkers for Type 2 Diabet...
4  33479058  The First Genome-Wide Association Study for Ty...
5  30297969  Fine-mapping type 2 diabetes loci to single-va...



