In [31]:
"""
This is a modified version of code from: 
    1. https://lawlesst.github.io/notebook/sparql-dataframe.html
    2. https://github.com/SuLab/sparql_to_pandas/blob/master/SPARQL_pandas.ipynb
"""

import pandas as pd
import json
from SPARQLWrapper import SPARQLWrapper, JSON

def get_sparql_dataframe(service, query):
    """
    Run a SPARQL query against an endpoint and tabulate the JSON answer.

    service: URL of the SPARQL endpoint.
    query: SPARQL query text.

    Returns a pandas DataFrame with one column per variable named in the
    response head and one row per result binding. A variable that is not
    bound in a given row is stored as None.
    """
    endpoint = SPARQLWrapper(service)
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query()

    payload = json.load(response.response)
    variables = payload['head']['vars']

    # One list per binding, ordered like `variables`; unbound variables -> None.
    records = [
        [binding.get(name, {}).get('value') for name in variables]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=variables)

# URL of the SPARQL endpoint the query below is sent to.
wds = "https://query.wikidata.org/sparql"
# Query text: selects every ?disease whose wdt:P31 value is wd:Q12136, its
# English label (via the wikibase:label service) and, optionally, an
# ?en_article that is about the item, is in language "en" and is part of
# <https://en.wikipedia.org/>. The "#order by" line is a comment inside the
# query itself, so no ordering is requested.
rq = """
SELECT ?disease ?diseaseLabel ?en_article 
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  ?disease wdt:P31 wd:Q12136.
  
      OPTIONAL {
      ?en_article schema:about ?disease .
      ?en_article schema:inLanguage "en" .
      ?en_article schema:isPartOf <https://en.wikipedia.org/> .
    }
}
#order by desc(?disease)
"""

# Run the query; rows without an English article get None in 'en_article'.
df = get_sparql_dataframe(wds, rq)

##This is a modified version of code from: https://stackoverflow.com/a/48481247/2339926
def make_clickable_wkt_items(val):
    """
    Render a Wikidata entity URI as an HTML link labelled with its item id.

    val: entity URI such as "http://www.wikidata.org/entity/Q168403", or a
        missing value (None or an empty string).

    Returns the anchor markup as a string, or None when val is missing, so
    a missing cell does not raise TypeError while the frame is formatted.
    """
    if not val:
        return None
    # The link text is whatever follows the entity namespace; taking the
    # length from the prefix itself replaces the former magic number 31.
    entity_prefix = "http://www.wikidata.org/entity/"
    # target _blank to open new window
    return '<a target="_blank" href="{}">{}</a>'.format(val, val[len(entity_prefix):])
def make_clickable_en_articles(val):
    """
    Render an English Wikipedia article URL as an HTML link.

    val: article URL, or a falsy value (None / empty string) when the row
        has no article.

    Returns the anchor markup, whose text is the part of the URL after the
    first 30 characters, or None for a falsy val.
    """
    if val:
        # 30 characters: the length of the article URL prefix.
        title_start = len("https://en.wikipedia.org/wiki/")
        # target _blank to open new window
        return '<a target="_blank" href="{}">{}</a>'.format(val, val[title_start:])
    return None

# Display the frame with the 'disease' and 'en_article' cells rendered
# through the two link formatters defined above.
df.style.format({'disease': make_clickable_wkt_items,'en_article': make_clickable_en_articles})
#df.style.format()

Unnamed: 0,disease,diseaseLabel,en_article
0,Q168403,myopia,Near-sightedness
1,Q170082,psychosis,Psychosis
2,Q72000,Lemierre's syndrome,Lemierre%27s_syndrome
3,Q73518,marantic endocarditis,Nonbacterial_thrombotic_endocarditis
4,Q73828,oromandibular dystonia,Oromandibular_dystonia
5,Q76973,yaws,Yaws
6,Q79793,measles,Measles
7,Q170102,scrapie,Scrapie
8,Q170990,arthritis,Arthritis
9,Q79928,acne,Acne


In [2]:
#code from: https://www.kaggle.com/rtatman/download-a-csv-file-from-a-kernel
# import the modules we'll need
from IPython.display import HTML
import pandas as pd
import numpy as np
import base64

# function that takes in a dataframe and creates a text link to  
# download it (will only work for files < 2MB or so)
def create_download_link(df, title = "<b>Download CSV file</b>", filename = "data.csv"):
    """
    Build an HTML link that downloads a dataframe as a CSV file.

    The CSV text is base64-encoded and embedded in the link as a
    ``data:text/csv`` URI, so the link grows with the size of the frame.

    df: object whose ``to_csv()`` returns the CSV text.
    title: HTML used as the link text (the default closes its <b> element;
        it was previously left open).
    filename: name placed in the link's ``download`` attribute.

    Returns an IPython ``HTML`` object wrapping the anchor markup.
    """
    csv_text = df.to_csv()
    # b64encode works on bytes; decode the result back to text for the URI.
    payload = base64.b64encode(csv_text.encode()).decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(html.format(payload=payload, title=title, filename=filename))

# create a link to download the dataframe `df` as CSV; the returned HTML
# object is the last expression, so it is what the cell displays
create_download_link(df)

In [6]:
#This is a modified version of code from: https://gist.github.com/bonzanini/5a4c39e4c02502a8451d

# you need to install Biopython:
# pip install biopython

# Full discussion:
# https://marcobonzanini.wordpress.com/2015/01/12/searching-pubmed-with-python/

from Bio import Entrez


def search(query):
    """
    Search PubMed through Entrez ESearch and return the parsed response.

    query: search term, passed to ESearch as ``term``.

    Returns whatever ``Entrez.read`` produces for the XML response. Results
    are requested sorted by relevance and capped at 200 ids (``retmax``).
    """
    # The contact address is set on the Entrez module itself, so it stays in
    # effect for later Entrez calls until it is reassigned.
    Entrez.email = 'arash.joorabchi@ul.ie'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='200',
                            retmode='xml',
                            term=query)
    try:
        return Entrez.read(handle)
    finally:
        # Release the response handle even when parsing raises.
        handle.close()

def fetch_details(id_list):
    """
    Fetch the PubMed records for a list of ids through Entrez EFetch.

    id_list: iterable of id strings; they are joined with commas and sent
        as the ``id`` argument.

    Returns whatever ``Entrez.read`` produces for the XML response.
    """
    ids = ','.join(id_list)
    # NOTE(review): this is a placeholder address and differs from the one
    # set in search(); confirm which contact address should be used.
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Release the response handle even when parsing raises.
        handle.close()


def pubmed_search(disease):
    """
    Return the PubMed ids of Cochrane reviews that mention a disease.

    disease: text matched against the title/abstract field.

    The search term requires the journal "The Cochrane database of
    systematic reviews" and excludes records with "withdrawn" in the title.
    Returns the 'IdList' entry of the search() result.
    """
    # The literal keeps its trailing newline and indentation unchanged.
    term = "(" + disease + """[title/abstract] NOT "withdrawn"[Title]) AND "The Cochrane database of systematic reviews"[Journal]
    """
    return search(term)['IdList']

    
# Smoke test: show the id list returned for the term "tetanus".
pubmed_search("tetanus")

['30091147', '29851031', '29342498', '27394698', '27378094', '26621223', '26144877', '26086647', '25803792', '25408540', '24226506', '23728678', '23728640', '22513932', '21735423', '21069697', '19588375', '18425960', '16235306', '16034918', '14974046', '12519624', '11406003']

In [4]:
import wikipedia

from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import re


def searh_wp_refs_4title(pubmedArticleTitle,WPpageTitle):
    """
    Check whether an article title occurs in a Wikipedia page.

    pubmedArticleTitle: title text to look for.
    WPpageTitle: title of the Wikipedia page whose HTML is searched.

    Both texts are lower-cased and stripped of every non-word character
    before a substring test. Prints the fuzz.ratio of the raw HTML against
    the raw title, then the normalised title. Returns True or False.
    """
    def _squash(text):
        # Lower-case, then drop each run of non-word characters.
        return re.sub(r'\W+', '', text.lower())

    page_html = wikipedia.WikipediaPage(WPpageTitle).html()
    print ("fuzz.ratio:", fuzz.ratio(page_html, pubmedArticleTitle))

    squashed_html = _squash(page_html)
    squashed_title = _squash(pubmedArticleTitle)
    print(squashed_title)
    return squashed_title in squashed_html
    
#print (searh_wp_refs_4title("interventions to slow progression of myopia in children...","Near-sightedness"))

def searh_wp_refs_4PMID(PMID,en_article_HTML):
    ''' Look up a PMID in the HTML of a Wikipedia page.

    PMID: the identifier to look for (converted to text and stripped).
    en_article_HTML: HTML text of the page.

    Returns True when the PMID occurs as a complete number, i.e. not as a
    fragment of a longer run of digits; otherwise False. An empty PMID or
    empty HTML yields False.
    '''
    pmid = '' if PMID is None else str(PMID).strip()
    if not pmid or not en_article_HTML:
        return False
    # A plain substring test would report "1234" as found inside "512345";
    # the look-arounds require a non-digit (or the text edge) on both sides.
    pattern = r'(?<!\d)' + re.escape(pmid) + r'(?!\d)'
    return re.search(pattern, en_article_HTML) is not None
    

# Sanity check: fetch the HTML of the 'Near-sightedness' page and print
# whether the PMID "22161388" is found in it.
en_article_HTML= wikipedia.WikipediaPage('Near-sightedness').html()
print (searh_wp_refs_4PMID("22161388",en_article_HTML))



True


In [39]:
import wikipedia
from tqdm import tnrange, tqdm_notebook
from urllib.parse import unquote
from IPython.display import clear_output

import ipywidgets as widgets
from ipywidgets import HBox, VBox



# For every disease that has an English Wikipedia article, list the Cochrane
# reviews PubMed returns for the disease label and mark which of them are
# cited (by PMID) in that article. The HTML list is stored in the
# 'Cochrane reviews' column of df.
df['Cochrane reviews']=""

# Only rows with an article are processed, so iterate over exactly those
# rows; this keeps the progress-bar total equal to the number of iterations.
rows_with_article = df[df['en_article'].notna()]
# Length of the article URL prefix that precedes the page title.
wiki_prefix_len = len("https://en.wikipedia.org/wiki/")

i=0
for index, row in tqdm_notebook(rows_with_article.iterrows(),desc='processing diseases',total=len(rows_with_article), unit="disease"):
    print()
    print('------------------------------------------------------------')
    print(i,index,row['diseaseLabel'],row['disease'],row['en_article'])

    # The page title is the percent-decoded tail of the article URL.
    en_article_Title = unquote(unquote(row['en_article'][wiki_prefix_len:]))
    en_article_HTML = wikipedia.WikipediaPage(en_article_Title).html()

    id_list = pubmed_search(row['diseaseLabel'])
    matches = 0
    # Reset for every disease so the summary line never reports the count
    # left over from a previous iteration (or fails when none exists yet).
    review_count = 0
    if id_list:
        articles = fetch_details(id_list)['PubmedArticle']
        review_count = len(articles)

        parts = ['\n            <div align="left" style="margin:10px;">'
                 '\n            <ol start="1" style="margin-left:30px">']
        for paper in articles:
            citation = paper['MedlineCitation']
            pmid = citation['PMID']
            if searh_wp_refs_4PMID(pmid, en_article_HTML):
                matches += 1
                color, confirmation = "green", " 🗹"
            else:
                color, confirmation = "red", " 🗷"

            parts.append('<li style="padding:5px;color:' + color + ';">'
                         + citation['Article']['ArticleTitle']
                         + ' <a target="_blank" href="https://www.ncbi.nlm.nih.gov/pubmed/'
                         + pmid + '">PMID: ' + pmid + '</a>'
                         + confirmation + "</li>")
        parts.append("</ol></div>")
        reviews_html = "".join(parts)
    else:
        print ("No Cochrane reviews found")
        reviews_html = "No Cochrane reviews found"

    # Write through df.at: `row` is a per-iteration Series, and assigning
    # to it is not guaranteed to change the frame itself.
    df.at[index, 'Cochrane reviews'] = reviews_html

    print('{} of {} Cochrane reviews found (via PubMed) are cited in the Wikipedia article: {}'.format(matches, review_count, en_article_Title))
    i += 1


        
       

HBox(children=(IntProgress(value=0, description='processing diseases', max=4377, style=ProgressStyle(descripti…


------------------------------------------------------------
0 0 myopia http://www.wikidata.org/entity/Q168403 https://en.wikipedia.org/wiki/Near-sightedness

------------------------------------------------------------
1 1 psychosis http://www.wikidata.org/entity/Q170082 https://en.wikipedia.org/wiki/Psychosis

------------------------------------------------------------
2 2 Lemierre's syndrome http://www.wikidata.org/entity/Q72000 https://en.wikipedia.org/wiki/Lemierre%27s_syndrome
No Cochrane reviews found

------------------------------------------------------------
3 3 marantic endocarditis http://www.wikidata.org/entity/Q73518 https://en.wikipedia.org/wiki/Nonbacterial_thrombotic_endocarditis
No Cochrane reviews found

------------------------------------------------------------
4 4 oromandibular dystonia http://www.wikidata.org/entity/Q73828 https://en.wikipedia.org/wiki/Oromandibular_dystonia
No Cochrane reviews found

----------------------------------------------------------

KeyboardInterrupt: 

In [37]:
        # Scratch cell: build a Textarea widget with preset text and show it
        # as the cell result.
        test= widgets.Textarea(
            value='Hello World',
            placeholder='Type something',
            description='String:',
            disabled=False
        )
        test

Textarea(value='Hello World', description='String:', placeholder='Type something')

In [30]:
# Display the first 30 rows with the 'disease' and 'en_article' cells
# rendered through the two link formatters.
df.head(30).style.format({'disease': make_clickable_wkt_items,'en_article': make_clickable_en_articles})

Unnamed: 0,disease,diseaseLabel,en_article,Cochrane reviews
0,Q168403,myopia,Near-sightedness,Anti-vascular endothelial growth factor (VEGF) drugs for treatment of retinopathy of prematurity. PMID: 29308602 🗷Laser-assisted subepithelial keratectomy (LASEK) versus laser-assisted in-situ keratomileusis (LASIK) for correcting myopia. PMID: 28197998 🗷Anti-vascular endothelial growth factor for choroidal neovascularisation in people with pathological myopia. PMID: 27977064 🗷Laser-assisted subepithelial keratectomy (LASEK) versus photorefractive keratectomy (PRK) for correction of myopia. PMID: 26899152 🗷Anti-vascular endothelial growth factor (VEGF) drugs for treatment of retinopathy of prematurity. PMID: 26932750 🗷Excimer laser refractive surgery versus phakic intraocular lenses for the correction of moderate to high myopia. PMID: 24937100 🗷Laser-assisted in-situ keratomileusis (LASIK) versus photorefractive keratectomy (PRK) for myopia. PMID: 23440799 🗷Excimer laser refractive surgery versus phakic intraocular lenses for the correction of moderate to high myopia. PMID: 22258972 🗷Interventions to slow progression of myopia in children. PMID: 22161388 🗹Acupuncture for slowing the progression of myopia in children and adolescents. PMID: 21901710 🗷Excimer laser refractive surgery versus phakic intraocular lenses for the correction of moderate to high myopia. PMID: 20464757 🗷Photorefractive keratectomy (PRK) versus laser-assisted in-situ keratomileusis (LASIK) for myopia. PMID: 16625626 🗷Laser photocoagulation for choroidal neovascularisation in pathologic myopia. PMID: 16235380 🗷
1,Q170082,psychosis,Psychosis,"De-escalation techniques for managing non-psychosis induced aggression in adults. PMID: 30019748 🗷Risperidone for psychosis-induced aggression or agitation (rapid tranquillisation). PMID: 29634083 🗷Withdrawal versus continuation of long-term antipsychotic drug use for behavioural and psychological symptoms in older people with dementia. PMID: 29605970 🗷Aripiprazole (intramuscular) for psychosis-induced aggression or agitation (rapid tranquillisation). PMID: 29308601 🗷Benzodiazepines for psychosis-induced aggression or agitation. PMID: 29219171 🗷Haloperidol for psychosis-induced aggression or agitation (rapid tranquillisation). PMID: 28758203 🗷De-escalation techniques for psychosis-induced aggression or agitation. PMID: 28368091 🗷Psychological interventions for post-traumatic stress disorder (PTSD) in people with severe mental illness. PMID: 28116752 🗷Droperidol for psychosis-induced aggression or agitation. PMID: 27976370 🗷Haloperidol plus promethazine for psychosis-induced aggression. PMID: 27885664 🗷Haloperidol for long-term aggression in psychosis. PMID: 27889922 🗷Dietary advice for people with schizophrenia. PMID: 27007216 🗷Antiglucocorticoid and related treatments for psychosis. PMID: 26725721 🗷Interventions for psychotic symptoms concomitant with epilepsy. PMID: 26690687 🗷Cognitive behavioural therapy (brief versus standard duration) for schizophrenia. PMID: 26488686 🗷Clozapine for psychotic disorders in adults with intellectual disabilities. PMID: 26397173 🗷Voxel-based morphometry for separation of schizophrenia from other types of psychosis in first episode psychosis. PMID: 26252640 🗷Pharmacological treatment for psychotic depression. PMID: 26225902 🗷Psychoeducation for siblings of people with severe mental illness. PMID: 25953641 🗷First rank symptoms for schizophrenia. PMID: 25879096 🗷Haloperidol versus first-generation antipsychotics for the treatment of schizophrenia and other psychotic disorders. 
PMID: 25592299 🗷Cannabis and schizophrenia. PMID: 25314586 🗷Flupenthixol versus low-potency first-generation antipsychotic drugs for schizophrenia. PMID: 25177834 🗷Fluphenazine versus low-potency first-generation antipsychotic drugs for schizophrenia. PMID: 25087165 🗷Haloperidol versus low-potency first-generation antipsychotic drugs for schizophrenia. PMID: 25007358 🗷Trifluoperazine versus low-potency first-generation antipsychotic drugs for schizophrenia. PMID: 25003310 🗷Cognitive behavioural therapy (brief versus standard duration) for schizophrenia. PMID: 24723312 🗷Pharmacological treatment for psychotic depression. PMID: 24282034 🗷Collaborative care approaches for people with severe mental illness. PMID: 24190251 🗷Atypical antipsychotics for psychosis in adolescents. PMID: 24129841 🗷Benzodiazepines for psychosis-induced aggression or agitation. PMID: 24049046 🗷Tripterygium wilfordii Hook F (a traditional Chinese medicine) for primary nephrotic syndrome. PMID: 23934958 🗷Preventive interventions for postnatal psychosis. PMID: 23740790 🗷Quetiapine versus typical antipsychotic medications for schizophrenia. PMID: 23728667 🗷Benzodiazepines for psychosis-induced aggression or agitation. PMID: 23633309 🗷Withdrawal versus continuation of chronic antipsychotic drugs for behavioural and psychological symptoms in older people with dementia. PMID: 23543555 🗷Training to recognise the early signs of recurrence in schizophrenia. PMID: 23450559 🗷Treatment of Lennox-Gastaut syndrome. PMID: 23450537 🗷Haloperidol for psychosis-induced aggression or agitation (rapid tranquillisation). PMID: 23152276 🗷Benzodiazepines for schizophrenia. PMID: 23152236 🗷Information and communication technology in patient education and support for people with schizophrenia. PMID: 23076932 🗷Paliperidone palmitate for schizophrenia. PMID: 22696377 🗷Zuclopenthixol acetate for acute schizophrenia and similar serious mental illnesses. PMID: 22513898 🗷Aripiprazole versus placebo for schizophrenia. 
PMID: 21833956 🗷Early intervention for psychosis. PMID: 21678345 🗹Antidepressants for agitation and psychosis in dementia. PMID: 21328305 🗷Risperidone versus other atypical antipsychotics for schizophrenia. PMID: 21249678 🗷Chlorpromazine for psychosis induced aggression or agitation. PMID: 20393959 🗷Olanzapine versus other atypical antipsychotics for schizophrenia. PMID: 20238348 🗷Quetiapine versus other atypical antipsychotics for schizophrenia. PMID: 20091600 🗷Therapeutic interventions for symptomatic treatment in Huntington's disease. PMID: 19588393 🗷Haloperidol plus promethazine for psychosis-induced aggression. PMID: 19588366 🗷Treatment of Lennox-Gastaut syndrome. PMID: 19588340 🗷Sertindole versus other atypical antipsychotics for schizophrenia. PMID: 19370652 🗷Treatment for amphetamine psychosis. PMID: 19160215 🗷Treatment for amphetamine psychosis. PMID: 18843639 🗷Interventions for psychotic symptoms concomitant with epilepsy. PMID: 18843704 🗷Aripiprazole versus typical antipsychotic drugs for schizophrenia. PMID: 18646161 🗷Cannabis and schizophrenia. PMID: 18646115 🗷Paliperidone for schizophrenia. PMID: 18425951 🗷Aripiprazole versus typicals for schizophrenia. PMID: 18254107 🗷Open general medical wards versus specialist psychiatric units for acute psychoses. PMID: 17943786 🗷Antipsychotic medication for childhood-onset schizophrenia. PMID: 17636744 🗷Early intervention for psychosis. PMID: 17054213 🗷Pharmacological treatments for psychosis-related polydipsia. PMID: 17054176 🗷Educational games for mental health professionals. PMID: 16625545 🗷Cyclophosphamide versus methylprednisolone for treating neuropsychiatric involvement in systemic lupus erythematosus. PMID: 16625558 🗷The effectiveness of atypical antipsychotics for the treatment of aggression and psychosis in Alzheimer's disease. PMID: 16437455 🗷Estrogen for schizophrenia. PMID: 16235377 🗷Benzodiazepines alone or in combination with antipsychotic drugs for acute psychosis. 
PMID: 16235313 🗷Sertindole for schizophrenia. PMID: 16034864 🗷Haloperidol plus promethazine for psychosis induced aggression. PMID: 15654706 🗷Droperidol for acute psychosis. PMID: 15495037 🗷Zuclopenthixol acetate for acute schizophrenia and similar serious mental illnesses. PMID: 15266432 🗷Early Intervention for psychosis. PMID: 15106257 🗷Antipsychotic drugs for non-affective psychosis during pregnancy and postpartum. PMID: 15106251 🗷Antipsychotic drug treatment for elderly people with late-onset schizophrenia. PMID: 12804499 🗷Open general medical wards versus specialist psychiatric units for acute psychoses. PMID: 12804459 🗷Depot risperidone for schizophrenia. PMID: 14584007 🗷Treatment of Lennox-Gastaut syndrome. PMID: 12917958 🗷Pharmacological treatments for psychosis-related polydipsia. PMID: 12137700 🗷Antidepressants for people with both schizophrenia and depression. PMID: 12076447 🗷Valproic acid, valproate and divalproex in the maintenance treatment of bipolar disorder. PMID: 11687047 🗷Clotiapine for acute psychotic illnesses. PMID: 11279762 🗷Treatment for amphetamine psychosis. PMID: 11687172 🗷Droperidol for acute psychosis. PMID: 11406047 🗷Cyclophosphamide versus methylprednisolone for the treatment of neuropsychiatric involvement in systemic lupus erythematosus. PMID: 10908541 🗷"
2,Q72000,Lemierre's syndrome,Lemierre%27s_syndrome,No Cochrane reviews found
3,Q73518,marantic endocarditis,Nonbacterial_thrombotic_endocarditis,No Cochrane reviews found
4,Q73828,oromandibular dystonia,Oromandibular_dystonia,No Cochrane reviews found
5,Q76973,yaws,Yaws,No Cochrane reviews found
6,Q79793,measles,Measles,"Patient reminder and recall interventions to improve immunization rates. PMID: 29342498 🗷Zinc supplementation for the treatment of measles in children. PMID: 28631310 🗹Vitamin A supplementation for preventing morbidity and mortality in children from six months to five years of age. PMID: 28282701 🗷Parents' and informal caregivers' views and experiences of communication about routine childhood vaccination: a synthesis of qualitative evidence. PMID: 28169420 🗷Routine vitamin A supplementation for the prevention of blindness due to measles infection in children. PMID: 27580345 🗷Integrated management of childhood illness (IMCI) strategy for children under five. PMID: 27378094 🗷Zinc supplementation for the treatment of measles in children. PMID: 25794053 🗷Interventions aimed at communities to inform and/or educate about early childhood vaccination. PMID: 25408540 🗷Post-exposure passive immunisation for preventing measles. PMID: 24687262 🗷Routine vitamin A supplementation for the prevention of blindness due to measles infection in children. PMID: 24436005 🗷Antibiotics for preventing complications in children with measles. PMID: 23943263 🗹Vaccines for measles, mumps and rubella in children. PMID: 22336803 🗷Chinese medicinal herbs for measles. PMID: 22071825 🗷Routine vitamin A supplementation for the prevention of blindness due to measles infection in children. PMID: 21491401 🗷Vitamin A supplementation for preventing morbidity and mortality in children from 6 months to 5 years of age. PMID: 21154399 🗷Chinese medicinal herbs for measles. PMID: 19821347 🗷Antibiotics for preventing complications in children with measles. PMID: 18646073 🗷Chinese medicinal herbs for measles. PMID: 16625642 🗷Vaccines for measles, mumps and rubella in children. PMID: 16235361 🗷Vitamin A for treating measles in children. PMID: 16235283 🗹Patient reminder and patient recall systems to improve immunization rates. 
PMID: 16034918 🗷Vitamin A for non-measles pneumonia in children. PMID: 16034908 🗷Vitamin A for treating measles in children. PMID: 11869601 🗷Interventions aimed at improving immunization rates. PMID: 12519624 🗷Vitamin A for treating measles in children. PMID: 11405993 🗷Antibiotics for preventing pneumonia in children with measles. PMID: 11034721 🗷Antibiotics for preventing pneumonia in children with measles. PMID: 10796647 🗷Antibiotics for preventing pneumonia in children with measles. PMID: 10908500 🗷"
7,Q370066,tolosa-hunt syndrome,Tolosa%E2%80%93Hunt_syndrome,No Cochrane reviews found
8,Q371520,polydactyly,Polydactyly,No Cochrane reviews found
9,Q372701,esophageal cancer,Esophageal_cancer,Chemoradiotherapy versus chemoradiotherapy plus surgery for esophageal cancer. PMID: 28829911 🗷Preoperative chemotherapy for resectable thoracic esophageal cancer. PMID: 25988291 🗷Omentoplasty for esophagogastrostomy after esophagectomy. PMID: 23152259 🗷Chinese herbal medicines for esophageal cancer. PMID: 19821327 🗷Medicinal herbs for esophageal cancer. PMID: 17443549 🗷Medicinal herbs for esophageal cancer. PMID: 17253513 🗷Chemotherapy for metastatic carcinoma of the esophagus and gastro-esophageal junction. PMID: 17054195 🗷Preoperative chemotherapy for resectable thoracic esophageal cancer. PMID: 16855972 🗷Combined chemotherapy and radiotherapy (without surgery) compared with radiotherapy alone in localized carcinoma of the esophagus. PMID: 16437440 🗷Preoperative radiotherapy for esophageal carcinoma. PMID: 16235286 🗷Preoperative chemotherapy for resectable thoracic esophageal cancer. PMID: 14583936 🗷Combined chemotherapy and radiotherapy (without surgery) compared with radiotherapy alone in localized carcinoma of the esophagus. PMID: 12535428 🗷Preoperative chemotherapy for resectable thoracic esophageal cancer. PMID: 11279723 🗷Combined chemotherapy and radiotherapy (without surgery) compared with radiotherapy alone in localized carcinoma of the esophagus. PMID: 11406033 🗷Preoperative radiotherapy for esophageal carcinoma. PMID: 11034728 🗷Preoperative radiotherapy for esophageal carcinoma. Oeosphageal Cancer Collaborative Group. PMID: 10796823 🗷


In [None]:
# create a link to download the dataframe `df` as CSV; the returned HTML
# object is the last expression, so it is what the cell displays
create_download_link(df)

In [None]:
# Scratch cell exercising the `wikidata` client package: load one entity,
# print it and its description, then read the value the entity holds for
# property 'P18' and print that value's image_resolution and image_url.
from wikidata.client import Client
client = Client()  # doctest: +SKIP
# load=True is passed so the entity is requested with loading enabled.
entity = client.get('Q1472', load=True)
print (entity)
print (entity.description)

# The property object is itself fetched through the client and then used
# as the key into the entity.
image_prop = client.get('P18')
image = entity[image_prop]
print (image)

print(image.image_resolution)

print(image.image_url)

