In [1]:
"""

This is a modified version of code from: 
1. https://lawlesst.github.io/notebook/sparql-dataframe.html
2. https://github.com/SuLab/sparql_to_pandas/blob/master/SPARQL_pandas.ipynb

Demonstrating how to get JupyterLab working with Binder: 

https://github.com/binder-examples/jupyterlab
https://github.com/binder-examples/jupyter-extension/blob/master/index.ipynb

"""
import pandas as pd
import json
from SPARQLWrapper import SPARQLWrapper, JSON
import ipywidgets as widgets

def get_sparql_dataframe(service, query):
    """
    Helper function to convert SPARQL results into a Pandas data frame.
    """
    sparql = SPARQLWrapper(service)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)

    return pd.DataFrame(out, columns=cols)

wds = "https://query.wikidata.org/sparql"
rq = """
SELECT ?wikidata_disease ?wikidata_diseaseLabel ?wikipedia_en_article 
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  ?wikidata_disease wdt:P31 wd:Q12136.
  
      OPTIONAL {
      ?wikipedia_en_article schema:about ?wikidata_disease .
      ?wikipedia_en_article schema:inLanguage "en" .
      ?wikipedia_en_article schema:isPartOf <https://en.wikipedia.org/> .
    }
}
#order by desc(?wikidata_disease)
"""

df = get_sparql_dataframe(wds, rq)



"""
This is a modified version of code from: 
1. https://stackoverflow.com/a/48481247/2339926
"""
def make_clickable_wkd_items(val):
    # target _blank to open new window
    return '<a target="_blank" href="{}">{}</a>'.format(val, val[31:])
def make_clickable_wikipedia_en_articles(val):
    # target _blank to open new window
    if not val: 
        return None
    else:
        return '<a target="_blank" href="{}">{}</a>'.format(val, val[30:])
    
button = widgets.Button(description="Show Results")
display(button)

def on_button_clicked(b):
    display(df.style.format({'wikidata_disease': make_clickable_wkd_items,'wikipedia_en_article': make_clickable_wikipedia_en_articles}))

button.on_click(on_button_clicked)



"""
This is a modified version of code from: 
1. https://www.kaggle.com/rtatman/download-a-csv-file-from-a-kernel
"""    
from IPython.display import HTML
import pandas as pd
import numpy as np
import base64

def create_download_link(df, title = "<b>Download Results in CSV Format<b>", filename = "data.csv"):
    """
    function that takes in a dataframe and creates a text link to  
    download it (will only work for files < 2MB or so)
    """  
    csv = df.to_csv()
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
    return HTML(html)

display(create_download_link(df))

Button(description='Show Results', style=ButtonStyle())

In [2]:
"""
This is a modified version of code from: 
1. https://gist.github.com/bonzanini/5a4c39e4c02502a8451d
2. https://gist.github.com/bonzanini/5a4c39e4c02502a8451d

# Full discussion:
# https://marcobonzanini.wordpress.com/2015/01/12/searching-pubmed-with-python/
"""

from Bio import Entrez

def search(query):
    Entrez.email = 'your.email@example.com'
    handle = Entrez.esearch(db='pubmed', 
                            sort='relevance', 
                            retmax='200',
                            retmode='xml', 
                            term=query)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results

def pubmed_search(disease):
    results = search("("+disease+"""[title/abstract] NOT "withdrawn"[Title]) AND "The Cochrane database of systematic reviews"[Journal]
    """) 
    #pprint.pprint (results)
    return results['IdList']

#pubmed_search("Crohn's disease")

In [3]:
import wikipedia
# from fuzzywuzzy import fuzz
# from fuzzywuzzy import process
# import re

# def searh_wp_refs_4title(pubmedArticleTitle,WPpageTitle):
#     ''' looksup an article title in a WikiPedia page '''
#     #display (wikipedia.WikipediaPage(WPpageTitle).html())
#     WPpageHTML= wikipedia.WikipediaPage(WPpageTitle).html()
#     ratio = fuzz.ratio(WPpageHTML, pubmedArticleTitle)
#     print ("fuzz.ratio:",ratio)

#     WPpageHTML=re.sub(r'\W+', '', WPpageHTML.lower())
#     pubmedArticleTitle=re.sub(r'\W+', '', pubmedArticleTitle.lower())
#     print(pubmedArticleTitle)
#     if pubmedArticleTitle in WPpageHTML:
#         return True
#     else:
#         return False
    
#print (searh_wp_refs_4title("interventions to slow progression of myopia in children...","Near-sightedness"))

def searh_wp_refs_4PMID(PMID,en_article_HTML):
    ''' looksup a PMIDs in a WikiPedia page '''  

    #print('searching for PMID: <b>'+PMID+ '</b>')
    if PMID in en_article_HTML:
        #print ('found')
        return True
    else:
        #print ('not found')
        return False
    
#en_article_HTML= wikipedia.WikipediaPage('Near-sightedness').html()
#print (searh_wp_refs_4PMID("22161388",en_article_HTML))


In [5]:
import wikipedia
from tqdm import tnrange, tqdm_notebook
from urllib.parse import unquote
from IPython.display import clear_output

import ipywidgets as widgets
from ipywidgets import HBox, VBox

df['Cochrane reviews']=""
disease_search_log= widgets.HTML()
citation_search_log= widgets.HTML()
display(disease_search_log)
display(citation_search_log)
for index, row in tqdm_notebook(df.iterrows(),desc='Progress',total=df['wikipedia_en_article'].count(), unit="wikidata_disease"):
    if row['wikipedia_en_article'] is not None:

        #print(i,index,row['wikidata_diseaseLabel'],row['wikidata_disease'],row['wikipedia_en_article'])
        disease_search_log.value= f"<b>processing:</b> disease #{index} &emsp; {row['wikidata_diseaseLabel']} &emsp; {row['wikidata_disease']} &emsp; {row['wikipedia_en_article']}"

        wikipedia_en_article_Title = row['wikipedia_en_article'][30:]
        wikipedia_en_article_Title= unquote(unquote(wikipedia_en_article_Title))
        #print(wikipedia_en_article_Title)
        
        #will raise a DisambiguationError if the page is a disambiguation page, or a PageError if 
        #the page doesn’t exist (although by default, it tries to find the page you meant with suggest and search.)
        try:
            wikipedia_en_article_HTML= wikipedia.WikipediaPage(wikipedia_en_article_Title).html()
        except wikipedia.exceptions.DisambiguationError as e:
            print (e.options)
            #picks the first sense in the list by default
            wikipedia_en_article_HTML= wikipedia.WikipediaPage(e.options[0]).html()

        id_list= pubmed_search(row['wikidata_diseaseLabel'])
        matches=0
        if  id_list:
            papers = fetch_details(id_list)
            #print(json.dumps(papers, indent=5))
 
            row['Cochrane reviews']="""
            <div align="left" style="margin:10px;">
            <ol start="1" style="margin-left:30px">"""
            for j, paper in enumerate(papers['PubmedArticle']):
            
                cited=searh_wp_refs_4PMID(paper['MedlineCitation']['PMID'],wikipedia_en_article_HTML)
                color="red"
                confirmation=" 🗷"
                if cited:
                    matches +=1
                    color="green"
                    confirmation=" 🗹"
                
                row['Cochrane reviews']+='<li style="padding:5px;color:'+color+';">'\
                +paper['MedlineCitation']['Article']['ArticleTitle']\
                +' <a target="_blank" href="https://www.ncbi.nlm.nih.gov/pubmed/'\
                +paper['MedlineCitation']['PMID']+'">PMID: '+paper['MedlineCitation']['PMID']+'</a>'\
                +confirmation+"</li>"
                #print("%d) %s" % (j+1, paper['MedlineCitation']['Article']['ArticleTitle']),paper['MedlineCitation']['PMID'],cited)
            row['Cochrane reviews']+="</ol></div>"
        else:
            row['Cochrane reviews']= "No Cochrane reviews found"
            
        citation_search_log.value= f"""<p><b>processed</b>: disease #{index} &emsp; {row['wikidata_diseaseLabel']} &emsp; {row['wikidata_disease']} &emsp; {row['wikipedia_en_article']}</p>"""
        citation_search_log.value +=f"<p><b>{matches} of {len(id_list)}</b> Cochrane reviews found (via PubMed) are cited in the Wikipedia article: {wikipedia_en_article_Title}</p>"
       

HTML(value='')

HTML(value='')

HBox(children=(IntProgress(value=0, description='Progress', max=4376, style=ProgressStyle(description_width='i…

KeyboardInterrupt: 

In [None]:
display(create_download_link(df))
display(df.head(1570).style.format({'wikidata_disease': make_clickable_wkd_items,'wikipedia_en_article': make_clickable_wikipedia_en_articles}))

In [None]:
from wikidata.client import Client
client = Client()  # doctest: +SKIP
entity = client.get('Q1472', load=True)
print (entity)
print (entity.description)

image_prop = client.get('P18')
image = entity[image_prop]
print (image)

print(image.image_resolution)

print(image.image_url)

