In [3]:
!pip install SPARQLWrapper 

Collecting SPARQLWrapper
  Downloading https://files.pythonhosted.org/packages/00/9b/443fbe06996c080ee9c1f01b04e2f683b2b07e149905f33a2397ee3b80a2/SPARQLWrapper-1.8.5-py3-none-any.whl
Collecting rdflib>=4.0
[?25l  Downloading https://files.pythonhosted.org/packages/d0/6b/6454aa1db753c0f8bc265a5bd5c10b5721a4bb24160fb4faf758cf6be8a1/rdflib-5.0.0-py3-none-any.whl (231kB)
[K     |████████████████████████████████| 235kB 6.4MB/s 
Collecting isodate
[?25l  Downloading https://files.pythonhosted.org/packages/9b/9f/b36f7774ff5ea8e428fdcfc4bb332c39ee5b9362ddd3d40d9516a55221b2/isodate-0.6.0-py2.py3-none-any.whl (45kB)
[K     |████████████████████████████████| 51kB 4.1MB/s 
[?25hInstalling collected packages: isodate, rdflib, SPARQLWrapper
Successfully installed SPARQLWrapper-1.8.5 isodate-0.6.0 rdflib-5.0.0


In [4]:
import sys
import os
from SPARQLWrapper import SPARQLWrapper
import rdflib
import json
import csv
import re

In [5]:
# SPARQL prefixes for Wikidata
prefix_list="""
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
 PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
 PREFIX dct: <http://purl.org/dc/terms/>
 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
 PREFIX owl: <http://www.w3.org/2002/07/owl#>
 PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
 PREFIX schema: <http://schema.org/>
 PREFIX cc: <http://creativecommons.org/ns#>
 PREFIX geo: <http://www.opengis.net/ont/geosparql#>
 PREFIX prov: <http://www.w3.org/ns/prov#>
 PREFIX wikibase: <http://wikiba.se/ontology#>
 PREFIX wdata: <http://www.wikidata.org/wiki/Special:EntityData/>
 PREFIX bd: <http://www.bigdata.com/rdf#>
 
 PREFIX wd: <http://www.wikidata.org/entity/>
 PREFIX wdt: <http://www.wikidata.org/prop/direct/>
 PREFIX wdtn: <http://www.wikidata.org/prop/direct-normalized/>
 
 PREFIX wds: <http://www.wikidata.org/entity/statement/>
 PREFIX p: <http://www.wikidata.org/prop/>
 PREFIX wdref: <http://www.wikidata.org/reference/>
 PREFIX wdv: <http://www.wikidata.org/value/>
 PREFIX ps: <http://www.wikidata.org/prop/statement/>
 PREFIX psv: <http://www.wikidata.org/prop/statement/value/>
 PREFIX psn: <http://www.wikidata.org/prop/statement/value-normalized/>
 PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
 PREFIX pqv: <http://www.wikidata.org/prop/qualifier/value/>
 PREFIX pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/>
 PREFIX pr: <http://www.wikidata.org/prop/reference/>
 PREFIX prv: <http://www.wikidata.org/prop/reference/value/>
 PREFIX prn: <http://www.wikidata.org/prop/reference/value-normalized/>
 PREFIX wdno: <http://www.wikidata.org/prop/novalue/>

 PREFIX hint: <http://www.bigdata.com/queryHints#>
 """

# Extract Wikidata ID, MeSH ID, English label, and Japanese label

In [6]:
query_mesh_j_e="""
select distinct ?wikidata_id ?mesh_id ?english  ?japanese
where {
  ?wikidata_id rdfs:label ?english ;
     rdfs:label ?japanese ;
     wdt:P486 ?mesh_id.
   Filter(lang(?english)="en")
   Filter(lang(?japanese)="ja")
   
 
} 
"""

In [7]:
# SPARQL client shared by every query cell in this notebook.
# NOTE: despite "dbpedia" in the variable name, the endpoint URL below is the
# Wikidata query service. Results are requested as JSON.
sparql_dbpedia = SPARQLWrapper(
    endpoint="https://query.wikidata.org/bigdata/namespace/wdq/sparql",
    returnFormat="json",
)

In [8]:
# Send the PREFIX block + English/Japanese label query to the endpoint.
sparql_dbpedia.setQuery(prefix_list + query_mesh_j_e)
# convert() decodes the JSON reply into nested dicts/lists; the rows live
# under results1["results"]["bindings"].
results1 = sparql_dbpedia.query().convert()

In [9]:
# Preview the first ten rows; each row maps a SELECT variable name to a dict
# holding its type and value.
results1["results"]["bindings"][:10]

[{'english': {'type': 'literal', 'value': 'Belgium', 'xml:lang': 'en'},
  'japanese': {'type': 'literal', 'value': 'ベルギー', 'xml:lang': 'ja'},
  'mesh_id': {'type': 'literal', 'value': 'D001530'},
  'wikidata_id': {'type': 'uri',
   'value': 'http://www.wikidata.org/entity/Q31'}},
 {'english': {'type': 'literal', 'value': 'happiness', 'xml:lang': 'en'},
  'japanese': {'type': 'literal', 'value': '幸福', 'xml:lang': 'ja'},
  'mesh_id': {'type': 'literal', 'value': 'D006240'},
  'wikidata_id': {'type': 'uri',
   'value': 'http://www.wikidata.org/entity/Q8'}},
 {'english': {'type': 'literal', 'value': 'broad bean', 'xml:lang': 'en'},
  'japanese': {'type': 'literal', 'value': 'ソラマメ', 'xml:lang': 'ja'},
  'mesh_id': {'type': 'literal', 'value': 'D031307'},
  'wikidata_id': {'type': 'uri',
   'value': 'http://www.wikidata.org/entity/Q131342'}},
 {'english': {'type': 'literal', 'value': 'Ulmaceae', 'xml:lang': 'en'},
  'japanese': {'type': 'literal', 'value': 'ニレ科', 'xml:lang': 'ja'},
  'mesh_i

In [None]:
# Number of rows returned by the English/Japanese label query.
len(results1["results"]["bindings"])

15230

# Todo:
* add aliases (skos:altLabel)
* extract COVID-related entries

In [10]:
# Aliases (Japanese)
query_mesh_j_alias="""
select distinct ?wikidata_id ?mesh_id ?japanese 
where {

  {?wikidata_id rdfs:label ?japanese} union {?wikidata_id skos:altLabel ?japanese}
  ?wikidata_id   wdt:P486 ?mesh_id.

   Filter(lang(?japanese)="ja")
   
 
} 
"""

In [11]:
# Run the Japanese label/alias query and keep the decoded JSON reply.
sparql_dbpedia.setQuery(prefix_list + query_mesh_j_alias)
results2 = sparql_dbpedia.query().convert()
# Last expression of the cell: the row count is displayed as the cell output.
len(results2["results"]["bindings"])

28667

In [12]:
# Aliases (English)
query_mesh_e_alias="""
select distinct ?wikidata_id ?mesh_id ?english
where {

  {?wikidata_id rdfs:label ?english} union {?wikidata_id skos:altLabel ?english}
  ?wikidata_id   wdt:P486 ?mesh_id.

   Filter(lang(?english)="en")
   
 
} 
"""

In [13]:
# Run the English label/alias query and keep the decoded JSON reply.
sparql_dbpedia.setQuery(prefix_list + query_mesh_e_alias)
results3 = sparql_dbpedia.query().convert()
# Last expression of the cell: the row count is displayed as the cell output.
len(results3["results"]["bindings"])

146247