In [1]:
%endpoint  https://query.wikidata.org/sparql
%format    json

In [4]:
# Basic retrieval of GWAS catalog data: list every gene item that has a
# "genetic association" statement pointing at asthma, plus its English label.
PREFIX wd:       <http://www.wikidata.org/entity/>
PREFIX wdt:      <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX bd:       <http://www.bigdata.com/rdf#>

SELECT DISTINCT ?gene ?geneLabel
WHERE {
  ?gene wdt:P2293 wd:Q35869 ;   # genetic association (P2293) -> "asthma"
        wdt:P31   wd:Q7187 .    # instance of (P31) -> "gene"

  # The label service supplies ?geneLabel (English label of ?gene).
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}

gene,geneLabel
http://www.wikidata.org/entity/Q5013317,COL22A1
http://www.wikidata.org/entity/Q14912759,SLC22A5
http://www.wikidata.org/entity/Q14914243,PSAP
http://www.wikidata.org/entity/Q14907990,SLC30A8
http://www.wikidata.org/entity/Q18025002,GAB1
http://www.wikidata.org/entity/Q18035589,C6orf10
http://www.wikidata.org/entity/Q18054256,GSDMA
http://www.wikidata.org/entity/Q18058487,C5orf56
http://www.wikidata.org/entity/Q18030785,PRKG1
http://www.wikidata.org/entity/Q18027370,IGSF3


In [5]:
# ... and whose gene product is localized to a membrane.
# Narrows the asthma gene list to genes whose encoded protein has a cellular
# component that leads to "membrane" through the property path below.
PREFIX wd:       <http://www.wikidata.org/entity/>
PREFIX wdt:      <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX bd:       <http://www.bigdata.com/rdf#>

SELECT DISTINCT ?gene ?geneLabel
WHERE {
  ?gene wdt:P2293 wd:Q35869 ;     # genetic association (P2293) -> "asthma"
        wdt:P31   wd:Q7187 ;      # instance of (P31) -> "gene"
        wdt:P688  ?product .      # gene encodes (P688) a protein

  ?product wdt:P681 ?component .  # protein has a cell component (P681)

  # '*' binds tighter than '|': this matches EITHER a chain of zero or more
  # "subclass of" (P279) links OR a chain of zero or more "part of" (P361)
  # links ending at "membrane"; the two link kinds are not mixed in one chain.
  ?component wdt:P279*|wdt:P361* wd:Q14349455 .

  # The label service supplies ?geneLabel (English label of ?gene).
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}

gene,geneLabel
http://www.wikidata.org/entity/Q14912759,SLC22A5
http://www.wikidata.org/entity/Q14914243,PSAP
http://www.wikidata.org/entity/Q14907990,SLC30A8
http://www.wikidata.org/entity/Q18035589,C6orf10
http://www.wikidata.org/entity/Q18054256,GSDMA
http://www.wikidata.org/entity/Q18030785,PRKG1
http://www.wikidata.org/entity/Q18027370,IGSF3
http://www.wikidata.org/entity/Q18033424,IL18R1
http://www.wikidata.org/entity/Q18045382,HPSE2
http://www.wikidata.org/entity/Q18027822,IL2RB


In [8]:
# ... where the GO localization is based on a non-IEA evidence code.
# Same as the membrane-localized asthma gene query, but the cell component is
# read from the full statement node so that statements carrying the qualifier
# "determination method = IEA" can be excluded.
#
# The statement-level prefixes (p:, ps:, pq:) are declared explicitly so the
# query does not depend on the endpoint predefining them.
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>

SELECT DISTINCT ?gene ?geneLabel WHERE {
  ?gene wdt:P2293 wd:Q35869 .   # gene has genetic association to "asthma"
  ?gene wdt:P31 wd:Q7187 .      # gene is an instance of "gene"

  ?gene wdt:P688 ?protein .     # gene encodes a protein
  ?protein p:P681 ?s .          # protein's cell component statement node
  ?s ps:P681 ?cp .              # value of that statement

  # Drop statements whose determination method (P459) qualifier is IEA.
  FILTER NOT EXISTS { ?s pq:P459 wd:Q23190881 . }

  # Statement value reaches "membrane" via a chain of "subclass of" (P279)
  # links or via a chain of "part of" (P361) links ('*' binds before '|').
  ?cp wdt:P279*|wdt:P361* wd:Q14349455 .

  # The label service supplies ?geneLabel (English label of ?gene).
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}

gene,geneLabel
http://www.wikidata.org/entity/Q14912759,SLC22A5
http://www.wikidata.org/entity/Q14914243,PSAP
http://www.wikidata.org/entity/Q18027370,IGSF3
http://www.wikidata.org/entity/Q18033424,IL18R1
http://www.wikidata.org/entity/Q18045382,HPSE2
http://www.wikidata.org/entity/Q14907990,SLC30A8
http://www.wikidata.org/entity/Q18027822,IL2RB
http://www.wikidata.org/entity/Q14903974,SMAD3
http://www.wikidata.org/entity/Q18035037,RAD50
http://www.wikidata.org/entity/Q18036729,RAP1GAP2


In [15]:
# ... with GWAS association with any respiratory disease.
# For each respiratory system disease (by English label), count the distinct
# associated genes whose protein has non-IEA membrane localization, and list
# those genes' English labels in one comma-separated string.
#
# Result columns: ?diseaseGALabel, ?gene_counts, ?geneList
# Rows are ordered by ?gene_counts, largest first.
#
# All prefixes used below (including p:, ps:, pq:, rdfs:) are declared
# explicitly so the query does not depend on the endpoint predefining them.
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?diseaseGALabel
       (COUNT(DISTINCT ?gene) AS ?gene_counts)
       (GROUP_CONCAT(DISTINCT ?geneLabel; separator=", ") AS ?geneList)
WHERE {
  ?gene wdt:P2293 ?diseaseGA .        # gene has genetic association
  ?diseaseGA wdt:P279* wd:Q3286546 .  # to a type of respiratory system disease

  # Gene is an instance of "gene" and encodes a protein. The gene label is
  # bound here via rdfs:label (not the label service) so that it is available
  # to GROUP_CONCAT as an ordinary variable.
  ?gene wdt:P31 wd:Q7187 ;
        wdt:P688 ?protein ;
        rdfs:label ?geneLabel .
  FILTER (lang(?geneLabel) = "en")

  ?protein p:P681 ?s .                # protein's cell component statement node
  ?s ps:P681 ?cp .                    # value of that statement

  # Drop statements whose determination method (P459) qualifier is IEA.
  FILTER NOT EXISTS { ?s pq:P459 wd:Q23190881 . }

  # Statement value reaches "membrane" via a chain of "subclass of" (P279)
  # links or via a chain of "part of" (P361) links ('*' binds before '|').
  ?cp wdt:P279*|wdt:P361* wd:Q14349455 .

  # The label service supplies ?diseaseGALabel (English label of ?diseaseGA).
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
# Group only by the disease label: ?geneList is the alias of an aggregate
# computed per group and must not itself be a grouping key.
GROUP BY ?diseaseGALabel
ORDER BY DESC(?gene_counts)

diseaseGALabel,gene_counts,geneList
asthma,15,"SMAD3, RAP1GAP2, IL18R1, HPSE2, PSAP, HLA-DQA1, IGSF3, IL2RB, IL6R, NOTCH4, SLC30A8, SLC22A5, ERBB4, PDE4D, RAD50"
chronic obstructive pulmonary disease,5,"HLA-C, SFTPD, ATP2C2, ANXA5, ANXA11"
lung cancer,3,"TGM5, PHACTR2, VTI1A"
interstitial lung disease,2,"DSP, ATP11A"
nasopharynx carcinoma,2,"ITGA9, TNFRSF19"
non-small-cell lung carcinoma,2,"NALCN, DLST"
adenocarcinoma of the lung,1,BTNL2
pulmonary emphysema,1,BICD1


In [14]:
# ... show associated chemical exposures.
# For respiratory system diseases that have at least one associated gene with
# non-IEA membrane localization, list the chemical hazards recorded as having
# that disease as an effect.
#
# The gene/protein patterns act only as a filter on ?diseaseGA; DISTINCT
# collapses the repeated rows produced by multiple matching genes.
#
# The statement-level prefixes (p:, ps:, pq:) are declared explicitly so the
# query does not depend on the endpoint predefining them.
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>

SELECT DISTINCT ?diseaseGA ?diseaseGALabel ?exposure ?exposureLabel WHERE {
  ?gene wdt:P2293 ?diseaseGA .        # gene has genetic association
  ?diseaseGA wdt:P279* wd:Q3286546 .  # to a type of respiratory system disease

  ?gene wdt:P31 wd:Q7187 .            # gene is an instance of "gene"

  ?gene wdt:P688 ?protein .           # gene encodes a protein
  ?protein p:P681 ?s .                # protein's cell component statement node
  ?s ps:P681 ?cp .                    # value of that statement

  # Drop statements whose determination method (P459) qualifier is IEA.
  FILTER NOT EXISTS { ?s pq:P459 wd:Q23190881 . }

  # Statement value reaches "membrane" via a chain of "subclass of" (P279)
  # links or via a chain of "part of" (P361) links ('*' binds before '|').
  ?cp wdt:P279*|wdt:P361* wd:Q14349455 .

  ?exposure wdt:P1542 ?diseaseGA .    # something has the disease as an effect
  ?exposure wdt:P279 wd:Q21167512 .   # and is a direct subclass of chemical hazard

  # The label service supplies ?diseaseGALabel and ?exposureLabel (English).
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}

diseaseGA,diseaseGALabel,exposure,exposureLabel
http://www.wikidata.org/entity/Q35869,asthma,http://www.wikidata.org/entity/Q21173555,Phenacyl chloride exposure
http://www.wikidata.org/entity/Q47912,lung cancer,http://www.wikidata.org/entity/Q21396183,arsenic pentoxide exposure
http://www.wikidata.org/entity/Q47912,lung cancer,http://www.wikidata.org/entity/Q21506740,HN1 exposure
http://www.wikidata.org/entity/Q47912,lung cancer,http://www.wikidata.org/entity/Q21513721,mechlorethamine exposure
http://www.wikidata.org/entity/Q47912,lung cancer,http://www.wikidata.org/entity/Q21514015,HN3 exposure
http://www.wikidata.org/entity/Q188605,pulmonary emphysema,http://www.wikidata.org/entity/Q21175051,phosgene exposure


In [17]:
# ... and show associated pathways.
# For genes associated with any respiratory system disease whose protein has
# non-IEA membrane localization, list each biological pathway that has the
# gene as a part. One row per (gene, pathway label) pair.
#
# The statement-level prefixes (p:, ps:, pq:) are declared explicitly so the
# query does not depend on the endpoint predefining them.
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>

SELECT DISTINCT ?gene ?geneLabel ?pathwayLabel WHERE {
  ?gene wdt:P2293 ?diseaseGA .        # gene has genetic association
  ?diseaseGA wdt:P279* wd:Q3286546 .  # to a type of respiratory system disease

  ?gene wdt:P31 wd:Q7187 .            # gene is an instance of "gene"

  ?gene wdt:P688 ?protein .           # gene encodes a protein
  ?protein p:P681 ?s .                # protein's cell component statement node
  ?s ps:P681 ?cp .                    # value of that statement

  # Drop statements whose determination method (P459) qualifier is IEA.
  FILTER NOT EXISTS { ?s pq:P459 wd:Q23190881 . }

  # Statement value reaches "membrane" via a chain of "subclass of" (P279)
  # links or via a chain of "part of" (P361) links ('*' binds before '|').
  ?cp wdt:P279*|wdt:P361* wd:Q14349455 .

  ?pathway wdt:P31 wd:Q4915012 ;      # instance of a biological pathway
           wdt:P527 ?gene .           # that has the gene as a part

  # The label service supplies ?geneLabel and ?pathwayLabel (English).
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}

gene,geneLabel,pathwayLabel
http://www.wikidata.org/entity/Q14903974,SMAD3,Adipogenesis
http://www.wikidata.org/entity/Q14903974,SMAD3,Cell Cycle
http://www.wikidata.org/entity/Q14903974,SMAD3,DNA Damage Response (only ATM dependent)
http://www.wikidata.org/entity/Q14903974,SMAD3,Extracellular vesicle-mediated signaling in recipient cells
http://www.wikidata.org/entity/Q14903974,SMAD3,Senescence and Autophagy in Cancer
http://www.wikidata.org/entity/Q14903974,SMAD3,Endoderm Differentiation
http://www.wikidata.org/entity/Q14903974,SMAD3,Hypothesized Pathways in Pathogenesis of Cardiovascular Disease
http://www.wikidata.org/entity/Q14903974,SMAD3,TGF-B Signaling in Thyroid Cells for Epithelial-Mesenchymal Transition
http://www.wikidata.org/entity/Q14903974,SMAD3,Androgen receptor signaling pathway
http://www.wikidata.org/entity/Q14903974,SMAD3,Hepatitis C and Hepatocellular Carcinoma
