# Import packages #

In [1]:
!pip install sparqlwrapper



In [2]:
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

In [3]:
def select(query, service='https://query.wikidata.org/sparql'):
    sparql = SPARQLWrapper(service)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query().convert()
    return pd.json_normalize(result['results']['bindings'])

# Table of contents #

>
1 Model profiles

    1.1 Clinical trials for Zika fever
    1.2 Clinical trials using COVID-19 vaccine
    1.3 Clinical trials at Vanderbilt University
    1.4 Clinical trials with Julie McElrath as principal investigator
    1.5 Clinical trials funded by Patient-Centered Outcomes Research Institute

2 Topics by count of clinical trials

    2.1 Medical conditions
    2.2 Research interventions
    2.3 Research sites
    2.4 Principal investigators
    2.5 Funders

3 Organizational affiliations

    3.1 Clinical trials with principal investigator and their affiliation
    3.2 Clinical trials where principal investigator has Vanderbilt University affiliation
    3.3 Chart of organizations by count of clinical trials
    3.4 Clinical trials where the sponsor was Pfizer

4 Researcher demographics

    4.1 Count of principal investigators by gender
    4.2 Clinical trials where the principal investigator is female
    4.3 Principal investigators by occupation

5 Scope of Wikidata's clinical trials content

    5.1 List of clinical trials
    5.2 Count of clinical trials
    5.3 Most common properties applied to clinical trials
    5.4 Count of statements in clinical trial records
    5.5 Count of trial records in Wikidata per clinical trial registry

>

# 1 Model profiles #

## 1.1 Clinical trials for Zika fever ##

In [4]:
query_string = """
SELECT ?trial ?trialLabel  WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P2175 wd:Q8071861. }
  UNION
  { ?trial wdt:P2175 wd:Q27043680. }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_for_Zika_fever.csv")
result

Unnamed: 0,trial.type,trial.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value
0,uri,http://www.wikidata.org/entity/Q60563291,en,literal,Dermatologic Manifestations of Zika Virus
1,uri,http://www.wikidata.org/entity/Q60563296,en,literal,Understanding Excretion and Infectivity of Zik...
2,uri,http://www.wikidata.org/entity/Q60563298,en,literal,ZIKA Vaccine in Naive Subjects
3,uri,http://www.wikidata.org/entity/Q60563300,en,literal,Study in Healthy Volunteers Evaluating Safety ...
4,uri,http://www.wikidata.org/entity/Q60563302,en,literal,Sensorimotor Outcomes of Children Exposed to F...
5,uri,http://www.wikidata.org/entity/Q60563303,en,literal,"Phase I, Randomized, Double-blinded, Placebo-C..."
6,uri,http://www.wikidata.org/entity/Q60563306,en,literal,"A Phase 1, First-in-human, Double-blinded, Ran..."
7,uri,http://www.wikidata.org/entity/Q60563307,en,literal,Zika Virus Purified Inactivated Vaccine (ZPIV)...
8,uri,http://www.wikidata.org/entity/Q60563313,en,literal,"Randomized, Placebo-controlled, Observer-blind..."
9,uri,http://www.wikidata.org/entity/Q60563322,en,literal,Efficacy of Aquatic Physiotherapy in Children ...


## 1.2 Clinical trials using COVID-19 vaccine ##

In [5]:
query_string = """
SELECT ?trial ?trialLabel  WHERE {
  ?trial wdt:P31 wd:Q30612 .
  ?trial wdt:P4844 ?intervention. 
  ?intervention wdt:P31/wdt:P279* wd:Q87719492 .

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_using_COVID-19_vaccine.csv")
result

Unnamed: 0,trial.type,trial.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value
0,uri,http://www.wikidata.org/entity/Q87775009,en,literal,Safety and Immunogenicity Study of 2019-nCoV V...
1,uri,http://www.wikidata.org/entity/Q87931195,en,literal,"A Single-center,Open-label，Dose-escalating Pha..."
2,uri,http://www.wikidata.org/entity/Q89154124,en,literal,A Study of a Candidate COVID-19 Vaccine (COV001)
3,uri,http://www.wikidata.org/entity/Q92274099,en,literal,Safety and Immunogenicity Study of Inactivated...
4,uri,http://www.wikidata.org/entity/Q93328935,en,literal,Evaluation of the Safety and Immunogenicity of...
5,uri,http://www.wikidata.org/entity/Q93328984,en,literal,"Study to Describe the Safety, Tolerability, Im..."
6,uri,http://www.wikidata.org/entity/Q94234839,en,literal,Safety and Immunogenicity Study of Inactivated...
7,uri,http://www.wikidata.org/entity/Q95056331,en,literal,Phase I/II Clinical Trial of Recombinant Novel...
8,uri,http://www.wikidata.org/entity/Q95718298,en,literal,Investigating a Vaccine Against COVID-19
9,uri,http://www.wikidata.org/entity/Q95719961,en,literal,Dose-Confirmation Study to Evaluate the Safety...


 ## 1.3 Clinical trials at Vanderbilt University ##

In [6]:
query_string = """
SELECT ?trial ?trialLabel WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P6153 [ wdt:P749* wd:Q29052  ] .}
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_at_Vanderbilt_University.csv")
result

Unnamed: 0,trial.type,trial.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value
0,uri,http://www.wikidata.org/entity/Q63807967,en,literal,Temozolomide With or Without Veliparib in Trea...
1,uri,http://www.wikidata.org/entity/Q63807976,en,literal,Combination Chemotherapy With or Without Blina...
2,uri,http://www.wikidata.org/entity/Q63807978,en,literal,Brentuximab Vedotin or Crizotinib and Combinat...
3,uri,http://www.wikidata.org/entity/Q63807982,en,literal,Neratinib HER Mutation Basket Study (SUMMIT)
4,uri,http://www.wikidata.org/entity/Q63807992,en,literal,Rituximab and Combination Chemotherapy With or...
...,...,...,...,...,...
1386,uri,http://www.wikidata.org/entity/Q96759632,en,literal,Predicting the Quality of Response to Specific...
1387,uri,http://www.wikidata.org/entity/Q98605059,en,literal,Hypofractionated Radiotherapy Followed by Imme...
1388,uri,http://www.wikidata.org/entity/Q105788657,en,literal,A Study of DS-6000a in Subjects With Advanced ...
1389,uri,http://www.wikidata.org/entity/Q106967353,en,literal,Digital-storytelling Intervention for Rural-dw...


## 1.4 Clinical trials with Julie McElrath as principal investigator ##

In [7]:
query_string = """
SELECT ?trial ?trialLabel WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P8329 wd:Q22006776 .}
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_with_Julie_McElrath_as_principal_investigator.csv")
result

Unnamed: 0,trial.type,trial.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value
0,uri,http://www.wikidata.org/entity/Q63573557,en,literal,"A Randomized, Double-blinded, Placebo-controll..."
1,uri,http://www.wikidata.org/entity/Q64796540,en,literal,A Phase I Trial to Evaluate the Safety and Imm...
2,uri,http://www.wikidata.org/entity/Q66032588,en,literal,Safety of and Immune Response to an HIV-1 Vacc...
3,uri,http://www.wikidata.org/entity/Q66034272,en,literal,Safety of and Immune Response to an HIV-1 DNA ...
4,uri,http://www.wikidata.org/entity/Q66034726,en,literal,Resistance to HIV Infection
5,uri,http://www.wikidata.org/entity/Q66040660,en,literal,"A Phase I, Multicenter, Randomized, Double-Bli..."
6,uri,http://www.wikidata.org/entity/Q66040667,en,literal,"A Phase I, Multicenter, Randomized, Double-Bli..."
7,uri,http://www.wikidata.org/entity/Q66040669,en,literal,"A Phase I, Randomized, Double-Blind, Placebo-C..."
8,uri,http://www.wikidata.org/entity/Q66040681,en,literal,A Phase II Clinical Trial to Evaluate the Immu...
9,uri,http://www.wikidata.org/entity/Q66040865,en,literal,"A Phase I, Multicenter, Randomized, Double-Bli..."


## 1.5 Clinical trials funded by Patient-Centered Outcomes Research Institute ##

In [8]:
query_string = """
SELECT ?trial ?trialLabel ?link WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P8324 wd:Q7144950 .}
  UNION
  { ?trial wdt:P859 wd:Q7144950 .}
  ?trial ?link wd:Q7144950 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_funded_by_Patient-Centered_Outcomes_Research_Institute.csv")
result

Unnamed: 0,trial.type,trial.value,link.type,link.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value
0,uri,http://www.wikidata.org/entity/Q64606545,uri,http://www.wikidata.org/prop/direct/P767,en,literal,Patient Assisted Intervention for Neuropathy: ...
1,uri,http://www.wikidata.org/entity/Q64606545,uri,http://www.wikidata.org/prop/direct/P8324,en,literal,Patient Assisted Intervention for Neuropathy: ...
2,uri,http://www.wikidata.org/entity/Q64607589,uri,http://www.wikidata.org/prop/direct/P767,en,literal,BEnefits of Stroke Treatment Delivered Using a...
3,uri,http://www.wikidata.org/entity/Q64607589,uri,http://www.wikidata.org/prop/direct/P8324,en,literal,BEnefits of Stroke Treatment Delivered Using a...
4,uri,http://www.wikidata.org/entity/Q64611137,uri,http://www.wikidata.org/prop/direct/P8324,en,literal,Assessment of a Personalized Health Care Inter...
...,...,...,...,...,...,...,...
193,uri,http://www.wikidata.org/entity/Q64606385,uri,http://www.wikidata.org/prop/direct/P767,en,literal,Optimizing Health Outcomes in Patients With Sy...
194,uri,http://www.wikidata.org/entity/Q64606385,uri,http://www.wikidata.org/prop/direct/P8324,en,literal,Optimizing Health Outcomes in Patients With Sy...
195,uri,http://www.wikidata.org/entity/Q64606544,uri,http://www.wikidata.org/prop/direct/P767,en,literal,Long Term Outcomes of Lumbar Epidural Steroid ...
196,uri,http://www.wikidata.org/entity/Q64606544,uri,http://www.wikidata.org/prop/direct/P859,en,literal,Long Term Outcomes of Lumbar Epidural Steroid ...


# 2 Topics by count of clinical trials #

## 2.1 Medical conditions ##

In [9]:
query_string = """
SELECT DISTINCT ?condition ?conditionLabel (COUNT(?trial) AS ?count) 
WHERE
{
   ?trial p:P31/ps:P31/wdt:P279* wd:Q30612.
   ?trial wdt:P1050 ?condition .
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?condition ?conditionLabel ?count 
ORDER BY DESC(?count)
LIMIT 100"""


result=select(query_string)
result.to_csv("Medical_conditions.csv")
result

Unnamed: 0,condition.type,condition.value,conditionLabel.xml:lang,conditionLabel.type,conditionLabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/Q12206,en,literal,diabetes,http://www.w3.org/2001/XMLSchema#integer,literal,10093
1,uri,http://www.wikidata.org/entity/Q179630,en,literal,syndrome,http://www.w3.org/2001/XMLSchema#integer,literal,7433
2,uri,http://www.wikidata.org/entity/Q33525,en,literal,carcinoma,http://www.w3.org/2001/XMLSchema#integer,literal,5613
3,uri,http://www.wikidata.org/entity/Q29496,en,literal,leukemia,http://www.w3.org/2001/XMLSchema#integer,literal,5018
4,uri,http://www.wikidata.org/entity/Q208414,en,literal,lymphoma,http://www.w3.org/2001/XMLSchema#integer,literal,4813
...,...,...,...,...,...,...,...,...
95,uri,http://www.wikidata.org/entity/Q189470,en,literal,fistula,http://www.w3.org/2001/XMLSchema#integer,literal,498
96,uri,http://www.wikidata.org/entity/Q938205,en,literal,neuroblastoma,http://www.w3.org/2001/XMLSchema#integer,literal,498
97,uri,http://www.wikidata.org/entity/Q103177,en,literal,severe acute respiratory syndrome,http://www.w3.org/2001/XMLSchema#integer,literal,497
98,uri,http://www.wikidata.org/entity/Q124292,en,literal,cholangiocarcinoma,http://www.w3.org/2001/XMLSchema#integer,literal,490


## 2.2 Research interventions ##

In [10]:
query_string = """
SELECT DISTINCT ?intervention ?interventionLabel (COUNT(?trial) AS ?count) 
WHERE
{
   ?trial p:P31/ps:P31/wdt:P279* wd:Q30612.
   ?trial wdt:P4844 ?intervention .
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?intervention ?interventionLabel ?count 
ORDER BY DESC(?count)"""

result=select(query_string)
result.to_csv("Research_interventions.csv")
result

Unnamed: 0,intervention.type,intervention.value,interventionLabel.xml:lang,interventionLabel.type,interventionLabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/Q408524,en,literal,(RS)-cyclophosphamide,http://www.w3.org/2001/XMLSchema#integer,literal,5180
1,uri,http://www.wikidata.org/entity/Q423762,en,literal,paclitaxel,http://www.w3.org/2001/XMLSchema#integer,literal,3188
2,uri,http://www.wikidata.org/entity/Q50430271,en,literal,"Anesthetics, Combined",http://www.w3.org/2001/XMLSchema#integer,literal,3182
3,uri,http://www.wikidata.org/entity/Q412415,en,literal,cisplatin,http://www.w3.org/2001/XMLSchema#integer,literal,2625
4,uri,http://www.wikidata.org/entity/Q415588,en,literal,carboplatin,http://www.w3.org/2001/XMLSchema#integer,literal,2323
...,...,...,...,...,...,...,...,...
2486,uri,http://www.wikidata.org/entity/Q50265665,en,literal,insulin,http://www.w3.org/2001/XMLSchema#integer,literal,1
2487,uri,http://www.wikidata.org/entity/Q46994,en,literal,telehealth,http://www.w3.org/2001/XMLSchema#integer,literal,1
2488,uri,http://www.wikidata.org/entity/Q7598360,en,literal,standard of care,http://www.w3.org/2001/XMLSchema#integer,literal,1
2489,uri,http://www.wikidata.org/entity/Q6576792,en,literal,online community,http://www.w3.org/2001/XMLSchema#integer,literal,1


## 2.3 Research sites ##

In [11]:
query_string = """
SELECT DISTINCT ?research_site ?research_siteLabel (COUNT(?trial) AS ?count)  WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P6153 ?research_site . }
  UNION
  { ?trial wdt:P6153 [wdt:P749 ?research_site] . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?research_site ?research_siteLabel ?count
ORDER BY DESC(?count)"""

result=select(query_string)
result.to_csv("Research_sites.csv")
result

Unnamed: 0,research_site.type,research_site.value,research_siteLabel.xml:lang,research_siteLabel.type,research_siteLabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/Q7906562,en,literal,VA Boston Healthcare System,http://www.w3.org/2001/XMLSchema#integer,literal,5369
1,uri,http://www.wikidata.org/entity/Q184478,en,literal,University of California,http://www.w3.org/2001/XMLSchema#integer,literal,4021
2,uri,http://www.wikidata.org/entity/Q7140719,en,literal,Partners HealthCare,http://www.w3.org/2001/XMLSchema#integer,literal,3927
3,uri,http://www.wikidata.org/entity/Q1130172,en,literal,Mayo Clinic,http://www.w3.org/2001/XMLSchema#integer,literal,3511
4,uri,http://www.wikidata.org/entity/Q777403,en,literal,Washington University in St. Louis,http://www.w3.org/2001/XMLSchema#integer,literal,3241
...,...,...,...,...,...,...,...,...
4252,uri,http://www.wikidata.org/entity/Q45312,en,literal,King Edward Medical University,http://www.w3.org/2001/XMLSchema#integer,literal,1
4253,uri,http://www.wikidata.org/entity/Q8002474,en,literal,Wilkes University,http://www.w3.org/2001/XMLSchema#integer,literal,1
4254,uri,http://www.wikidata.org/entity/Q3113883,en,literal,Ministry of Health of Spain,http://www.w3.org/2001/XMLSchema#integer,literal,1
4255,uri,http://www.wikidata.org/entity/Q55680455,en,literal,General Secretariat for Scientific Policy Coor...,http://www.w3.org/2001/XMLSchema#integer,literal,1


## 2.4 Principal investigators ##

In [12]:
query_string = """#defaultView:BubbleChart
SELECT (COUNT(DISTINCT ?trial) AS ?count) ?PI ?PILabel 
WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P8329 ?PI .}

  { ?PI wdt:P108 ?org }  
  UNION
  { ?PI wdt:P1416 ?org }  

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?PI ?PILabel 
ORDER BY DESC(?count)"""

result=select(query_string)
result.to_csv("Principal_investigators.csv")
result

Unnamed: 0,PI.type,PI.value,PILabel.xml:lang,PILabel.type,PILabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/Q91133737,en,literal,Italo O. Biaggioni,http://www.w3.org/2001/XMLSchema#integer,literal,33
1,uri,http://www.wikidata.org/entity/Q90057565,en,literal,Nancy J. Brown,http://www.w3.org/2001/XMLSchema#integer,literal,22
2,uri,http://www.wikidata.org/entity/Q42415822,en,literal,Satish R. Raj,http://www.w3.org/2001/XMLSchema#integer,literal,19
3,uri,http://www.wikidata.org/entity/Q91133228,en,literal,Barbara A. Murphy,http://www.w3.org/2001/XMLSchema#integer,literal,19
4,uri,http://www.wikidata.org/entity/Q88027010,en,literal,Anuradha Bapsi Chakravarthy,http://www.w3.org/2001/XMLSchema#integer,literal,18
...,...,...,...,...,...,...,...,...
373,uri,http://www.wikidata.org/entity/Q83557098,en,literal,Autumn Kujawa,http://www.w3.org/2001/XMLSchema#integer,literal,1
374,uri,http://www.wikidata.org/entity/Q88029228,en,literal,"Myrick Clements Shinall, Jr.",http://www.w3.org/2001/XMLSchema#integer,literal,1
375,uri,http://www.wikidata.org/entity/Q88032988,en,literal,Dan M. Roden,http://www.w3.org/2001/XMLSchema#integer,literal,1
376,uri,http://www.wikidata.org/entity/Q89234636,en,literal,Bryan E. Shepherd,http://www.w3.org/2001/XMLSchema#integer,literal,1


## 2.5 Funders ##

In [13]:
query_string = """
SELECT DISTINCT ?funder ?funderLabel (COUNT(?trial) AS ?count) 
WHERE
{
   ?trial p:P31/ps:P31/wdt:P279* wd:Q30612.
  { ?trial wdt:P859 ?funder .}
  UNION
  { ?trial wdt:P8324 ?funder .}
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?funder ?funderLabel ?count 
ORDER BY DESC(?count)"""

result=select(query_string)
result.to_csv("Funders.csv")
result

Unnamed: 0,funder.type,funder.value,funderLabel.xml:lang,funderLabel.type,funderLabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/Q212322,en,literal,GlaxoSmithKline,http://www.w3.org/2001/XMLSchema#integer,literal,3359
1,uri,http://www.wikidata.org/entity/Q507154,en,literal,Novartis,http://www.w3.org/2001/XMLSchema#integer,literal,2206
2,uri,http://www.wikidata.org/entity/Q212646,en,literal,Roche Holding,http://www.w3.org/2001/XMLSchema#integer,literal,1821
3,uri,http://www.wikidata.org/entity/Q1418766,en,literal,National Taiwan University Hospital,http://www.w3.org/2001/XMLSchema#integer,literal,1679
4,uri,http://www.wikidata.org/entity/Q266423,en,literal,Bristol-Myers Squibb,http://www.w3.org/2001/XMLSchema#integer,literal,1162
...,...,...,...,...,...,...,...,...
3042,uri,http://www.wikidata.org/entity/Q98652284,en,literal,"Walvax Biotechnology Co., Ltd.",http://www.w3.org/2001/XMLSchema#integer,literal,1
3043,uri,http://www.wikidata.org/entity/Q30253359,en,literal,Public Health Foundation Enterprises,http://www.w3.org/2001/XMLSchema#integer,literal,1
3044,uri,http://www.wikidata.org/entity/Q1075148,en,literal,"University of California, Riverside",http://www.w3.org/2001/XMLSchema#integer,literal,1
3045,uri,http://www.wikidata.org/entity/Q65076661,en,literal,St. Luke's Boise Medical Center,http://www.w3.org/2001/XMLSchema#integer,literal,1


# 3 Organizational affiliations #









## 3.1 Clinical trials with principal investigator and their affiliation ##

In [14]:
query_string = """
SELECT ?trial ?trialLabel ?PI ?PILabel ?org ?orgLabel  WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P8329 ?PI .}

  { ?PI wdt:P108 ?org }  
  UNION
  { ?PI wdt:P1416 ?org }  

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_with_principal_investigator_and_their_affiliation.csv")
result

Unnamed: 0,trial.type,trial.value,PI.type,PI.value,org.type,org.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value,PILabel.xml:lang,PILabel.type,PILabel.value,orgLabel.xml:lang,orgLabel.type,orgLabel.value
0,uri,http://www.wikidata.org/entity/Q65466006,uri,http://www.wikidata.org/entity/Q47161235,uri,http://www.wikidata.org/entity/Q29052,en,literal,Pediatric Acute Kidney Injury (AKI) Retrospect...,en,literal,Sara L. Van Driest,en,literal,Vanderbilt University
1,uri,http://www.wikidata.org/entity/Q64149990,uri,http://www.wikidata.org/entity/Q47493038,uri,http://www.wikidata.org/entity/Q29052,en,literal,Levocarnitine for Dry Eye in Sjogren's Syndrome,en,literal,Leslie J. Crofford,en,literal,Vanderbilt University
2,uri,http://www.wikidata.org/entity/Q64801405,uri,http://www.wikidata.org/entity/Q47493038,uri,http://www.wikidata.org/entity/Q29052,en,literal,Memantine for the Treatment of Cognitive Impai...,en,literal,Leslie J. Crofford,en,literal,Vanderbilt University
3,uri,http://www.wikidata.org/entity/Q64662092,uri,http://www.wikidata.org/entity/Q47502872,uri,http://www.wikidata.org/entity/Q29052,en,literal,Reduced Calorie Diet Intervention in Kidney Tr...,en,literal,Kelly A. Birdwell,en,literal,Vanderbilt University
4,uri,http://www.wikidata.org/entity/Q64662092,uri,http://www.wikidata.org/entity/Q47502872,uri,http://www.wikidata.org/entity/Q7914455,en,literal,Reduced Calorie Diet Intervention in Kidney Tr...,en,literal,Kelly A. Birdwell,en,literal,Vanderbilt University Medical Center
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2216,uri,http://www.wikidata.org/entity/Q63397726,uri,http://www.wikidata.org/entity/Q91133311,uri,http://www.wikidata.org/entity/Q29052,en,literal,Collaborative Assessment of ICU Recovery Needs,en,literal,Carla M. Sevin,en,literal,Vanderbilt University
2217,uri,http://www.wikidata.org/entity/Q63013312,uri,http://www.wikidata.org/entity/Q91133452,uri,http://www.wikidata.org/entity/Q29052,en,literal,Metabolism-informed Care for Smoking Cessation,en,literal,Dawn M. Beaulieu,en,literal,Vanderbilt University
2218,uri,http://www.wikidata.org/entity/Q61909920,uri,http://www.wikidata.org/entity/Q91133485,uri,http://www.wikidata.org/entity/Q29052,en,literal,Assessment of Duodenal Epithelial Integrity in...,en,literal,Dhyanesh A. Patel,en,literal,Vanderbilt University
2219,uri,http://www.wikidata.org/entity/Q62025113,uri,http://www.wikidata.org/entity/Q91133485,uri,http://www.wikidata.org/entity/Q29052,en,literal,Botox or Botox With Esophageal Dilation in Pat...,en,literal,Dhyanesh A. Patel,en,literal,Vanderbilt University


## 3.2 Clinical trials where principal investigator has Vanderbilt University affiliation ##

In [15]:
query_string = """
# Q29052 is Vanderbilt University
PREFIX target: <http://www.wikidata.org/entity/Q29052>

SELECT ?trial ?trialLabel ?PI ?PILabel  WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P8329 ?PI .}

  { ?PI wdt:P108 [ wdt:P749* target: ] }  
  UNION
  { ?PI wdt:P1416 [ wdt:P749* target: ] }  

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_where_principal_investigator_has_Vanderbilt_University_affiliation.csv")
result

Unnamed: 0,PI.type,PI.value,trial.type,trial.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value,PILabel.xml:lang,PILabel.type,PILabel.value
0,uri,http://www.wikidata.org/entity/Q4734737,uri,http://www.wikidata.org/entity/Q63829569,en,literal,Oxidative Stress in Chronic Kidney Disease: Di...,en,literal,Alp Ikizler
1,uri,http://www.wikidata.org/entity/Q4734737,uri,http://www.wikidata.org/entity/Q63830668,en,literal,The Measurement of Insulin Resistance in Perit...,en,literal,Alp Ikizler
2,uri,http://www.wikidata.org/entity/Q4734737,uri,http://www.wikidata.org/entity/Q64151795,en,literal,Tissue Sodium in Pre-hypertensive Patients,en,literal,Alp Ikizler
3,uri,http://www.wikidata.org/entity/Q4734737,uri,http://www.wikidata.org/entity/Q64719736,en,literal,"Vitamin D, Insulin Resistance and Inflammation...",en,literal,Alp Ikizler
4,uri,http://www.wikidata.org/entity/Q4734737,uri,http://www.wikidata.org/entity/Q64719824,en,literal,Omega-3 Fatty Acid Administration in Dialysis ...,en,literal,Alp Ikizler
...,...,...,...,...,...,...,...,...,...,...
1903,uri,http://www.wikidata.org/entity/Q83451321,uri,http://www.wikidata.org/entity/Q83805784,en,literal,Neuroplasticity-Based Cognitive Remediation fo...,en,literal,Paul A. Newhouse
1904,uri,http://www.wikidata.org/entity/Q89166741,uri,http://www.wikidata.org/entity/Q86283322,en,literal,Optimizing Psychosocial Treatment of Interstit...,en,literal,Lindsey Colman McKernan
1905,uri,http://www.wikidata.org/entity/Q56950496,uri,http://www.wikidata.org/entity/Q86291220,en,literal,Pharmacologic Modulation of Hippocampal Activi...,en,literal,Stephan Heckers
1906,uri,http://www.wikidata.org/entity/Q83451296,uri,http://www.wikidata.org/entity/Q92549531,en,literal,Enhancing Social Competence in Adults With Autism,en,literal,Blythe Anne Corbett


## 3.3 Chart of organizations by count of clinical trials ##

In [16]:
query_string = """
#defaultView:BubbleChart
SELECT (COUNT(DISTINCT ?trial) AS ?count) ?org ?orgLabel 
WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P8329 ?PI .}

  { ?PI wdt:P108 ?org }  
  UNION
  { ?PI wdt:P1416 ?org }  

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?org ?orgLabel 
ORDER BY DESC(?count)"""

result=select(query_string)
result.to_csv("Chart_of_organizations_by_count_of_clinical_trials.csv")
result

Unnamed: 0,org.type,org.value,orgLabel.xml:lang,orgLabel.type,orgLabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/Q29052,en,literal,Vanderbilt University,http://www.w3.org/2001/XMLSchema#integer,literal,882
1,uri,http://www.wikidata.org/entity/Q89953931,en,literal,Vanderbilt Department of Medicine,http://www.w3.org/2001/XMLSchema#integer,literal,445
2,uri,http://www.wikidata.org/entity/Q89953976,en,literal,Vanderbilt Department of Pediatrics,http://www.w3.org/2001/XMLSchema#integer,literal,85
3,uri,http://www.wikidata.org/entity/Q7914455,en,literal,Vanderbilt University Medical Center,http://www.w3.org/2001/XMLSchema#integer,literal,60
4,uri,http://www.wikidata.org/entity/Q89951871,en,literal,Vanderbilt Department of Anesthesiology,http://www.w3.org/2001/XMLSchema#integer,literal,48
...,...,...,...,...,...,...,...,...
86,uri,http://www.wikidata.org/entity/Q5547065,en,literal,Georgetown University School of Medicine,http://www.w3.org/2001/XMLSchema#integer,literal,1
87,uri,http://www.wikidata.org/entity/Q503419,en,literal,University of Arizona,http://www.w3.org/2001/XMLSchema#integer,literal,1
88,uri,http://www.wikidata.org/entity/Q5611155,en,literal,Kaiser Permanente Washington Health Research I...,http://www.w3.org/2001/XMLSchema#integer,literal,1
89,uri,http://www.wikidata.org/entity/Q89953915,en,literal,Vanderbilt Department of Dermatology,http://www.w3.org/2001/XMLSchema#integer,literal,1


## 3.4 Clinical trials where the sponsor was Pfizer ##

In [17]:
query_string = """
SELECT ?trial ?trialLabel  WHERE {
  ?trial wdt:P31 wd:Q30612 .
  { ?trial wdt:P859 wd:Q206921 .}
  UNION
  { ?trial wdt:P8324 wd:Q206921 .}

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_where_the_sponsor_was_Pfizer.csv")
result

Unnamed: 0,trial.type,trial.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value
0,uri,http://www.wikidata.org/entity/Q297324,en,literal,ASCOT
1,uri,http://www.wikidata.org/entity/Q61862248,en,literal,Long Term Safety and Efficacy Study of Tanezum...
2,uri,http://www.wikidata.org/entity/Q61865085,en,literal,Absorption and Systemic Study of AN2690 in Pat...
3,uri,http://www.wikidata.org/entity/Q61894334,en,literal,Effect Of Itraconazole On The Pharmacokinetics...
4,uri,http://www.wikidata.org/entity/Q61894391,en,literal,"A Multicenter Phase 3, Open-Label Study of Bos..."
...,...,...,...,...,...
724,uri,http://www.wikidata.org/entity/Q102152880,en,literal,"Study to Evaluate the Safety, Tolerability, an..."
725,uri,http://www.wikidata.org/entity/Q102153068,en,literal,BRAF V600E-mutant Colorectal Cancer Study of E...
726,uri,http://www.wikidata.org/entity/Q102153289,en,literal,Study To Evaluate The Effect Of Two Steady Sta...
727,uri,http://www.wikidata.org/entity/Q102153533,en,literal,A Study To Asses Mass Balance And Absolute Bio...


# 4 Researcher demographics #

## 4.1 Count of principal investigators by gender ##

In [18]:
query_string = """
SELECT (COUNT(?trial) AS ?count) ?gender ?genderLabel WHERE {
  ?trial wdt:P31 wd:Q30612 .
  ?trial wdt:P8329 ?pi .
  ?pi wdt:P21 ?gender .
  ?pi wikibase:sitelinks ?sl .
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?count  ?gender ?genderLabel"""

result=select(query_string)
result.to_csv("Count_of_principal_investigators_by_gender.csv")
result

Unnamed: 0,count.datatype,count.type,count.value,gender.type,gender.value,genderLabel.xml:lang,genderLabel.type,genderLabel.value
0,http://www.w3.org/2001/XMLSchema#integer,literal,613,uri,http://www.wikidata.org/entity/Q6581097,en,literal,male
1,http://www.w3.org/2001/XMLSchema#integer,literal,346,uri,http://www.wikidata.org/entity/Q6581072,en,literal,female


## 4.2 Clinical trials where the principal investigator is female ##

In [19]:
query_string = """
SELECT ?trial ?trialLabel ?pi ?piLabel ?sl WHERE {
  ?trial wdt:P31 wd:Q30612 .
  ?trial wdt:P8329 ?pi .
  ?pi wdt:P21 wd:Q6581072 .
  ?pi wikibase:sitelinks ?sl .
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""

result=select(query_string)
result.to_csv("Clinical_trials_where_the_principal_investigator_is_female.csv")
result

Unnamed: 0,trial.type,trial.value,pi.type,pi.value,sl.datatype,sl.type,sl.value,trialLabel.xml:lang,trialLabel.type,trialLabel.value,piLabel.xml:lang,piLabel.type,piLabel.value
0,uri,http://www.wikidata.org/entity/Q65466006,uri,http://www.wikidata.org/entity/Q47161235,http://www.w3.org/2001/XMLSchema#integer,literal,0,en,literal,Pediatric Acute Kidney Injury (AKI) Retrospect...,en,literal,Sara L. Van Driest
1,uri,http://www.wikidata.org/entity/Q64149990,uri,http://www.wikidata.org/entity/Q47493038,http://www.w3.org/2001/XMLSchema#integer,literal,1,en,literal,Levocarnitine for Dry Eye in Sjogren's Syndrome,en,literal,Leslie J. Crofford
2,uri,http://www.wikidata.org/entity/Q64801405,uri,http://www.wikidata.org/entity/Q47493038,http://www.w3.org/2001/XMLSchema#integer,literal,1,en,literal,Memantine for the Treatment of Cognitive Impai...,en,literal,Leslie J. Crofford
3,uri,http://www.wikidata.org/entity/Q64662092,uri,http://www.wikidata.org/entity/Q47502872,http://www.w3.org/2001/XMLSchema#integer,literal,0,en,literal,Reduced Calorie Diet Intervention in Kidney Tr...,en,literal,Kelly A. Birdwell
4,uri,http://www.wikidata.org/entity/Q66402861,uri,http://www.wikidata.org/entity/Q47744325,http://www.w3.org/2001/XMLSchema#integer,literal,0,en,literal,Collaboration for Antepartum Risk Evaluation,en,literal,Julia C. Phillippi
...,...,...,...,...,...,...,...,...,...,...,...,...,...
341,uri,http://www.wikidata.org/entity/Q63597543,uri,http://www.wikidata.org/entity/Q91133561,http://www.w3.org/2001/XMLSchema#integer,literal,0,en,literal,Nivolumab Plus Relatlimab or Ipilimumab in Met...,en,literal,Elizabeth J Davis
342,uri,http://www.wikidata.org/entity/Q63596706,uri,http://www.wikidata.org/entity/Q91133566,http://www.w3.org/2001/XMLSchema#integer,literal,0,en,literal,Effect of Providing Stratification of Low Risk...,en,literal,Elizabeth J. Phillips
343,uri,http://www.wikidata.org/entity/Q63596709,uri,http://www.wikidata.org/entity/Q91133566,http://www.w3.org/2001/XMLSchema#integer,literal,0,en,literal,The Effect of Providing Stratification of Low ...,en,literal,Elizabeth J. Phillips
344,uri,http://www.wikidata.org/entity/Q65336883,uri,http://www.wikidata.org/entity/Q91133566,http://www.w3.org/2001/XMLSchema#integer,literal,0,en,literal,Immunogenetics of Heparin-Induced Thrombocytop...,en,literal,Elizabeth J. Phillips


## 4.3 Principal investigators by occupation ##

In [20]:
query_string = """
SELECT DISTINCT ?occupation ?occupationLabel (COUNT(?trial) AS ?count) ?sl WHERE {
  ?trial wdt:P31 wd:Q30612 .
  ?trial wdt:P8329 ?pi .
  ?pi wdt:P106 ?occupation .
  ?occupation wikibase:sitelinks ?sl .
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?count ?occupation ?occupationLabel ?sl
ORDER BY DESC(?count)"""

result=select(query_string)
result.to_csv("Principal_investigators_by_occupation.csv")
result

Unnamed: 0,occupation.type,occupation.value,occupationLabel.xml:lang,occupationLabel.type,occupationLabel.value,count.datatype,count.type,count.value,sl.datatype,sl.type,sl.value
0,uri,http://www.wikidata.org/entity/Q1650915,en,literal,researcher,http://www.w3.org/2001/XMLSchema#integer,literal,466,http://www.w3.org/2001/XMLSchema#integer,literal,32
1,uri,http://www.wikidata.org/entity/Q15401884,en,literal,medical researcher,http://www.w3.org/2001/XMLSchema#integer,literal,91,http://www.w3.org/2001/XMLSchema#integer,literal,1
2,uri,http://www.wikidata.org/entity/Q1622272,en,literal,university teacher,http://www.w3.org/2001/XMLSchema#integer,literal,52,http://www.w3.org/2001/XMLSchema#integer,literal,13
3,uri,http://www.wikidata.org/entity/Q39631,en,literal,physician,http://www.w3.org/2001/XMLSchema#integer,literal,37,http://www.w3.org/2001/XMLSchema#integer,literal,156
4,uri,http://www.wikidata.org/entity/Q24017632,en,literal,pulmonologist,http://www.w3.org/2001/XMLSchema#integer,literal,24,http://www.w3.org/2001/XMLSchema#integer,literal,1
5,uri,http://www.wikidata.org/entity/Q121594,en,literal,professor,http://www.w3.org/2001/XMLSchema#integer,literal,18,http://www.w3.org/2001/XMLSchema#integer,literal,95
6,uri,http://www.wikidata.org/entity/Q1919436,en,literal,pediatrician,http://www.w3.org/2001/XMLSchema#integer,literal,12,http://www.w3.org/2001/XMLSchema#integer,literal,12
7,uri,http://www.wikidata.org/entity/Q3368718,en,literal,pathologist,http://www.w3.org/2001/XMLSchema#integer,literal,12,http://www.w3.org/2001/XMLSchema#integer,literal,10
8,uri,http://www.wikidata.org/entity/Q212980,en,literal,psychologist,http://www.w3.org/2001/XMLSchema#integer,literal,10,http://www.w3.org/2001/XMLSchema#integer,literal,55
9,uri,http://www.wikidata.org/entity/Q2114605,en,literal,pharmacologist,http://www.w3.org/2001/XMLSchema#integer,literal,10,http://www.w3.org/2001/XMLSchema#integer,literal,4


# 5 Scope of Wikidata's clinical trials content #  

## 5.1 List of clinical trials ##

In [21]:
query_string = """
# List of clinical trials
SELECT ?item ?nct_id ?itemLabel ?phaseLabel ?enrollment ?start_date ?primary_completion_date
WHERE
{
   ?item p:P31/ps:P31/wdt:P279* wd:Q30612.
   ?item wdt:P3098 ?nct_id .
   OPTIONAL { ?item wdt:P580  ?start_date }
   OPTIONAL { ?item wdt:P582 ?primary_completion_date }
   OPTIONAL { ?item wdt:P6099 ?phase }
   OPTIONAL { ?item wdt:P1132 ?enrollment }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 100"""

result=select(query_string)
result.to_csv("List_of_clinical_trials.csv")
result

Unnamed: 0,item.type,item.value,nct_id.type,nct_id.value,itemLabel.xml:lang,itemLabel.type,itemLabel.value,start_date.datatype,start_date.type,start_date.value,primary_completion_date.datatype,primary_completion_date.type,primary_completion_date.value,enrollment.datatype,enrollment.type,enrollment.value,phaseLabel.xml:lang,phaseLabel.type,phaseLabel.value
0,uri,http://www.wikidata.org/entity/Q58846611,literal,NCT03339843,en,literal,Multiorgan Metabolic Imaging Response Assessme...,,,,,,,,,,,,
1,uri,http://www.wikidata.org/entity/Q58846782,literal,NCT03409848,en,literal,Ipilimumab or FOLFOX in Combination With Nivol...,,,,,,,,,,,,
2,uri,http://www.wikidata.org/entity/Q58846865,literal,NCT03495544,en,literal,Study Estimating Association Between Germline ...,,,,,,,,,,,,
3,uri,http://www.wikidata.org/entity/Q58846970,literal,NCT03571633,en,literal,Impact of Pegfilgrastim on Trastuzumab Anti-tu...,,,,,,,,,,,,
4,uri,http://www.wikidata.org/entity/Q58847000,literal,NCT03580070,en,literal,Changes in the Microenvironment of HPV-induced...,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,uri,http://www.wikidata.org/entity/Q61862149,literal,NCT02860000,en,literal,Alisertib With or Without Fulvestrant in Treat...,http://www.w3.org/2001/XMLSchema#dateTime,literal,2017-07-06T00:00:00Z,http://www.w3.org/2001/XMLSchema#dateTime,literal,2021-12-15T00:00:00Z,http://www.w3.org/2001/XMLSchema#decimal,literal,96,en,literal,phase II clinical trial
96,uri,http://www.wikidata.org/entity/Q61862157,literal,NCT02847013,en,literal,The Use of Liposomal Bupivacaine in TAP Blocks...,http://www.w3.org/2001/XMLSchema#dateTime,literal,2019-03-01T00:00:00Z,http://www.w3.org/2001/XMLSchema#dateTime,literal,2019-12-31T00:00:00Z,http://www.w3.org/2001/XMLSchema#decimal,literal,40,en,literal,phase II clinical trial
97,uri,http://www.wikidata.org/entity/Q61862157,literal,NCT02847013,en,literal,The Use of Liposomal Bupivacaine in TAP Blocks...,http://www.w3.org/2001/XMLSchema#dateTime,literal,2019-03-01T00:00:00Z,http://www.w3.org/2001/XMLSchema#dateTime,literal,2019-12-31T00:00:00Z,http://www.w3.org/2001/XMLSchema#decimal,literal,40,en,literal,phase III clinical trial
98,uri,http://www.wikidata.org/entity/Q61862160,literal,NCT02834780,en,literal,"Phase 1 Study to Evaluate the Safety, Pharmaco...",http://www.w3.org/2001/XMLSchema#dateTime,literal,2016-07-01T00:00:00Z,http://www.w3.org/2001/XMLSchema#dateTime,literal,2020-06-01T00:00:00Z,http://www.w3.org/2001/XMLSchema#decimal,literal,128,en,literal,phase I clinical trial


## 5.2 Count of clinical trials ##

In [22]:
query_string = """
# Count clinical trials
SELECT (count(distinct ?item) as ?count)
WHERE {?item p:P31/ps:P31/wdt:P279* wd:Q30612
}"""

result=select(query_string)
result.to_csv("Count_of_clinical_trials.csv")
result

Unnamed: 0,count.datatype,count.type,count.value
0,http://www.w3.org/2001/XMLSchema#integer,literal,356915


## 5.3 Most common properties applied to clinical trials ##

In [23]:
query_string = """
SELECT DISTINCT ?property ?propertyLabel ?count
WITH {
  SELECT DISTINCT ?item WHERE {
    ?item wdt:P31*/wdt:P279* wd:Q30612 .
  }
  LIMIT 400000
  } AS %items 
WITH {
  SELECT DISTINCT ?property (COUNT(*) AS ?count) WHERE {
  INCLUDE %items.
    ?item ?p [ ] .
    ?property a wikibase:Property;
                wikibase:claim ?p.
  }
  GROUP BY ?property 
  LIMIT 200
  } AS %results 
WHERE {
  INCLUDE %results.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
LIMIT 200"""

result=select(query_string)
result.to_csv("Most_common_properties_applied_to_clinical_trials.csv")
result

Unnamed: 0,property.type,property.value,propertyLabel.xml:lang,propertyLabel.type,propertyLabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/P17,en,literal,country,http://www.w3.org/2001/XMLSchema#integer,literal,405665
1,uri,http://www.wikidata.org/entity/P31,en,literal,instance of,http://www.w3.org/2001/XMLSchema#integer,literal,357284
2,uri,http://www.wikidata.org/entity/P3098,en,literal,ClinicalTrials.gov ID,http://www.w3.org/2001/XMLSchema#integer,literal,356551
3,uri,http://www.wikidata.org/entity/P580,en,literal,start time,http://www.w3.org/2001/XMLSchema#integer,literal,351490
4,uri,http://www.wikidata.org/entity/P1132,en,literal,number of participants,http://www.w3.org/2001/XMLSchema#integer,literal,349638
...,...,...,...,...,...,...,...,...
129,uri,http://www.wikidata.org/entity/P585,en,literal,point in time,http://www.w3.org/2001/XMLSchema#integer,literal,1
130,uri,http://www.wikidata.org/entity/P576,en,literal,"dissolved, abolished or demolished date",http://www.w3.org/2001/XMLSchema#integer,literal,1
131,uri,http://www.wikidata.org/entity/P508,en,literal,BNCF Thesaurus ID,http://www.w3.org/2001/XMLSchema#integer,literal,1
132,uri,http://www.wikidata.org/entity/P287,en,literal,designed by,http://www.w3.org/2001/XMLSchema#integer,literal,1


## 5.4 Count of statements in clinical trial records ##

In [24]:
query_string = """
#defaultView:AreaChart
SELECT ?st ?ct {
  {
   BIND (0 AS ?ct)
   BIND (0 AS ?st)
  }
  UNION {
    SELECT ?st (COUNT(*) as ?ct)
    {
      ?item wdt:P31*/wdt:P279* wd:Q30612 ; wikibase:statements ?st
    }
    GROUP BY ?st
    ORDER BY ?st
  }
}"""

result=select(query_string)
result.to_csv("Count_of_statements_in_clinical_trial_records.csv")
result

Unnamed: 0,ct.datatype,ct.type,ct.value,st.datatype,st.type,st.value
0,http://www.w3.org/2001/XMLSchema#integer,literal,0,http://www.w3.org/2001/XMLSchema#integer,literal,0
1,http://www.w3.org/2001/XMLSchema#integer,literal,110,http://www.w3.org/2001/XMLSchema#integer,literal,42
2,http://www.w3.org/2001/XMLSchema#integer,literal,114,http://www.w3.org/2001/XMLSchema#integer,literal,41
3,http://www.w3.org/2001/XMLSchema#integer,literal,119,http://www.w3.org/2001/XMLSchema#integer,literal,40
4,http://www.w3.org/2001/XMLSchema#integer,literal,139,http://www.w3.org/2001/XMLSchema#integer,literal,39
...,...,...,...,...,...,...
138,http://www.w3.org/2001/XMLSchema#integer,literal,77,http://www.w3.org/2001/XMLSchema#integer,literal,47
139,http://www.w3.org/2001/XMLSchema#integer,literal,79,http://www.w3.org/2001/XMLSchema#integer,literal,46
140,http://www.w3.org/2001/XMLSchema#integer,literal,77,http://www.w3.org/2001/XMLSchema#integer,literal,45
141,http://www.w3.org/2001/XMLSchema#integer,literal,79,http://www.w3.org/2001/XMLSchema#integer,literal,44


## 5.5 Count of trial records in Wikidata per clinical trial registry ##

In [25]:
query_string = """
SELECT DISTINCT ?registry ?registryLabel ?count
WITH {
  SELECT DISTINCT ?item WHERE {
    ?item wdt:P31 wd:Q30612 ;
  }
  LIMIT 400000
} AS %items 
WITH {
  SELECT DISTINCT ?registry ?registryIDitem WHERE {
    ?registry wdt:P31 wd:Q2138567 .
    ?registryIDitem wdt:P1535 ?registry .
  }
  GROUP BY ?registry ?registryIDitem
  LIMIT 100
} AS %registries 
WITH {
  SELECT DISTINCT ?registry (COUNT(*) AS ?count) WHERE {
    INCLUDE %items.
    INCLUDE %registries.
    ?item ?p [ ] .
    ?property wdt:P1629 ?registryIDitem;
              wikibase:claim ?p.
    ?property wikibase:propertyType wikibase:ExternalId .
  }
  GROUP BY ?registry 
  LIMIT 100
} AS %results 
WHERE {
  INCLUDE %results.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
LIMIT 100"""

result=select(query_string)
result.to_csv("Count_of_trial_records_in_Wikidata_per_clinical_trial_registry.csv")
result

Unnamed: 0,registry.type,registry.value,registryLabel.xml:lang,registryLabel.type,registryLabel.value,count.datatype,count.type,count.value
0,uri,http://www.wikidata.org/entity/Q5133746,en,literal,ClinicalTrials.gov,http://www.w3.org/2001/XMLSchema#integer,literal,356517
1,uri,http://www.wikidata.org/entity/Q65242932,en,literal,Australian New Zealand Clinical Trials Registry,http://www.w3.org/2001/XMLSchema#integer,literal,45
2,uri,http://www.wikidata.org/entity/Q88374053,en,literal,Chinese Clinical Trial Registry,http://www.w3.org/2001/XMLSchema#integer,literal,2
