This notebook can be used to annotate the GWAS datasets on the ENIGMA wiki with the values for attributes such as Brain Region, SNP and Trait. These annotations are useful to test the hypothesis "Effect Size of Genotype on BrainImagingTrait of ROI is associated with Demographic" on the DISK portal.

In [1]:
import json
import requests
import pandas as pd

In [27]:
#Obtaining the content URLs for GWAS data downloads

replace = "http://localhost:8080/enigma_new/index.php/Special:URIResolver/"
query = """PREFIX wiki: <http://localhost:8080/enigma_new/index.php/Special:URIResolver/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX enigma: <https://w3id.org/enigma#>
SELECT ?gwasproject ?projresults ?url
WHERE 
{
    ?gwasproject wiki:Property-3AName_-28E-29 "GWAS".
    ?gwasproject wiki:Property-3AHasCohort_-28E-29 ?cohort.
    ?cohort wiki:Property-3AHasCohortProject_-28E-29 ?cohortproject.
    ?cohortproject wiki:Property-3AHasProjectResults ?projresults.
    ?projresults wiki:Property-3AHasContentUrl_-28E-29 ?url
}
"""

response = requests.post(url, data = {'query': query})
res = json.loads(response.text)
# print(res)

query_results=[]
print("Data Downloads:")    
for item in res['results']['bindings']:
    a1 = item['projresults']['value'].replace(replace,"")
    w1 = item['url']['value'].replace(replace,"")
    query_results.append([a1,w1])

df = pd.DataFrame(query_results)
df.columns=['Data Download','Content URL']
df.head(20)

Data Downloads:


Unnamed: 0,Data Download,Content URL
0,ASRB_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
1,FBIRN_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
2,FOR2107_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
3,HUBIN_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
4,MCIC_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
5,TOP_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
6,UMCU_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
7,UNICAMP_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
8,MPIP_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...
9,EPIGEN_ENIGMA3_Cortical_GWAS_Results,http://organicdatacuration.org/enigma_new/imag...


In [28]:
##This function identifies and returns the values a dataset containts for Area, Trait and SNP
def get_annotations(test):
    csv = pd.read_csv(test)
    df2 = pd.DataFrame(csv)
    area = set()
    trait = set()
    snp = set()
    for a in df2['AREA']:
        area.add(a)
    for t in df2['TRAIT']:
        trait.add(t)
    for s in df2['SNP']:
        snp.add(s)
    return area, trait, snp

In [None]:
##Logging into wiki

S = requests.Session()

URL = "http://organicdatacuration.org/enigma_new/api.php"

# Retrieve login token
PARAMS_0 = {
    'action':"query",
    'meta':"tokens",
    'type':"login",
    'format':"json"
}

DATA = S.get(url=URL, params=PARAMS_0).json()
LOGIN_TOKEN = DATA['query']['tokens']['logintoken']

print("Login Token: ",LOGIN_TOKEN)

# Go to http://organicdatacuration.org/enigma_new/index.php/Special:BotPasswords for lgname & lgpassword, and add them below

PARAMS_1 = {
    'action':"login",
    'lgname':"",
    'lgpassword':"",
    'lgtoken':LOGIN_TOKEN,
    'format':"json"
}

DATA = S.post(URL, data=PARAMS_1).json()

print(DATA)

In [34]:
##Test Annotation

idx = 2
test_title = df['Data Download'][idx]
url = df['Content URL'][idx]
print(url)
area1, trait1, snp1 = get_annotations(url)
traits = "|Trait=".join(trait1)
areas = "|BrainRegion=".join(area1)
snps = "|SNP=".join(snp1)
text_to_append="{{#set:|BrainRegion="+areas+"}}\n"+"{{#set:|Trait="+traits+"}}\n"+"{{#set:|SNP="+snps+"}}"
print(test_title)

PARAMS_2 = {
        "action": "query",
        "meta": "tokens",
        "format": "json"
    }

R = S.get(url=URL, params=PARAMS_2)
DATA = R.json()

CSRF_TOKEN = DATA['query']['tokens']['csrftoken']

# Step 4: POST request to edit a page

PARAMS_EDIT = {
    "action": "edit",
    "title": test_title,
    "section": "new",
    "format": "json",
    "text": text_to_append,
    "token": CSRF_TOKEN,
}

R = S.post(URL, data=PARAMS_EDIT)
DATA = R.json()

print(DATA)

http://organicdatacuration.org/enigma_new/images/a/a7/FOR2107_Significant_GWAS.csv
FOR2107_ENIGMA3_Cortical_GWAS_Results
{'edit': {'result': 'Success', 'pageid': 12732, 'title': 'FOR2107 ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28553, 'newrevid': 28554, 'newtimestamp': '2020-11-18T17:59:40Z'}}


In [50]:
##Annotating all the datasets

for idx, row in df.iterrows():
    test_title = df['Data Download'][idx]
    url = df['Content URL'][idx]
    area1, trait1, snp1 = get_annotations(url)
    traits = "|Trait=".join(trait1)
    areas = "|BrainRegion=".join(area1)
    snps = "|SNP=".join(snp1)
    text_to_append="{{#set:|BrainRegion="+areas+"}}\n"+"{{#set:|Trait="+traits+"}}\n"+"{{#set:|SNP="+snps+"}}"

    PARAMS_2 = {
            "action": "query",
            "meta": "tokens",
            "format": "json"
        }

    R = S.get(url=URL, params=PARAMS_2)
    DATA = R.json()

    CSRF_TOKEN = DATA['query']['tokens']['csrftoken']

    # Step 4: POST request to edit a page

    PARAMS_EDIT = {
        "action": "edit",
        "title": test_title,
        "section": "new",
        "format": "json",
        "text": text_to_append,
        "token": CSRF_TOKEN,
    }

    R = S.post(URL, data=PARAMS_EDIT)
    DATA = R.json()

    print(DATA)

{'edit': {'result': 'Success', 'pageid': 12666, 'title': 'ASRB ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28436, 'newrevid': 28555, 'newtimestamp': '2020-11-18T18:07:12Z'}}
{'edit': {'result': 'Success', 'pageid': 12729, 'title': 'FBIRN ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28519, 'newrevid': 28556, 'newtimestamp': '2020-11-18T18:07:13Z'}}
{'edit': {'result': 'Success', 'pageid': 12732, 'title': 'FOR2107 ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28554, 'newrevid': 28557, 'newtimestamp': '2020-11-18T18:07:14Z'}}
{'edit': {'result': 'Success', 'pageid': 12740, 'title': 'HUBIN ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28452, 'newrevid': 28558, 'newtimestamp': '2020-11-18T18:07:15Z'}}
{'edit': {'result': 'Success', 'pageid': 12754, 'title': 'MCIC ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28459, 'newrevid': 28559, 'newtimestamp': '2020-11-1

{'edit': {'result': 'Success', 'pageid': 12760, 'title': 'MooDS ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28462, 'newrevid': 28596, 'newtimestamp': '2020-11-18T18:07:46Z'}}
{'edit': {'result': 'Success', 'pageid': 12764, 'title': 'MPRC ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28464, 'newrevid': 28597, 'newtimestamp': '2020-11-18T18:07:47Z'}}
{'edit': {'result': 'Success', 'pageid': 12774, 'title': 'NTR ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28469, 'newrevid': 28598, 'newtimestamp': '2020-11-18T18:07:48Z'}}
{'edit': {'result': 'Success', 'pageid': 12778, 'title': 'OSAKA ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28471, 'newrevid': 28599, 'newtimestamp': '2020-11-18T18:07:48Z'}}
{'edit': {'result': 'Success', 'pageid': 12782, 'title': 'PDNZ ENIGMA3 Cortical GWAS Results', 'contentmodel': 'wikitext', 'oldrevid': 28473, 'newrevid': 28600, 'newtimestamp': '2020-11-18T18

In [51]:
##Testing query to extract dataset based on property Trait = "TH"

replace = "http://localhost:8080/enigma_new/index.php/Special:URIResolver/"
url = ""
query = """PREFIX wiki: <http://localhost:8080/enigma_new/index.php/Special:URIResolver/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX enigma: <https://w3id.org/enigma#>
SELECT ?d
WHERE 
{
    ?d a wiki:Category-3ADataDownload_-28E-29.
    ?d wiki:Property-3ATrait "TH".
}
"""

response = requests.post(url, data = {'query': query})
res = json.loads(response.text)
print(res)

query_results=[]
print("Data Downloads:")    
for item in res['results']['bindings']:
    a1 = item['d']['value'].replace(replace,"")
    query_results.append([a1])

dfresult = pd.DataFrame(query_results)
dfresult.columns=['Data Download']
dfresult

{'head': {'vars': ['d']}, 'results': {'bindings': [{'d': {'type': 'uri', 'value': 'http://localhost:8080/enigma_new/index.php/Special:URIResolver/1000BRAINS_ENIGMA3_Cortical_GWAS_Results'}}, {'d': {'type': 'uri', 'value': 'http://localhost:8080/enigma_new/index.php/Special:URIResolver/ADNI1_ENIGMA3_Cortical_GWAS_Results'}}, {'d': {'type': 'uri', 'value': 'http://localhost:8080/enigma_new/index.php/Special:URIResolver/ADNI2GO_ENIGMA3_Cortical_GWAS_Results'}}, {'d': {'type': 'uri', 'value': 'http://localhost:8080/enigma_new/index.php/Special:URIResolver/ALSPACa_ENIGMA3_Cortical_GWAS_Results'}}, {'d': {'type': 'uri', 'value': 'http://localhost:8080/enigma_new/index.php/Special:URIResolver/ASRB_ENIGMA3_Cortical_GWAS_Results'}}, {'d': {'type': 'uri', 'value': 'http://localhost:8080/enigma_new/index.php/Special:URIResolver/BrainScale_ENIGMA3_Cortical_GWAS_Results'}}, {'d': {'type': 'uri', 'value': 'http://localhost:8080/enigma_new/index.php/Special:URIResolver/CARDIFF_ENIGMA3_Cortical_GWAS_R

Unnamed: 0,Data Download
0,1000BRAINS_ENIGMA3_Cortical_GWAS_Results
1,ADNI1_ENIGMA3_Cortical_GWAS_Results
2,ADNI2GO_ENIGMA3_Cortical_GWAS_Results
3,ALSPACa_ENIGMA3_Cortical_GWAS_Results
4,ASRB_ENIGMA3_Cortical_GWAS_Results
5,BrainScale_ENIGMA3_Cortical_GWAS_Results
6,CARDIFF_ENIGMA3_Cortical_GWAS_Results
7,EPIGEN_ENIGMA3_Cortical_GWAS_Results
8,FBIRN_ENIGMA3_Cortical_GWAS_Results
9,FOR2107_ENIGMA3_Cortical_GWAS_Results
