In [1]:
import io
import json
import os
import pprint
import re
import shlex
import subprocess
import urllib.parse
from datetime import datetime, timedelta

import requests

In [5]:
# Base URL of the BrAPI v1 endpoint queried by get_germplasmName() and get_phenotype().
# get_phenotype() bakes this value into its default `url` argument when the function
# is defined, so re-run that cell after changing the URL here.
brapi_url = 'http://pippa.psb.ugent.be/pippa_experiments/brapi/v1'

# Functions

In [6]:
def get_germplasmName (name):
    '''
    Query the BrAPI germplasm-search call by germplasm name.

    Sends a GET request to "<brapi_url>/germplasm-search/" with `name` as the
    germplasmName query parameter and a page size of 100, and returns the
    decoded JSON body of the response. The HTTP status code is not checked.
    '''
    endpoint = '%s/germplasm-search/' % brapi_url
    query = {
        'pageSize': 100,
        'germplasmName': name,
    }
    response = requests.get(endpoint, query)
    return response.json()

In [7]:
def get_phenotype (
    dbid = None,
    studyid = None,
    url = '%s/phenotype-search/' % brapi_url,
    page = 1,
    page_size = 50):
    '''
    POST a phenotype-search request and return the decoded JSON response.

    dbid      -- optional value sent as 'germplasmDbIds' (omitted when falsy)
    studyid   -- optional value sent as 'studyDbIds' (omitted when falsy)
    url       -- endpoint to post to; defaults to the phenotype-search call
                 below brapi_url as it was when this function was defined
    page      -- value sent as 'page'
    page_size -- value sent as 'pageSize'

    Raises Exception when the response status code is not 200.

    NOTE(review): BrAPI paging is presumably zero-based, in which case the
    default page = 1 skips the first page of results -- confirm against the
    endpoint before relying on it.
    '''
    payload = {'pageSize': page_size, 'page': page}
    for key, value in (('germplasmDbIds', dbid), ('studyDbIds', studyid)):
        if value:
            payload[key] = value

    response = requests.post(url, payload)
    if response.status_code != 200:
        raise Exception('Perform experiment query failed!')
    return response.json()

In [8]:
def generate_biosample_json (start = 0, maximum = 1):
    '''
    Build BioSamples submission documents from phenotype records.

    The records come from one unfiltered get_phenotype() call (filtering on
    the endpoint did not work), so only the records returned by that single
    call are considered. `start` and `maximum` select a window of them.

    Parameters
    ----------
    start : int
        Index of the first phenotype record to convert (negative values are
        treated as 0).
    maximum : int
        Maximum number of records to convert: records with
        start <= index < start + maximum are used.

    Returns
    -------
    list of dict
        One sample document per selected record, carrying the keys "name",
        "update", "release", "characteristics" and "externalReferences".

    Raises
    ------
    IndexError
        If the germplasm search returns no record for a germplasm name.
    KeyError
        If a record lacks an expected field or the species has no entry in
        the module-level speciesOntology mapping.
    '''
    phenotypes = get_phenotype()['result']['data']

    # Select the requested window up front. The previous loop never advanced
    # its index counter and decremented an undefined counter, so it either
    # skipped every record or raised on the first one.
    first = max(start, 0)
    selected = phenotypes[first:first + max(maximum, 0)]

    # "%H" is the 24-hour clock; the former "%I" (12-hour, no AM/PM marker)
    # and the invalid "%:" directive produced ambiguous or malformed stamps.
    yesterday = datetime.now() - timedelta(1)
    timestamp = yesterday.strftime("%Y-%m-%dT%H:%M:%S")

    jsons = []
    for i in selected:
        germplasm_name = i['germplasmName']
        germplasm = get_germplasmName(germplasm_name)
        species = germplasm['result']['data'][0]['species']

        # Percent-encode the whole name, not only spaces, so characters such
        # as '&' or '#' cannot break the query string.
        germplasm_url = (
            "https://pippa.psb.ugent.be/pippa_experiments/brapi/v1/germplasm-search?germplasmName=%s"
            % urllib.parse.quote(germplasm_name, safe='')
        )

        sample_json = {
            "name": "{}_{}".format(i['studyDbId'], i['plantNumber']),
            "update": timestamp,
            "release": timestamp,
            "characteristics": {
                "germplasm": [
                    {
                        "text": germplasm_name,
                        "ontologyTerms": [germplasm_url]
                    }
                ],
                "organism": [
                    {
                        "text": species,
                        "ontologyTerms": [speciesOntology[species]]
                    }
                ]
            },
            "externalReferences": [
                {
                    # phenotype-search by germplasmDbId is not yet implemented
                    # in our endpoint, so link to the germplasm search instead.
                    "url": germplasm_url
                }
            ]
        }
        jsons.append(sample_json)
    return jsons

In [9]:
def runCode (code) :
    '''
    Run a shell command and capture everything it prints.

    Parameters
    ----------
    code : str
        Command line handed to the shell (shell=True), so it must be built
        from trusted or properly quoted input.

    Returns
    -------
    tuple (result, error)
        Two lists of bytes objects: the lines the command wrote to stdout
        and to stderr, line endings included.
    '''
    with subprocess.Popen([code],
                          shell=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as process:
        # communicate() drains both pipes at the same time and waits for the
        # process to exit. Reading stdout to EOF before touching stderr can
        # deadlock once the child fills the stderr pipe buffer.
        stdout_data, stderr_data = process.communicate()

    # BytesIO.readlines() splits on b"\n" only, exactly like readlines() on
    # the pipe did, so callers keep receiving the same line boundaries.
    result = io.BytesIO(stdout_data).readlines()
    error = io.BytesIO(stderr_data).readlines()

    return(result, error)

## Generate bash files

In [10]:
# The helper scripts are written into BioSamplesData/. Create the folder first so
# this cell also works when the folder does not exist yet (e.g. a fresh checkout).
os.makedirs("BioSamplesData", exist_ok=True)

# getBioSampleID.sh -- POSTs a sample JSON file to the samples endpoint.
# parameter : json file to submit to get bioSampleID
with open("BioSamplesData/getBioSampleID.sh", 'w') as f:
    f.write("cat \"$1\" | curl -X POST -H \"Content-Type: application/json\" -d @- \"http://byod.psblocal:8081/biosamples/beta/samples/\"\n")

# submitBioSample.sh -- PUTs <folder>/<bioSampleID>.json to the sample's own URL.
# parameter : relative path to folder, bioSampleID
# filename needs to be the same as bioSampleID, so enforced in rest of the code
with open("BioSamplesData/submitBioSample.sh", 'w') as f:
    f.write("cat \"$1/$2.json\" | curl -X PUT -H \"Content-Type: application/json\" -d @- \"http://byod.psblocal:8081/biosamples/beta/samples/$2\"\n")

# getBioSample.sh -- GETs the stored sample back.
# parameter : bioSampleID
with open("BioSamplesData/getBioSample.sh", 'w') as f:
    f.write("curl -X GET -H \"Accept: application/hal+json\" \"http://byod.psblocal:8081/biosamples/beta/samples/$1\"\n")

## Make bash files executable

In [11]:
%%bash
# Give the current user execute permission on the three generated helper scripts
# so that they can be invoked directly by their path.
chmod u+x "BioSamplesData/getBioSampleID.sh"
chmod u+x "BioSamplesData/submitBioSample.sh"
chmod u+x "BioSamplesData/getBioSample.sh"

## Species ontology mapping

As our VIB BrAPI endpoint does not provide a species ontology, the mapping from species name to ontology term is hard-coded in the dictionary below.

In [12]:
# Maps a species name, as found in the 'species' field of a germplasm-search record,
# to the URL of its ontology term. generate_biosample_json() looks species up here at
# call time, so this cell must be run before that function is called; a species that
# is missing from the mapping raises KeyError there.
speciesOntology = {
    'Zea mays' : "http://purl.obolibrary.org/obo/NCBITaxon_4577"
}

# Generate BioSamples

In [None]:
# Extracts the accession from a response line of the form
#   "accession" : "ABC123",
# Written as a raw string because "\w" and "\d" are invalid escape sequences in a
# normal string literal, and compiled once instead of on every iteration.
pattern = re.compile(r'.*"accession" : "(\w*\d*)",.*')

for my_json in generate_biosample_json(start = 0, maximum = 1):
    filename = my_json['name'] + ".json"
    sample_path = "BioSamplesData/%s" % filename

    print ("writing json\n")
    # ensure_ascii = False can emit non-ASCII characters, so fix the file encoding.
    with open(sample_path, 'w', encoding = "utf-8") as f:
        json.dump(my_json, f, sort_keys = True, indent = 4, ensure_ascii = False)
    print ("...done\n")

    print ("getting ID\n")
    # The file name is derived from API data, so quote it before handing it to the shell.
    getBioSampleIDcode = 'BioSamplesData/getBioSampleID.sh %s' % shlex.quote(sample_path)

    (result, error) = runCode(getBioSampleIDcode)
    print ("...done\n")
    for e in error:
        print ("%s" % e.decode("utf-8").rstrip())

    # Decode the response once; it is scanned for the accession and written twice.
    response_lines = [r.decode("utf-8").rstrip() for r in result]
    response_text = "".join(response_lines)

    # If several lines match, the last one wins (as before).
    biosampleID = None
    for line in response_lines:
        match = pattern.match(line)
        if match:
            biosampleID = match.group(1)

    print ("pushing data\n")
    # overwriting the file we previously generated with the server's response
    with open(sample_path, 'w', encoding = "utf-8") as f:
        f.write(response_text)

    if biosampleID: # not None (and not empty)
        print(biosampleID)
        # writing the returned BioSample object to a file named after its ID,
        # because submitBioSample.sh expects <folder>/<bioSampleID>.json
        with open("BioSamplesData/%s.json" % biosampleID, 'w', encoding = "utf-8") as f:
            f.write(response_text)

        # submitting
        print (">>> writing data\n")

        putBioSamplecode = 'BioSamplesData/submitBioSample.sh %s %s' % ("BioSamplesData", shlex.quote(biosampleID))
        (result, error) = runCode(putBioSamplecode)
        for e in error:
            print ("%s" % e.decode("utf-8").rstrip())
        for r in result:
            print (r.decode("utf-8").rstrip())

        # getting it back from the DB
        print (">>> checking data\n")
        getBioSamplecode = 'BioSamplesData/getBioSample.sh %s' % shlex.quote(biosampleID)
        (result, error) = runCode(getBioSamplecode)
        for e in error:
            print ("%s" % e.decode("utf-8").rstrip())
        for r in result:
            print(r.decode("utf-8").rstrip())
    else:
        print("ERROR no BioSample ID\n")
    print ("...done\n")

    print(filename)