# Pharmacokinetics Database in SEEK
Upload of file assets to seek via the write/read API. Experimental data sets have been digitized from literature. The respective files are now made available via LiSyM-SEEK.

<img src="./data_extraction.png" width="500"/>

Proof-of-principle by **Matthias König, Hadas Leonov & Wolfgang Müller** 

In [38]:
# important ids (seekbeta.lisym.org)
REPLACEMENTS = {
    'KOENIG_ID': "17",
    'PROJECT_ID': "10",  # MM-PLF
    'INVESTIGATION_ID': "17",
    'HOMO_SAPIENS': "950657990",
}

In [39]:
import requests
import json
import string
from pprint import pprint
# base_url = 'http://localhost:3000/'
base_url = 'https://seekbeta.lisym.org/'

API_TOKEN = open("./token").readline().strip() #"user:password" encoded in base64
headers = {"Authorization": "Basic %s" %API_TOKEN,
           "Content-type": "application/vnd.api+json",
           "Accept": "application/vnd.api+json"}
file_upload_headers = headers.copy()
file_upload_headers.pop('Content-type');

In [40]:
def print_response(response_text):
    if (r.ok):
        obj = json.loads(response_text)
        print("Returned successfully posted object:\n",obj)
        return obj
    else:
        print("Error: ", r.text)
    return None

In [41]:
def init_json_data(file, replacements=REPLACEMENTS):
    json_format = json.load(open(file))
    json_data = json.dumps(json_format)
    for key, value in replacements.items():
        json_data = str.replace(json_data, key, value)
    return json_data

### Post caffeine study
The investigation and project already exist and can be referenced via their respective ids. Now we create a new study for the caffeine data.

In [42]:
study_file = "./pkdb/json/caffeine_study.json"
url = base_url + "studies"
json_data = init_json_data(study_file, replacements=REPLACEMENTS)
# pprint(json_data)
print('-'*80)
pprint(json.loads(json_data))
print('-'*80)
r = requests.post(url, headers=headers, data=json_data)
study = print_response(r.text)

--------------------------------------------------------------------------------
{'data': {'attributes': {'description': 'Pharmacokinetics data set for '
                                        'caffeine',
                         'experimentalists': '',
                         'other_creators': '',
                         'person_responsible_id': '17',
                         'policy': {'access': 'view',
                                    'permissions': [{'access': 'download',
                                                     'resource_id': '10',
                                                     'resource_type': 'projects'}]},
                         'title': 'PKDB Caffeine Study'},
          'relationships': {'creators': {'data': [{'id': '17',
                                                   'type': 'people'}]},
                            'investigation': {'data': {'id': '17',
                                                       'type': 'investigations'}},
           

In [43]:
# store study id for assays
REPLACEMENTS['STUDY_ID'] = study['data']['id']

### Experimental assays & Data files
Every digitized publication is handled as a separate experimental assay.

The corresponding data files (CSV data) are associated as data file to the assay.

In [46]:
author_id = 'Akinyinka2000'
REPLACEMENTS['AUTHOR_ID'] = author_id 
REPLACEMENTS['AUTHOR_DESCRIPTION'] = '{} Description'.format(author_id)

exp_assays = []
assay_file = "./pkdb/json/caffeine_exp_assay.json"
url = base_url + "assays"
json_data = init_json_data(assay_file)
r = requests.post(url, headers=headers, data=json_data)
assay = print_response(r.text)
exp_assays.append(assay)

Returned successfully posted object:
 {'jsonapi': {'version': '1.0'}, 'data': {'id': '88', 'attributes': {'policy': {'permissions': [{'resource_id': '10', 'resource_type': 'projects', 'access': 'manage'}], 'access': 'download'}, 'description': 'Data digitized from publication.\n\nAkinyinka2000 Description', 'tags': ['Blood sampling', 'Human', 'pharmacokinetics'], 'assay_type': {'label': None, 'uri': 'http://www.mygrid.org.uk/ontology/JERMOntology#mass_Spectrometry'}, 'assay_class': {'description': None, 'title': 'Experimental assay', 'key': 'EXP'}, 'technology_type': {'label': None, 'uri': 'http://www.mygrid.org.uk/ontology/JERMOntology#mass_Spectrometry'}, 'other_creators': None, 'title': 'Digitized pharmacokinetics data (Akinyinka2000)'}, 'meta': {'base_url': 'http://seek.lisym.org', 'api_version': '0.1', 'modified': '2018-04-30T10:08:08.676Z', 'created': '2018-04-30T10:08:08.443Z', 'uuid': '4e2e81b0-2e8c-0136-ce3f-0242ac120005'}, 'links': {'self': '/assays/88'}, 'type': 'assays', 'r

In [48]:
#First POST the meta-data
REPLACEMENTS['ASSAY_ID'] = assay['data']['id']
data_files = []

# create the metadata
df_file = "./pkpd/json/caffeine_data_file.json"
url = base_url + "data_files"
json_data = init_json_data(df_file)
r = requests.post(url, headers=headers, data=json_data)
data_file = print_response(r.text)

# second, add the file (PUT)
filepath = './pkpd/files/data/{}.csv'.format(author_id)
url = df['data']['attributes']['content_blobs'][0]['link']  # content_blobs array size = 1
r = requests.put(url, headers=file_upload_headers, files={"file": open(filepath, 'rb')})
print("Response Status:", r)


FileNotFoundError: [Errno 2] No such file or directory: './pkpd/json/caffeine_data_file.json'

# Cleanup SEEK

In [45]:
# delete experimental assays
for assay in exp_assays:
    assay_id = assay['data']['id']
    url = base_url + "assays/{}.json".format(assay_id)
    print(url)
    r = requests.delete(url, headers=headers)
    print_response(r.text)  

https://seekbeta.lisym.org/assays/87.json
Returned successfully posted object:
 {'status': 'ok'}


In [None]:
# delete study
study_id = study['data']['id']
url = base_url + "studies/{}.json".format(study_id)
print(url)
r = requests.delete(url, headers=headers)
print_response(r.text)

### Post an SOP linked to an Assay

In [10]:
#First POST the meta-data
sop_file = "./json/post_max_sop.json"
url = base_url + "sops"
json_data = init_json_data(sop_file, "API Test SOP")
json_data = str.replace(json_data, "PROJECT_ID", proj['data']['id'])
json_data = str.replace(json_data, "ASSAY_ID", assay['data']['id'])
r = requests.post(url, headers=headers, data=json_data)
sop = print_response(r.text)
#Second, add the file (PUT)
url = sop['data']['attributes']['content_blobs'][0]['link']  #only one file in an Sop 
r = requests.put(url, headers=file_upload_headers, files={"file": open('./files/meltdown.pdf', 'rb')})
print("Response Status: ",r)

Returned successfully posted object:
 {'data': {'id': '6', 'type': 'sops', 'attributes': {'policy': {'access': 'download', 'permissions': [{'resource_type': 'projects', 'resource_id': '10', 'access': 'edit'}]}, 'title': 'API Test SOP', 'description': 'This is the description', 'license': 'CC-BY-4.0', 'latest_version': 1, 'tags': ['tag1', 'tag2'], 'versions': [{'version': 1, 'revision_comments': None, 'url': 'http://localhost:3000/sops/6?version=1'}], 'version': 1, 'revision_comments': None, 'created_at': '2018-04-13T08:58:18.000Z', 'updated_at': '2018-04-13T08:58:18.000Z', 'content_blobs': [{'original_filename': 'a_pdf_file.pdf', 'url': None, 'md5sum': None, 'sha1sum': None, 'content_type': 'application/pdf', 'link': 'http://localhost:3000/sops/6/content_blobs/62', 'size': None}], 'other_creators': 'John Smith, Jane Smith'}, 'relationships': {'creators': {'data': [{'id': '1', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}, 'people': {'data': [{'id': '1', 't

### Post a Document linked to an Assay

In [11]:
#First POST the meta-data
doc_file = "./json/post_max_document.json"
url = base_url + "documents"
json_data = init_json_data(doc_file, "API Test Document")
json_data = str.replace(json_data, "PROJECT_ID", proj['data']['id'])
json_data = str.replace(json_data, "ASSAY_ID", assay['data']['id'])
r = requests.post(url, headers=headers, data=json_data)
doc = print_response(r.text)
#Second, add the file (PUT)
url = doc['data']['attributes']['content_blobs'][0]['link']  #only one file in an Sop 
r = requests.put(url, headers=file_upload_headers, files={"file": open('./files/meltdown.pdf', 'rb')})
print("Response status: ", r)

Returned successfully posted object:
 {'data': {'id': '6', 'type': 'documents', 'attributes': {'policy': {'access': 'download', 'permissions': [{'resource_type': 'projects', 'resource_id': '10', 'access': 'edit'}]}, 'title': 'API Test Document', 'description': 'This is the description', 'license': 'CC-BY-4.0', 'latest_version': 1, 'tags': ['tag1', 'tag2'], 'versions': [{'version': 1, 'revision_comments': None, 'url': 'http://localhost:3000/documents/6?version=1'}], 'version': 1, 'revision_comments': None, 'created_at': '2018-04-13T08:58:21.000Z', 'updated_at': '2018-04-13T08:58:21.000Z', 'content_blobs': [{'original_filename': 'a_pdf_file.pdf', 'url': None, 'md5sum': None, 'sha1sum': None, 'content_type': 'application/pdf', 'link': 'http://localhost:3000/documents/6/content_blobs/63', 'size': None}], 'other_creators': 'John Smith, Jane Smith'}, 'relationships': {'creators': {'data': [{'id': '2', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}, 'people': {'da

### Post a Model linked to an Assay

In [12]:
#First POST the meta-data
model_file = "./json/post_max_model.json"
url = base_url + "models"
json_data = init_json_data(model_file, "API Test Model")
json_data = str.replace(json_data, "PROJECT_ID", proj['data']['id'])
json_data = str.replace(json_data, "ASSAY_ID", assay['data']['id'])
r = requests.post(url, headers=headers, data=json_data)
mod = print_response(r.text)

#Second, add the file (PUT)
blob_url = lambda bi: mod['data']['attributes']['content_blobs'][bi]['link']
blob_type = lambda bi: str.replace(mod['data']['attributes']['content_blobs'][bi]['content_type'], "application/", "")
filename = lambda bi: "./files/model_"+str(bi+1)+"."+blob_type(bi)

for i in range(len(mod['data']['attributes']['content_blobs'])):
    print("uploading", filename(i), "to", blob_url(i))
    r = requests.put(blob_url(i), headers=file_upload_headers, files={"file": open(filename(i), 'rb')})
    print("Response Status:", r)

Returned successfully posted object:
 {'data': {'id': '11', 'type': 'models', 'attributes': {'policy': {'access': 'download', 'permissions': [{'resource_type': 'projects', 'resource_id': '10', 'access': 'edit'}]}, 'title': 'API Test Model', 'description': 'This is the description', 'license': 'CC-BY-4.0', 'latest_version': 1, 'tags': ['tag1', 'tag2'], 'versions': [{'version': 1, 'revision_comments': None, 'url': 'http://localhost:3000/models/11?version=1'}], 'version': 1, 'revision_comments': None, 'created_at': '2018-04-13T08:58:25.000Z', 'updated_at': '2018-04-13T08:58:25.000Z', 'content_blobs': [{'original_filename': 'docs.pdf', 'url': None, 'md5sum': None, 'sha1sum': None, 'content_type': 'application/pdf', 'link': 'http://localhost:3000/models/11/content_blobs/64', 'size': None}, {'original_filename': 'docs2.pdf', 'url': None, 'md5sum': None, 'sha1sum': None, 'content_type': 'application/pdf', 'link': 'http://localhost:3000/models/11/content_blobs/65', 'size': None}, {'original_fi

### Post a Presentation (linked to a Project)

In [13]:
#First POST the meta-data
pr_file = "./json/post_max_presentation.json"
url = base_url + "presentations"
json_data = init_json_data(pr_file, "API Test Presentation")
json_data = str.replace(json_data, "PROJECT_ID", proj['data']['id'])
r = requests.post(url, headers=headers, data=json_data)
pr = print_response(r.text)
#Second, add the file (PUT)
url = pr['data']['attributes']['content_blobs'][0]['link']  #only one file in a presentation
r = requests.put(url, headers=file_upload_headers, files={"file": open('./files/presentation.pdf', 'rb')})
print("Response Status: ", r)

Returned successfully posted object:
 {'data': {'id': '6', 'type': 'presentations', 'attributes': {'policy': {'access': 'download', 'permissions': [{'resource_type': 'projects', 'resource_id': '10', 'access': 'edit'}]}, 'title': 'API Test Presentation', 'description': 'This is the description', 'license': 'CC-BY-4.0', 'latest_version': 1, 'tags': ['tag1', 'tag2'], 'versions': [{'version': 1, 'revision_comments': None, 'url': 'http://localhost:3000/presentations/6?version=1'}], 'version': 1, 'revision_comments': None, 'created_at': '2018-04-13T08:58:28.000Z', 'updated_at': '2018-04-13T08:58:28.000Z', 'content_blobs': [{'original_filename': 'a_pdf_file.pdf', 'url': None, 'md5sum': None, 'sha1sum': None, 'content_type': 'application/pdf', 'link': 'http://localhost:3000/presentations/6/content_blobs/67', 'size': None}], 'other_creators': 'John Smith, Jane Smith'}, 'relationships': {'creators': {'data': [{'id': '3', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}

### Post a Modeling Assay with linked Assets 

In [14]:
assay_file2 = "./json/post_max_assay2.json"
url = base_url + "assays"
json_data = init_json_data(assay_file2, "API Test Modeling Assay with linked assets")
json_data = str.replace(json_data, "PROJECT_ID", proj['data']['id'])
json_data = str.replace(json_data, "STUDY_ID", study['data']['id'])
json_data = str.replace(json_data, "SOP_ID", sop['data']['id'])
json_data = str.replace(json_data, "MODEL_ID", mod['data']['id'])
json_data = str.replace(json_data, "DOCUMENT_ID", doc['data']['id'])
json_data = str.replace(json_data, "DATAFILE_ID", df['data']['id'])

r = requests.post(url, headers=headers, data=json_data)
assay2 = print_response(r.text)

Returned successfully posted object:
 {'data': {'id': '15', 'type': 'assays', 'attributes': {'policy': {'access': 'download', 'permissions': [{'resource_type': 'projects', 'resource_id': '10', 'access': 'manage'}]}, 'title': 'API Test Modeling Assay with linked assets', 'description': 'modeling analysis', 'other_creators': 'Anonymous creator', 'assay_class': {'title': 'Modelling analysis', 'key': 'MODEL', 'description': None}, 'assay_type': {'label': None, 'uri': 'http://www.mygrid.org.uk/ontology/JERMOntology#Cell_cycle'}, 'technology_type': {'label': None, 'uri': None}, 'tags': ['Assay-tag1', 'Assay-tag2', 'Assay-tag3']}, 'relationships': {'creators': {'data': [{'id': '2', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}, 'organisms': {'data': [{'id': '548322508', 'type': 'organisms'}]}, 'people': {'data': [{'id': '1', 'type': 'people'}, {'id': '2', 'type': 'people'}]}, 'projects': {'data': [{'id': '10', 'type': 'projects'}]}, 'investigation': {'data': {'id

### Post a DataFile with remote content (URL) --> similar for other assets

In [15]:
df_file = "./json/post_remote_data_file.json"
url = base_url + "data_files"
json_data = init_json_data(df_file, "API remote Data File")
json_data = str.replace(json_data, "PROJECT_ID", proj['data']['id'])
json_data = str.replace(json_data, "ASSAY_ID", assay['data']['id'])
r = requests.post(url, headers=headers, data=json_data)

remote_df = print_response(r.text) 

Returned successfully posted object:
 {'data': {'id': '16', 'type': 'data_files', 'attributes': {'policy': {'access': 'no_access', 'permissions': []}, 'title': 'API remote Data File', 'description': None, 'license': None, 'latest_version': 1, 'tags': None, 'versions': [{'version': 1, 'revision_comments': None, 'url': 'http://localhost:3000/data_files/16?version=1'}], 'version': 1, 'revision_comments': None, 'created_at': '2018-04-13T08:58:41.000Z', 'updated_at': '2018-04-13T08:58:41.000Z', 'content_blobs': [{'original_filename': 'data-example.png', 'url': 'http://seek.virtual-liver.de/data_files/1110/content_blobs/1325/download', 'md5sum': None, 'sha1sum': None, 'content_type': 'image/png', 'link': 'http://localhost:3000/data_files/16/content_blobs/68', 'size': None}], 'other_creators': None}, 'relationships': {'creators': {'data': [{'id': '1', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}, 'people': {'data': [{'id': '1', 'type': 'people'}]}, 'projects': {

### Post a Model with remote content

In [16]:
model_file = "./json/post_remote_model.json"
url = base_url + "models"
json_data = init_json_data(model_file, "API remote model example")
json_data = str.replace(json_data, "PROJECT_ID", proj['data']['id'])
r = requests.post(url, headers=headers, data=json_data)

remote_model = print_response(r.text) 

Returned successfully posted object:
 {'data': {'id': '12', 'type': 'models', 'attributes': {'policy': {'access': 'no_access', 'permissions': []}, 'title': 'API remote model example', 'description': None, 'license': None, 'latest_version': 1, 'tags': None, 'versions': [{'version': 1, 'revision_comments': None, 'url': 'http://localhost:3000/models/12?version=1'}], 'version': 1, 'revision_comments': None, 'created_at': '2018-04-13T08:58:44.000Z', 'updated_at': '2018-04-13T08:58:44.000Z', 'content_blobs': [{'original_filename': 'download', 'url': 'http://seek.virtual-liver.de/data_files/1110/content_blobs/1325/download', 'md5sum': None, 'sha1sum': None, 'content_type': 'image/png', 'link': 'http://localhost:3000/models/12/content_blobs/69', 'size': None}, {'original_filename': '1110.xml', 'url': 'http://seek.virtual-liver.de/data_files/1110.xml', 'md5sum': None, 'sha1sum': None, 'content_type': 'application/xml', 'link': 'http://localhost:3000/models/12/content_blobs/70', 'size': None}], 