Skip to content

Commit

Permalink
upload scoresets (#1)
Browse files Browse the repository at this point in the history
* initial commit for uploads and downloads of scoresets

* upload experiment functionality

* examples of how to use the tool to upload and download experiments and scoresets

* for travis
  • Loading branch information
joemin committed Nov 26, 2021
1 parent 847a782 commit 69f3dfd
Show file tree
Hide file tree
Showing 24 changed files with 508 additions and 4 deletions.
Empty file added examples/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions examples/experiment_create.py
@@ -0,0 +1,25 @@
import os

from mavetools.client.client import Client
from mavetools.models.experiment import NewExperiment

# Example: create (POST) a new experiment.
#
# Configuration is read from the environment so that no credentials are ever
# committed to the repository:
#   MAVEDB_BASE_URL   - optional; when unset, Client's default base URL is used
#   MAVEDB_AUTH_TOKEN - required for POST; generate a new auth token in your
#                       profile and export it under this name
base_url = os.getenv('MAVEDB_BASE_URL', '')
# Uncomment (here and in the NewExperiment call below) to pass an
# experimentset URN along with the new experiment.
# experimentset_urn = 'tmp:jCICvwLCntIuKIsf'

auth_token = os.getenv('MAVEDB_AUTH_TOKEN', '')
client = (
    Client(base_url, auth_token=auth_token)
    if base_url
    else Client(auth_token=auth_token)
)

# POST
new_experiment = NewExperiment(
    # experimentset=experimentset_urn,
    title='exp_test_title',
    short_description='exp_test_short_description',
    abstract_text='test_abstract_text',
    method_text='test_method_text',

    sra_ids=['SRP109119'],
    pubmed_ids=['23035249'],
    doi_ids=['10.1038/s41467-019-11526-w'],
)
client.post_model_instance(new_experiment)
18 changes: 18 additions & 0 deletions examples/experiment_get.py
@@ -0,0 +1,18 @@
import attr, os
from pprint import PrettyPrinter

from mavetools.client.client import Client
from mavetools.models.experiment import Experiment


# Example: fetch (GET) an experiment by URN and pretty-print its fields.
#
# Configuration is read from the environment so that no credentials are ever
# committed to the repository:
#   MAVEDB_BASE_URL   - optional; when unset, Client's default base URL is used
#   MAVEDB_AUTH_TOKEN - generate a new auth token in your profile and export
#                       it under this name
pp = PrettyPrinter(indent=2)
base_url = os.getenv('MAVEDB_BASE_URL', '')
experiment_urn = 'urn:mavedb:00000001-a'

auth_token = os.getenv('MAVEDB_AUTH_TOKEN', '')
client = (
    Client(base_url, auth_token=auth_token)
    if base_url
    else Client(auth_token=auth_token)
)

# GET
experiment = client.get_model_instance(Experiment, experiment_urn)
pp.pprint(attr.asdict(experiment))
51 changes: 51 additions & 0 deletions examples/scoreset_create.py
@@ -0,0 +1,51 @@
import attr, os

from mavetools.client.client import Client
from mavetools.models.licence import Licence
from mavetools.models.scoreset import NewScoreSet, NewScoreSetRequest, ScoreSet
from mavetools.models.target import NewTarget, ReferenceGenome, ReferenceMap, SequenceOffset


# Example: create (POST) a new scoreset, including its target and data files.
#
# Configuration is read from the environment so that no credentials are ever
# committed to the repository:
#   MAVEDB_BASE_URL   - optional; when unset, Client's default base URL is used
#   MAVEDB_AUTH_TOKEN - required for POST; generate a new auth token in your
#                       profile and export it under this name
base_url = os.getenv('MAVEDB_BASE_URL', '')
scoreset_urn = 'urn:mavedb:00000001-a-1'
experiment_urn = 'urn:mavedb:00000001-a'

auth_token = os.getenv('MAVEDB_AUTH_TOKEN', '')
client = (
    Client(base_url, auth_token=auth_token)
    if base_url
    else Client(auth_token=auth_token)
)

# POST
# Change this dir string as needed. It's currently configured for running
# inside a Docker container that mounts the home directory as a volume.
test_file_dir = '/mavetools/tests/test_upload_scoreset/test_files'
new_scoreset = NewScoreSet(
    title='test_title',
    short_description='test_short_description',
    abstract_text='test_abstract_text',

    experiment=experiment_urn,
    score_data=f"{test_file_dir}/test_score_data.csv",
    count_data=f"{test_file_dir}/test_count.csv",
    meta_data=f"{test_file_dir}/test_metadata.json",
    licence=Licence(short_name='CC BY 4.0'),

    sra_ids=['SRP109119'],
    pubmed_ids=['23035249'],
    doi_ids=['10.1038/s41467-019-11526-w'],
)
new_scoreset_request = NewScoreSetRequest(
    scoreset=new_scoreset,
    target=NewTarget(
        name='test_target_name',
        type='Protein coding',
        sequence_type='Infer',
        fasta_file=f"{test_file_dir}/test_fasta_file.fasta",
    ),
    uniprot=SequenceOffset(offset=1, identifier='P63165'),
    ensembl=SequenceOffset(offset=1, identifier='ENSG00000116030'),
    refseq=SequenceOffset(offset=1, identifier='NM_001005781.1'),
    reference_maps=[
        ReferenceMap(genome=ReferenceGenome(short_name='hg16')),
    ],
)
client.post_model_instance(new_scoreset_request)
18 changes: 18 additions & 0 deletions examples/scoreset_get.py
@@ -0,0 +1,18 @@
import attr, os
from pprint import PrettyPrinter

from mavetools.client.client import Client
from mavetools.models.scoreset import ScoreSet


# Example: fetch (GET) a scoreset by URN and pretty-print its fields.
#
# Configuration is read from the environment so that no credentials are ever
# committed to the repository:
#   MAVEDB_BASE_URL   - optional; when unset, Client's default base URL is used
#   MAVEDB_AUTH_TOKEN - generate a new auth token in your profile and export
#                       it under this name
pp = PrettyPrinter(indent=2)
base_url = os.getenv('MAVEDB_BASE_URL', '')
scoreset_urn = 'urn:mavedb:00000001-a-1'

auth_token = os.getenv('MAVEDB_AUTH_TOKEN', '')
client = (
    Client(base_url, auth_token=auth_token)
    if base_url
    else Client(auth_token=auth_token)
)

# GET
scoreset = client.get_model_instance(ScoreSet, scoreset_urn)
pp.pprint(attr.asdict(scoreset))
Empty file added mavetools/client/__init__.py
Empty file.
70 changes: 70 additions & 0 deletions mavetools/client/client.py
@@ -0,0 +1,70 @@
import json
import logging
import requests
import sys

class Client():
    """
    Thin HTTP client that GETs and POSTs model instances against an API.

    Model classes handed to this client must provide ``api_url()``; classes
    used with ``get_model_instance`` must also provide ``deserialize()`` and
    instances used with ``post_model_instance`` must provide
    ``post_payload()``.
    """

    class AuthTokenMissingException(Exception):
        """Raised when a POST is attempted without an auth token."""

    def __init__(self, base_url='http://127.0.0.1:8000/api/', auth_token=''):
        """
        Parameters
        ----------
        base_url : str
            Root URL of the API; model URLs are appended to it verbatim.
        auth_token : str
            Token sent in the ``Authorization`` header of POST requests.
        """
        self.base_url = base_url
        # Always define the attribute (even when no token was given) so the
        # check in post_model_instance cannot fail with AttributeError.
        self.auth_token = auth_token or ''

    @staticmethod
    def _error_detail(response):
        """Return the decoded JSON body of *response*, or its raw text."""
        try:
            return response.json()
        except ValueError:
            # Error pages are not guaranteed to be JSON; do not let a decode
            # failure mask the HTTP error being reported.
            return response.text

    def get_model_instance(self, model_class, instance_id):
        """
        Using a GET, hit an API endpoint to get info on a particular instance
        of a model class such as a ScoreSet.
        This will perform the HTTP GET request and then let the class itself
        parse the JSON data.

        Parameters
        ----------
        model_class : ModelClass
            The model class to which we want to cast the response.
        instance_id : str
            The id of the object we are retrieving.

        Returns
        -------
        model_instance
            Whatever ``model_class.deserialize`` returns for the response JSON.

        Raises
        ------
        SystemExit
            If the server answers with an HTTP error status. Only
            ``requests.exceptions.HTTPError`` is handled here; any other
            exception propagates unchanged.
        """
        model_url = f"{self.base_url}{model_class.api_url()}"
        instance_url = f"{model_url}{instance_id}/"
        try:
            r = requests.get(instance_url)
            r.raise_for_status()
        except requests.exceptions.HTTPError as e:
            logging.error(self._error_detail(r))
            raise SystemExit(e)
        return model_class.deserialize(r.json())

    def post_model_instance(self, model_instance):
        """
        Using a POST, upload a model instance to its API endpoint.

        The instance's ``post_payload()`` must return a ``(payload, files)``
        pair; the payload is JSON-encoded into the ``request`` form field and
        ``files`` is passed through to ``requests.post``.

        Parameters
        ----------
        model_instance : object
            Instance whose class provides ``api_url()`` and which provides
            ``post_payload()``.

        Raises
        ------
        Client.AuthTokenMissingException
            If this client was created without an auth token.
        SystemExit
            If the server answers with an HTTP error status (exit code 1).
        """
        # Validate credentials first so nothing is prepared for a request
        # that can never be sent.
        if not self.auth_token:
            error_message = 'Need to include an auth token for POST requests!'
            logging.error(error_message)
            # The exception class is nested in Client, so it must be reached
            # through the class/instance rather than as a bare name.
            raise self.AuthTokenMissingException(error_message)

        model_class = type(model_instance)
        model_url = f"{self.base_url}{model_class.api_url()}/"
        payload, files = model_instance.post_payload()
        try:
            r = requests.post(
                model_url,
                data={
                    'request': json.dumps(payload)
                },
                files=files,
                headers={'Authorization': (self.auth_token)}
            )
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            logging.error(r.text)
            sys.exit(1)
        logging.info(f"Successfully uploaded {model_instance}!")
Empty file added mavetools/models/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions mavetools/models/base.py
@@ -0,0 +1,3 @@
class APIObject():
    """Base class for models that map to an API endpoint."""

    @staticmethod
    def api_url() -> str:
        """
        Return the URL fragment (relative to the API base URL) for this model.

        Declared as a static method so it can be called on the class as well
        as on an instance. Subclasses must override it.

        Raises
        ------
        NotImplementedError
            Always, on the base class: returning ``None`` here would silently
            produce a malformed URL.
        """
        raise NotImplementedError('Subclasses of APIObject must define api_url()')
57 changes: 57 additions & 0 deletions mavetools/models/dataset.py
@@ -0,0 +1,57 @@
import attr
from typing import Any, Dict, List, Optional

from .external_identifier import ExternalIdentifier


@attr.s(kw_only=True)
class Urn:
    """Base attrs class contributing a required, keyword-only ``urn`` field."""

    urn: str = attr.ib()


@attr.s(kw_only=True)
class Keyword:
    """A keyword, carrying a single required, keyword-only ``text`` field."""

    text: str = attr.ib()


@attr.s(kw_only=True)
class TimeStamped:
    """Base attrs class contributing creation and modification date fields.

    Both fields are required and keyword-only.
    """

    creation_date: str = attr.ib()
    modification_date: str = attr.ib()


@attr.s
class Dataset(TimeStamped, Urn):
    """
    Fields shared by dataset-like records, on top of the ``urn`` and
    timestamp fields inherited from ``Urn`` and ``TimeStamped``.

    All attributes are keyword-only; those declared with ``default=None``
    are optional, the rest are required.
    """

    # Publication / ownership information.
    publish_date: str = attr.ib(kw_only=True)
    created_by: str = attr.ib(kw_only=True)
    modified_by: str = attr.ib(kw_only=True)
    approved: Optional[str] = attr.ib(kw_only=True, default=None)
    private: Optional[bool] = attr.ib(kw_only=True, default=None)
    last_child_value: Optional[Any] = attr.ib(kw_only=True, default=None)

    # Descriptive metadata.
    extra_metadata: Optional[Dict[str, str]] = attr.ib(kw_only=True, default=None)
    abstract_text: str = attr.ib(kw_only=True)
    method_text: str = attr.ib(kw_only=True)
    short_description: str = attr.ib(kw_only=True)
    title: str = attr.ib(kw_only=True)
    keywords: List[Keyword] = attr.ib(kw_only=True)
    sra_ids: Optional[List[ExternalIdentifier]] = attr.ib(kw_only=True, default=None)
    doi_ids: Optional[List[ExternalIdentifier]] = attr.ib(kw_only=True, default=None)
    pubmed_ids: Optional[List[ExternalIdentifier]] = attr.ib(kw_only=True, default=None)
    contributors: List[str] = attr.ib(kw_only=True)

    @staticmethod
    def deserialize(json_dict=None):
        """
        Build an instance from a JSON dictionary; must be overridden.

        Declared as a static method so it can be called on the class as well
        as on an instance. The optional ``json_dict`` parameter lets the base
        stub be called with one positional argument, as well as with none as
        before.

        Raises
        ------
        NotImplementedError
            Always, on the base class: returning ``None`` here would be
            mistaken for a successfully deserialized object.
        """
        raise NotImplementedError('Subclasses of Dataset must define deserialize()')


@attr.s(kw_only=True)
class NewDataset:
    """
    Fields supplied when creating a new dataset-like record.

    Every attribute is keyword-only. ``title`` and ``short_description`` are
    required; all other attributes default to ``None``.
    """

    title: str = attr.ib()
    short_description: str = attr.ib()
    abstract_text: Optional[str] = attr.ib(default=None)
    method_text: Optional[str] = attr.ib(default=None)
    keywords: Optional[List[Keyword]] = attr.ib(default=None)
    # TODO: change this once you know what this is supposed to be
    doi_ids: Optional[List[str]] = attr.ib(default=None)
    sra_ids: Optional[List[str]] = attr.ib(default=None)
    pubmed_ids: Optional[List[str]] = attr.ib(default=None)
46 changes: 46 additions & 0 deletions mavetools/models/experiment.py
@@ -0,0 +1,46 @@
import attr
from typing import List

from .base import APIObject
from .dataset import Dataset, NewDataset
from .utils import attrs_filter, attrs_serializer, prepare_for_encoding


@attr.s
class Experiment(APIObject, Dataset):
    """
    An experiment record, adding ``experimentset`` and ``scoresets`` to the
    fields inherited from ``Dataset``. Both default to ``None``.
    """

    experimentset: str = attr.ib(kw_only=True, default=None)
    scoresets: List[str] = attr.ib(kw_only=True, default=None)

    # The helpers below take no ``self``; they are declared static/class
    # methods so they work on instances as well as on the class itself.

    @staticmethod
    def api_url() -> str:
        """Return the URL fragment for experiments (with trailing slash)."""
        return 'experiments/'

    @staticmethod
    def api_id_field() -> str:
        """Return the name of the field that identifies an experiment."""
        return 'urn'

    @classmethod
    def deserialize(cls, json_dict):
        """
        Takes a json dictionary and returns an instance of this class.

        The dictionary is expanded directly into keyword arguments, so its
        keys must match the attribute names of the class.
        """
        return cls(**json_dict)


@attr.s
class NewExperiment(NewDataset):
    """
    Payload model for creating an experiment: the ``NewDataset`` fields plus
    an ``experimentset`` field that defaults to ``None``.
    """

    experimentset: str = attr.ib(kw_only=True, default=None)

    # Takes no ``self``; declared static so it works on instances as well as
    # on the class itself.
    @staticmethod
    def api_url() -> str:
        """Return the URL fragment for experiments (no trailing slash)."""
        return 'experiments'

    def post_payload(self):
        """
        Use this to POST an instance of this class.

        Converts this instance to a dict with ``attr.asdict`` (using the
        module's ``attrs_filter`` and ``attrs_serializer``) and passes it to
        ``prepare_for_encoding``.

        Returns
        -------
        tuple
            The ``(json_dict, files)`` pair produced by
            ``prepare_for_encoding``.
        """
        json_dict, files = prepare_for_encoding(
            attr.asdict(
                self,
                filter=attrs_filter,
                retain_collection_types=True,
                value_serializer=attrs_serializer
            )
        )
        return json_dict, files
9 changes: 9 additions & 0 deletions mavetools/models/external_identifier.py
@@ -0,0 +1,9 @@
import attr
from typing import Optional

@attr.s(kw_only=True)
class ExternalIdentifier:
    """
    An identifier in an external database.

    Every attribute is keyword-only. ``identifier`` is required; ``url``,
    ``dbversion`` and ``dbname`` default to ``None``.
    """

    identifier: str = attr.ib()
    url: Optional[str] = attr.ib(default=None)
    dbversion: Optional[str] = attr.ib(default=None)
    dbname: Optional[str] = attr.ib(default=None)
11 changes: 11 additions & 0 deletions mavetools/models/licence.py
@@ -0,0 +1,11 @@
import attr

@attr.s
class Licence():
    """
    A licence, identified by its required ``short_name``.

    All attributes are keyword-only; ``long_name``, ``link`` and ``version``
    default to ``None``.
    """

    long_name: str = attr.ib(kw_only=True, default=None)
    short_name: str = attr.ib(kw_only=True)
    link: str = attr.ib(kw_only=True, default=None)
    version: str = attr.ib(kw_only=True, default=None)

    # Takes no ``self``; declared static so it works on instances as well as
    # on the class itself.
    @staticmethod
    def valid_short_names() -> list:
        """Return the list of recognised licence short names."""
        return ['CC0', 'CC BY-NC-SA 4.0', 'CC BY 4.0']

0 comments on commit 69f3dfd

Please sign in to comment.