Skip to content

Commit

Permalink
Merge pull request #477 from bento-platform/feat/exp-res-index-url
Browse files Browse the repository at this point in the history
feat(experiments): add experiment result indices field + fix url field
  • Loading branch information
davidlougheed committed Feb 1, 2024
2 parents 2142e8e + 96560c3 commit 50f7de1
Show file tree
Hide file tree
Showing 11 changed files with 140 additions and 16 deletions.
2 changes: 2 additions & 0 deletions chord_metadata_service/chord/ingest/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def create_experiment_result(er: dict) -> em.ExperimentResult:
identifier=er.get("identifier"),
description=er.get("description"),
filename=er.get("filename"),
url=er.get("url"),
indices=er.get("indices", []),
genome_assembly_id=er.get("genome_assembly_id"),
file_format=er.get("file_format"),
data_output_type=er.get("data_output_type"),
Expand Down
10 changes: 10 additions & 0 deletions chord_metadata_service/experiments/descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@
"description": "Description of an experiment result.",
"filename": "The name of the file containing the result.",
"url": "A URL pointing to the file containing the result.",
"indices": {
"description": "An array of indices for the file containing the result.",
"items": {
"description": "An object describing an index for the file containing the result.",
"properties": {
"url": "A URL pointing to the index for the file containing the result.",
"format": "The index format of the file stored at the index URL.",
},
},
},
"genome_assembly_id": "Reference genome assembly ID.",
"file_format": "(Controlled Vocabulary) File format.",
"data_output_type": "The type of data output: Raw or Derived data."
Expand Down
5 changes: 5 additions & 0 deletions chord_metadata_service/experiments/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class ExperimentResultFilter(django_filters.rest_framework.FilterSet):
identifier = django_filters.CharFilter(lookup_expr='exact')
description = django_filters.CharFilter(lookup_expr='icontains')
filename = django_filters.CharFilter(lookup_expr='icontains')
url = django_filters.CharFilter(lookup_expr='icontains')
indices = django_filters.CharFilter(method="filter_indices", label="Indices")
genome_assembly_id = django_filters.CharFilter(lookup_expr='iexact')
file_format = django_filters.CharFilter(lookup_expr='iexact')
data_output_type = django_filters.CharFilter(lookup_expr='icontains')
Expand All @@ -49,5 +51,8 @@ class Meta:
model = ExperimentResult
exclude = ["creation_date", "created", "updated"]

# Custom filter callback for the `indices` query parameter; it is referenced by name from the
# `indices` CharFilter declared on this filter set (method="filter_indices").
# Returns `qs` narrowed with the `indices__icontains` lookup against the raw query-string `value`.
# `name` is accepted for the callback signature but is not used.
# NOTE(review): `indices` is a JSONField on the model, so the exact text being matched depends on how
# the database renders the JSON for `icontains` - confirm against the target backend.
def filter_indices(self, qs, name, value):
return qs.filter(indices__icontains=value)

# Custom filter callback for `extra_properties`: returns `qs` narrowed with the
# `extra_properties__icontains` lookup against `value`. `name` is not used.
# NOTE(review): the filter declaration that points at this method is outside the visible hunk -
# presumably a CharFilter(method="filter_extra_properties"); verify in the full file.
def filter_extra_properties(self, qs, name, value):
return qs.filter(extra_properties__icontains=value)
8 changes: 7 additions & 1 deletion chord_metadata_service/experiments/migrations/0010_v7_0_0.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by Django 4.2.7 on 2024-01-24 15:48
# Generated by Django 4.2.9 on 2024-01-31 18:56

import chord_metadata_service.restapi.validators
from django.db import migrations, models


Expand All @@ -15,4 +16,9 @@ class Migration(migrations.Migration):
name='url',
field=models.CharField(blank=True, help_text='A URL pointing to the file containing the result.', max_length=500, null=True),
),
migrations.AddField(
model_name='experimentresult',
name='indices',
field=models.JSONField(blank=True, default=list, help_text='An array of indices for the file containing the result.', validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/experiments/experiment_result_file_index_list', '$schema': 'http://json-schema.org/draft-07/schema#', 'description': 'Schema for describing a list of object representing an indices of an experiment result file.', 'items': {'$id': '/chord_metadata_service/experiments/experiment_result_file_index', '$schema': 'http://json-schema.org/draft-07/schema#', 'description': 'Schema for describing an object representing an index of an experiment result file.', 'properties': {'format': {'enum': ['BAI', 'BGZF', 'CRAI', 'CSI', 'TABIX', 'TRIBBLE'], 'type': 'string'}, 'url': {'$id': '/chord_metadata_service/experiments/data_file_or_record_url', '$schema': 'http://json-schema.org/draft-07/schema#', 'description': 'A URL of a particular scheme, pointing to a data file OR a DRS record which itself points to a data file.', 'format': 'uri', 'pattern': '^(data|doi|drs|file|ftp|https?|s3)://', 'title': 'Data file or record URL', 'type': 'string'}}, 'required': ['url', 'format'], 'title': 'Experiment result file index schema', 'type': 'object'}, 'title': 'Experiment result file index list schema', 'type': 'array'}, formats=None)]),
),
]
11 changes: 10 additions & 1 deletion chord_metadata_service/experiments/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from chord_metadata_service.restapi.description_utils import rec_help
from chord_metadata_service.restapi.validators import ontology_list_validator, key_value_validator
from chord_metadata_service.phenopackets.models import Biosample
import chord_metadata_service.experiments.descriptions as d

from . import descriptions as d
from .validators import file_index_list_validator

__all__ = ["Experiment", "ExperimentResult", "Instrument"]

Expand Down Expand Up @@ -85,7 +87,14 @@ class ExperimentResult(models.Model, IndexableMixin):
help_text=rec_help(d.EXPERIMENT_RESULT, "description"))
filename = CharField(max_length=500, blank=True, null=True,
help_text=rec_help(d.EXPERIMENT_RESULT, "filename"))
# URLs:
# - one file for the experiment result file proper
url = CharField(max_length=500, blank=True, null=True, help_text=rec_help(d.EXPERIMENT_RESULT, "url"))
# - an array of index file objects (e.g., BAI, Tabix, Tribble, BGZF), formatted like
# { "url": "...", "format": "BAI" | "TABIX" | "TRIBBLE" | ... }
indices = JSONField(blank=True, default=list, validators=[file_index_list_validator],
help_text=rec_help(d.EXPERIMENT_RESULT, "indices"))

genome_assembly_id = CharField(max_length=50, blank=True, null=True,
help_text=rec_help(d.EXPERIMENT_RESULT, "genome_assembly_id"))
file_format = CharField(max_length=50, blank=True, null=True,
Expand Down
66 changes: 56 additions & 10 deletions chord_metadata_service/experiments/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,60 @@
from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT
from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe, get_schema_app_id, sub_schema_uri

__all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]
__all__ = [
"DATA_FILE_OR_RECORD_URL_SCHEMA",
"EXPERIMENT_RESULT_FILE_INDEX_SCHEMA",
"EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA",
"EXPERIMENT_SCHEMA",
"EXPERIMENT_RESULT_SCHEMA",
"INSTRUMENT_SCHEMA",
]

base_uri = get_schema_app_id(Path(__file__).parent.name)

# JSON Schema (draft-07) for a URL string that points either to a data file or to a DRS record which
# itself points to a data file. Shared by the index `url` property and the experiment result `url`
# property in this module, so both accept the same set of URL schemes.
# The serialized form of this dict also appears inside migration 0010_v7_0_0 (as part of the
# `indices` validator), so keep values stable unless a new migration is intended.
DATA_FILE_OR_RECORD_URL_SCHEMA = {
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": sub_schema_uri(base_uri, "data_file_or_record_url"),
"title": "Data file or record URL",
"description": "A URL of a particular scheme, pointing to a data file OR a DRS record which itself points to a "
"data file.",
"type": "string",
# NOTE(review): whether "format" is actually enforced depends on how the validator is configured
# (the migration shows formats=None) - the "pattern" below may be the only effective constraint; confirm.
"format": "uri",
# only supported schemes allowed, in alphabetical order:
# NOTE(review): the pattern requires "://" right after the scheme, so values written without "//"
# (e.g. "doi:10.1234/abcd" or "data:text/plain;base64,...") do not match - confirm this is intended.
"pattern": r"^(data|doi|drs|file|ftp|https?|s3)://",
}

# JSON Schema (draft-07) for a single index-file entry of an experiment result: an object with a
# `url` (validated by DATA_FILE_OR_RECORD_URL_SCHEMA) and a `format` drawn from a fixed enum.
# Both keys are required. No "additionalProperties" restriction is declared, so extra keys on an
# entry are not rejected by this schema.
EXPERIMENT_RESULT_FILE_INDEX_SCHEMA = {
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": sub_schema_uri(base_uri, "experiment_result_file_index"),
"title": "Experiment result file index schema",
"description": "Schema for describing an object representing an index of an experiment result file.",
"type": "object",
"properties": {
"url": DATA_FILE_OR_RECORD_URL_SCHEMA,
"format": {
"type": "string",
# Enum matching is exact (upper-case values only).
"enum": [
"BAI", # BAM index files ( http://samtools.github.io/hts-specs/SAMv1.pdf "BAI" )
"BGZF", # BGZip index files (often .gzi)
"CRAI", # CRAM index files ( https://samtools.github.io/hts-specs/CRAMv3.pdf "CRAM index" )
"CSI", # See http://samtools.github.io/hts-specs/CSIv1.pdf
"TABIX", # See https://samtools.github.io/hts-specs/tabix.pdf
# NOTE(review): no spec reference given for TRIBBLE - presumably Tribble (htsjdk/IGV) index files; confirm.
"TRIBBLE",
],
}
},
"required": ["url", "format"],
}
# JSON Schema (draft-07) for the value of an experiment result's `indices` field: an array whose
# items must each satisfy EXPERIMENT_RESULT_FILE_INDEX_SCHEMA. An empty array is valid (no minItems).
# This dict is what the `indices` model-field validator is built from, and its serialized form is
# embedded in migration 0010_v7_0_0.
# NOTE(review): the "description" wording is awkward ("a list of object representing an indices"),
# but the same text is embedded in that migration - rewording it here would presumably be picked up
# as a field change by makemigrations; confirm before editing.
EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA = {
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": sub_schema_uri(base_uri, "experiment_result_file_index_list"),
"title": "Experiment result file index list schema",
"description": "Schema for describing a list of object representing an indices of an experiment result file.",
"type": "array",
"items": EXPERIMENT_RESULT_FILE_INDEX_SCHEMA,
}

EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": sub_schema_uri(base_uri, "experiment_result"),
Expand All @@ -23,20 +73,16 @@
"filename": {
"type": "string"
},
"url": {
"type": "string",
"format": "uri",
# only supported schemes allowed, in alphabetical order:
"pattern": r"^(data|doi|drs|file|ftp|https?|s3)://",
},
"url": DATA_FILE_OR_RECORD_URL_SCHEMA,
"indices": EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA,
"genome_assembly_id": {
"type": "string",
},
"file_format": {
"type": "string",
"enum": ["SAM", "BAM", "CRAM", "BAI", "CRAI", "VCF", "BCF", "MAF", "GVCF", "BigWig", "BigBed", "FASTA",
"FASTQ", "TAB", "SRA", "SRF", "SFF", "GFF", "TABIX", "PDF", "CSV", "TSV", "JPEG", "PNG", "GIF",
"MARKDOWN", "MP3", "M4A", "MP4", "DOCX", "XLS", "XLSX", "UNKNOWN", "OTHER"]
"enum": ["SAM", "BAM", "CRAM", "VCF", "BCF", "MAF", "GVCF", "BigWig", "BigBed", "FASTA", "FASTQ", "TAB",
"SRA", "SRF", "SFF", "GFF", "PDF", "CSV", "TSV", "JPEG", "PNG", "GIF", "MARKDOWN", "MP3", "M4A",
"MP4", "DOCX", "XLS", "XLSX", "UNKNOWN", "OTHER"]
},
"data_output_type": {
"type": "string",
Expand Down
22 changes: 18 additions & 4 deletions chord_metadata_service/experiments/search_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,31 @@
"url": {
"search": search_optional_str(3)
},
"indices": {
"items": {
"properties": {
"url": {
"search": search_optional_str(0),
},
"format": {
"search": search_optional_eq(1),
},
},
"search": {"database": {"type": "jsonb"}},
},
"search": {"order": 4, "database": {"type": "jsonb"}}
},
"file_format": {
"search": search_optional_eq(4)
"search": search_optional_eq(5)
},
"data_output_type": {
"search": search_optional_eq(5)
"search": search_optional_eq(6)
},
"usage": {
"search": search_optional_str(6)
"search": search_optional_str(7)
},
"genome_assembly_id": {
"search": search_optional_eq(7)
"search": search_optional_eq(8)
},
},
"search": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
"description": "test",
"filename": "sample1_01.vcf.gz",
"url": "http://example.org/sample1_01.vcf.gz",
"indices": [
{"url": "http://example.org/sample1_01.vcf.gz.tbi", "format": "TABIX"}
],
"file_format": "VCF",
"data_output_type": "Derived data",
"usage": "Visualized",
Expand Down
14 changes: 14 additions & 0 deletions chord_metadata_service/experiments/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,20 @@ def test_filter_experiment_results(self):
self.assertEqual(response_data["count"], 2)
self.assertEqual(len(response_data["results"]), 2)

# Checks the `url` query filter on the experiment results endpoint: a partial value ("example.org")
# is sent, and the test asserts a 200 response containing exactly one matching result
# (both the reported `count` and the length of `results`).
# NOTE(review): relies on exactly one loaded experiment result having a URL containing
# "example.org" - the fixture setup is outside this hunk; verify if fixtures change.
def test_filter_experiment_results_url(self):
response = self.client.get('/api/experimentresults?url=example.org')
self.assertEqual(response.status_code, status.HTTP_200_OK)
response_data = response.json()
self.assertEqual(response_data["count"], 1)
self.assertEqual(len(response_data["results"]), 1)

# Checks the `indices` query filter on the experiment results endpoint: the lower-case value
# "tabix" is sent, and the test asserts a 200 response containing exactly one matching result.
# NOTE(review): presumably this matches a fixture whose index entry has format "TABIX", i.e. it
# exercises the case-insensitive match - the fixture loading is outside this hunk; confirm.
def test_filter_experiment_results_indices(self):
response = self.client.get('/api/experimentresults?indices=tabix')
self.assertEqual(response.status_code, status.HTTP_200_OK)
response_data = response.json()
self.assertEqual(response_data["count"], 1)
self.assertEqual(len(response_data["results"]), 1)

def test_filter_experiment_results_by_dataset_1(self):
response = self.client.get('/api/experimentresults?datasets=dataset_1')
self.assertEqual(response.status_code, status.HTTP_200_OK)
Expand Down
6 changes: 6 additions & 0 deletions chord_metadata_service/experiments/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from chord_metadata_service.restapi.validators import JsonSchemaValidator
from .schemas import EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA

# Only the validator instance is part of this module's public API.
__all__ = ["file_index_list_validator"]

# Module-level validator instance built from EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA.
# It is imported by the experiments models module and passed in `validators=[...]` for the
# `indices` JSONField on ExperimentResult.
file_index_list_validator = JsonSchemaValidator(EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA)
9 changes: 9 additions & 0 deletions chord_metadata_service/restapi/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
)


# Names exported by this module. `JsonSchemaValidator` is listed so that other apps can build their
# own schema validators from it (the experiments validators module imports it from here).
__all__ = [
"JsonSchemaValidator",
"age_or_age_range_validator",
"ontology_validator",
"ontology_list_validator",
"key_value_validator",
]


class JsonSchemaValidator:
""" Custom class based validator to validate against Json schema for JSONField """

Expand Down

0 comments on commit 50f7de1

Please sign in to comment.