Merge pull request #387 from bento-platform/features/typed-extra-properties

Features/typed extra properties
davidlougheed committed May 19, 2023
2 parents 19037c4 + 5998c4e commit 807fe57
Showing 24 changed files with 800 additions and 143 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
@@ -9,8 +9,8 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: actions/setup-python@v2
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
name: Set up Python
with:
python-version: "3.8"
3 changes: 3 additions & 0 deletions .gitignore
@@ -37,3 +37,6 @@ config.json

# MacOS
.DS_Store

tmp/
chord_metadata_service/vrs
12 changes: 12 additions & 0 deletions .vscode/launch.json
@@ -1,6 +1,18 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Django Test",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/manage.py",
"args": [
"test",
"${relativeFileDirname}"
],
"django": true,
"justMyCode": true
},
{
"name": "Python: Run Django",
"type": "python",
1 change: 0 additions & 1 deletion README.md
@@ -165,7 +165,6 @@ CANDIG_OPA_SITE_ADMIN_KEY=
INSIDE_CANDIG=
```


## Authentication

Default authentication can be set globally in `settings.py`
23 changes: 21 additions & 2 deletions chord_metadata_service/chord/api_views.py
@@ -11,9 +11,15 @@
from chord_metadata_service.restapi.api_renderers import PhenopacketsRenderer, JSONLDDatasetRenderer, RDFDatasetRenderer
from chord_metadata_service.restapi.pagination import LargeResultsSetPagination

from .models import Project, Dataset, TableOwnership, Table
from .models import Project, Dataset, ProjectJsonSchema, TableOwnership, Table
from .permissions import OverrideOrSuperUserOnly
from .serializers import ProjectSerializer, DatasetSerializer, TableOwnershipSerializer, TableSerializer
from .serializers import (
ProjectJsonSchemaSerializer,
ProjectSerializer,
DatasetSerializer,
TableOwnershipSerializer,
TableSerializer
)
from .filters import AuthorizedDatasetFilter

logger = logging.getLogger(__name__)
@@ -109,3 +115,16 @@ def destroy(self, request, *args, **kwargs):
logger.info(f"Cleanup: removed {n_removed} objects in total")

return Response(status=status.HTTP_204_NO_CONTENT)


class ProjectJsonSchemaViewSet(CHORDPublicModelViewSet):
"""
get:
Return list of ProjectJsonSchema
post:
Create a new ProjectJsonSchema
"""

queryset = ProjectJsonSchema.objects.all().order_by("project_id")
serializer_class = ProjectJsonSchemaSerializer
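
A minimal usage sketch for the new viewset (not part of the commit), assuming it is registered on a standard DRF router; the actual URL configuration, prefix and basename used by the service may differ:

from rest_framework import routers

from chord_metadata_service.chord.api_views import ProjectJsonSchemaViewSet

# Hypothetical registration; the service's real urls module may look different.
router = routers.DefaultRouter()
router.register(r"project_json_schemas", ProjectJsonSchemaViewSet)

urlpatterns = router.urls  # exposes GET (list) and POST (create) for ProjectJsonSchema
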
32 changes: 24 additions & 8 deletions chord_metadata_service/chord/ingest/phenopackets.py
@@ -5,12 +5,12 @@

from dateutil.parser import isoparse
from decimal import Decimal

from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET
from chord_metadata_service.chord.models import Table
from chord_metadata_service.chord.models import Project, ProjectJsonSchema, Table
from chord_metadata_service.phenopackets import models as pm
from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA
from chord_metadata_service.patients.values import KaryotypicSex
from chord_metadata_service.restapi.schema_utils import patch_project_schemas
from chord_metadata_service.restapi.utils import iso_duration_to_years

from .exceptions import IngestError
@@ -58,9 +58,11 @@ def get_or_create_phenotypic_feature(pf: dict) -> pm.PhenotypicFeature:
return pf_obj


def validate_phenopacket(phenopacket_data: dict[str, Any], idx: Optional[int] = None) -> None:
def validate_phenopacket(phenopacket_data: dict[str, Any],
schema: dict = PHENOPACKET_SCHEMA,
idx: Optional[int] = None) -> None:
# Validate phenopacket data against phenopackets schema.
validation = schema_validation(phenopacket_data, PHENOPACKET_SCHEMA)
validation = schema_validation(phenopacket_data, schema)
if not validation:
# TODO: Report more precise errors
raise IngestError(
@@ -196,14 +198,17 @@ def get_or_create_hts_file(hts_file) -> pm.HtsFile:
return hts_file


def ingest_phenopacket(phenopacket_data: dict[str, Any], table_id: str, validate: bool = True,
def ingest_phenopacket(phenopacket_data: dict[str, Any],
table_id: str,
json_schema: dict = PHENOPACKET_SCHEMA,
validate: bool = True,
idx: Optional[int] = None) -> pm.Phenopacket:
"""Ingests a single phenopacket."""

if validate:
# Validate phenopacket data against phenopackets schema prior to ingestion, if specified.
# `validate` may be false if the phenopacket has already been validated.
validate_phenopacket(phenopacket_data, idx)
validate_phenopacket(phenopacket_data, json_schema, idx)

# Rough phenopackets structure:
# id: ...
@@ -304,8 +309,19 @@ def ingest_phenopacket_workflow(workflow_outputs, table_id) -> Union[list[pm.Phe
with open(json_doc_path, "r") as jf:
json_data = json.load(jf)

project_id = Project.objects.get(datasets__table_ownership=table_id)
project_schemas = ProjectJsonSchema.objects.filter(project_id=project_id).values(
"json_schema",
"required",
"schema_type",
)

# Map keyed by lower-cased schema_type; values are the schema records (json_schema, required, schema_type)
extension_schemas = {proj_schema["schema_type"].lower(): proj_schema for proj_schema in project_schemas}
json_schema = patch_project_schemas(PHENOPACKET_SCHEMA, extension_schemas)

# First, validate all phenopackets
map_if_list(validate_phenopacket, json_data)
map_if_list(validate_phenopacket, json_data, json_schema)

# Then, actually try to ingest them (if the validation passes); we don't need to re-do validation here.
return map_if_list(ingest_phenopacket, json_data, table_id, validate=False)
return map_if_list(ingest_phenopacket, json_data, table_id, json_schema=json_schema, validate=False)
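
A rough sketch (not the commit's code) of the effect the block above relies on: patch_project_schemas merges the project-defined extension schemas, keyed by lower-cased schema type, into the base PHENOPACKET_SCHEMA so that extra_properties is validated against them. The helper below is a simplified, hypothetical re-implementation for illustration only; the real one lives in restapi.schema_utils and presumably also patches nested biosample/individual sub-schemas:

from copy import deepcopy


def patch_project_schemas_sketch(base_schema: dict, extension_schemas: dict) -> dict:
    # Simplified: only patches the top-level "extra_properties" of the phenopacket schema.
    schema = deepcopy(base_schema)
    ext = extension_schemas.get("phenopacket")
    if ext is not None:
        schema.setdefault("properties", {})["extra_properties"] = ext["json_schema"]
        if ext.get("required"):
            schema["required"] = sorted(set(schema.get("required", [])) | {"extra_properties"})
    return schema
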
29 changes: 29 additions & 0 deletions chord_metadata_service/chord/migrations/0005_v3_0_0.py
@@ -0,0 +1,29 @@
# Generated by Django 4.2 on 2023-05-11 19:39

from django.db import migrations, models
import django.db.models.deletion
import uuid


class Migration(migrations.Migration):

dependencies = [
('chord', '0004_v2_14_0'),
]

operations = [
migrations.CreateModel(
name='ProjectJsonSchema',
fields=[
('id', models.CharField(default=uuid.uuid4, editable=False, max_length=200, primary_key=True, serialize=False)),
('required', models.BooleanField(default=False, help_text='Determines if the extra_properties field is required or not.')),
('json_schema', models.JSONField()),
('schema_type', models.CharField(choices=[('PHENOPACKET', 'Phenopacket'), ('BIOSAMPLE', 'Biosample'), ('INDIVIDUAL', 'Individual')], max_length=200)),
('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='project_schemas', to='chord.project')),
],
),
migrations.AddConstraint(
model_name='projectjsonschema',
constraint=models.UniqueConstraint(fields=('project', 'schema_type'), name='unique_project_schema'),
),
]
52 changes: 48 additions & 4 deletions chord_metadata_service/chord/models.py
@@ -1,16 +1,16 @@
import collections
import uuid

from django.core.exceptions import ValidationError
from django.db import models
from django.utils import timezone
from chord_metadata_service.phenopackets.models import Phenopacket
from chord_metadata_service.patients.models import Individual
from chord_metadata_service.phenopackets.models import Biosample, Phenopacket
from chord_metadata_service.resources.models import Resource

from ..restapi.models import SchemaType
from .data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET, DATA_TYPE_MCODEPACKET


__all__ = ["Project", "Dataset", "TableOwnership", "Table"]
__all__ = ["Project", "Dataset", "TableOwnership", "Table", "ProjectJsonSchema"]


def version_default():
@@ -210,3 +210,47 @@ def dataset(self):

def __str__(self):
return f"{self.name} (ID: {self.ownership_record.table_id}, Type: {self.data_type})"


class ProjectJsonSchema(models.Model):
id = models.CharField(primary_key=True, max_length=200, default=uuid.uuid4, editable=False)
project = models.ForeignKey(Project, on_delete=models.CASCADE, related_name="project_schemas")
required = models.BooleanField(default=False,
help_text="Determines if the extra_properties field is required or not.")
json_schema = models.JSONField()
schema_type = models.CharField(max_length=200, choices=SchemaType.choices)

def clean(self):
"""
Creation of ProjectJsonSchema is prohibited if the target project already
contains data matching the schema_type
"""

super().clean()

target_count = 0
if self.schema_type == SchemaType.PHENOPACKET:
target_count = Phenopacket.objects.filter(
table__ownership_record__dataset__project_id=self.project_id
).count()
elif self.schema_type == SchemaType.INDIVIDUAL:
target_count = Individual.objects.filter(
phenopackets__table__ownership_record__dataset__project_id=self.project_id
).count()
elif self.schema_type == SchemaType.BIOSAMPLE:
target_count = Biosample.objects.filter(
individual__phenopackets__table__ownership_record__dataset__project_id=self.project_id
).count()

if target_count > 0:
raise ValidationError(f"Project {self.project_id} already contains data for {self.schema_type}")

def save(self, *args, **kwargs):
# Override in order to call self.clean to validate data
self.clean()
return super().save(*args, **kwargs)

class Meta:
constraints = [
models.UniqueConstraint(fields=["project", "schema_type"], name="unique_project_schema")
]
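
A hedged usage sketch of the guard in clean(): creating a ProjectJsonSchema for a project that already holds data of the matching type is rejected. Assumes an existing Project instance `project` with ingested phenopackets (e.g. in a Django shell or test); not part of the commit:

from django.core.exceptions import ValidationError

from chord_metadata_service.chord.models import ProjectJsonSchema
from chord_metadata_service.restapi.models import SchemaType

try:
    ProjectJsonSchema.objects.create(
        project=project,  # assumption: this project already has phenopacket data
        schema_type=SchemaType.PHENOPACKET,
        required=True,
        json_schema={"type": "object", "properties": {"cohort": {"type": "string"}}},
    )
except ValidationError as e:
    # save() calls clean(), which raises because phenopackets already exist for the project
    print(e)
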
11 changes: 10 additions & 1 deletion chord_metadata_service/chord/serializers.py
@@ -5,7 +5,7 @@
from chord_metadata_service.restapi.dats_schemas import get_dats_schema, CREATORS
from chord_metadata_service.restapi.utils import transform_keys

from .models import Project, Dataset, TableOwnership, Table
from .models import Project, Dataset, ProjectJsonSchema, TableOwnership, Table
from .schemas import LINKED_FIELD_SETS_SCHEMA


@@ -139,10 +139,19 @@ class Meta:
fields = '__all__'


class ProjectJsonSchemaSerializer(GenericSerializer):
id = serializers.CharField(read_only=True)

class Meta:
model = ProjectJsonSchema
fields = "__all__"


class ProjectSerializer(serializers.ModelSerializer):
# Don't inherit GenericSerializer to not pop empty fields

datasets = DatasetSerializer(read_only=True, many=True, exclude_when_nested=["project"])
project_schemas = ProjectJsonSchemaSerializer(read_only=True, many=True)

# noinspection PyMethodMayBeStatic
def validate_title(self, value):
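
A brief, hedged sketch of what the nested serialization adds: a serialized Project now carries a read-only project_schemas list. Field values below are illustrative only:

from chord_metadata_service.chord.serializers import ProjectSerializer

data = ProjectSerializer(project).data  # `project`: an existing Project instance
# data["project_schemas"] might look like:
# [{"id": "<uuid>", "project": "<project id>", "required": False,
#   "schema_type": "PHENOPACKET", "json_schema": {...}}]
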
23 changes: 23 additions & 0 deletions chord_metadata_service/chord/tests/constants.py
@@ -1,5 +1,7 @@
import uuid

from chord_metadata_service.restapi.models import SchemaType

from ..data_types import DATA_TYPE_PHENOPACKET

__all__ = [
@@ -73,6 +75,15 @@
}
]

DEFAULT_PROJECT_JSON_SCHEMA = {
"type": "object",
"properties": {
"string_prop": {"type": "string"},
"bool_prop": {"type": "boolean"},
"obj_prop": {"type": "object"}
}
}


def valid_dataset_1(project_id):
return {
@@ -101,6 +112,18 @@ def valid_table_1(dataset_id, model_compatible=False):
)


def valid_project_json_schema(project_id: str,
schema_type=SchemaType.PHENOPACKET,
required: bool = False,
json_schema: dict = DEFAULT_PROJECT_JSON_SCHEMA):
return {
"project": project_id,
"required": required,
"schema_type": schema_type,
"json_schema": json_schema
}


def dats_dataset(project_id, creators):
return {
"version": "1.0",
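
A hedged test sketch (not part of the commit) that uses the new valid_project_json_schema helper to POST to the ProjectJsonSchema endpoint. The route name "projectjsonschema-list" is an assumption; the service's URL conf may register a different name:

from django.urls import reverse
from rest_framework import status
from rest_framework.test import APITestCase

from chord_metadata_service.chord.models import Project
from chord_metadata_service.chord.tests.constants import valid_project_json_schema


class ProjectJsonSchemaCreateTest(APITestCase):
    def setUp(self):
        self.project = Project.objects.create(title="Project 1", description="")

    def test_create_project_json_schema(self):
        r = self.client.post(
            reverse("projectjsonschema-list"),  # assumed DRF route name
            data=valid_project_json_schema(project_id=str(self.project.pk)),
            format="json",
        )
        self.assertEqual(r.status_code, status.HTTP_201_CREATED)
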
35 changes: 35 additions & 0 deletions chord_metadata_service/chord/tests/helpers.py
@@ -0,0 +1,35 @@
from uuid import uuid4
from django.test import TestCase
from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET

from chord_metadata_service.chord.models import Dataset, Project, Table, TableOwnership
from chord_metadata_service.chord.tests.constants import VALID_DATA_USE_1


class ProjectTestCase(TestCase):
"""
Helper TestCase class that creates a Project, Dataset, TableOwnership and Table.
Data is created once for the whole test case at the class level.
"""

@classmethod
def setUpTestData(cls) -> None:
cls.project = Project.objects.create(title="Project 1", description="")
cls.dataset = Dataset.objects.create(
title="Dataset 1",
description="Some dataset",
data_use=VALID_DATA_USE_1,
project=cls.project
)
cls.table_ownership = TableOwnership.objects.create(
table_id=str(uuid4()),
service_id=str(uuid4()),
service_artifact="variant",
dataset=cls.dataset
)
cls.table = Table.objects.create(
ownership_record=cls.table_ownership,
name="Table 1",
data_type=DATA_TYPE_PHENOPACKET
)
return super().setUpTestData()
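
A short usage sketch (hypothetical test, not part of the commit): other test cases can inherit ProjectTestCase to get the pre-built Project/Dataset/TableOwnership/Table fixtures:

from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET
from chord_metadata_service.chord.tests.helpers import ProjectTestCase


class ExampleFixtureTest(ProjectTestCase):
    def test_fixture_wiring(self):
        # Fixtures created in setUpTestData are available as class attributes
        self.assertEqual(self.table.data_type, DATA_TYPE_PHENOPACKET)
        self.assertEqual(self.table.ownership_record.dataset.project, self.project)
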
