Merge pull request #409 from bento-platform/features/tables-removal

feat!: remove tables concept

v-rocheleau committed Aug 30, 2023
2 parents b5190c0 + 85d9350, commit 169f0fc

Showing 83 changed files with 1,283 additions and 1,331 deletions.
12 changes: 1 addition & 11 deletions chord_metadata_service/chord/admin.py
@@ -1,6 +1,6 @@
 from django.contrib import admin
 
-from .models import Project, Dataset, TableOwnership, Table
+from .models import Project, Dataset
 
 
 @admin.register(Project)
@@ -11,13 +11,3 @@ class ProjectAdmin(admin.ModelAdmin):
 @admin.register(Dataset)
 class DatasetAdmin(admin.ModelAdmin):
     pass
-
-
-@admin.register(TableOwnership)
-class TableOwnershipAdmin(admin.ModelAdmin):
-    pass
-
-
-@admin.register(Table)
-class TableAdmin(admin.ModelAdmin):
-    pass
61 changes: 15 additions & 46 deletions chord_metadata_service/chord/api_views.py
@@ -1,33 +1,33 @@
 import logging
 import json
 
+from asgiref.sync import async_to_sync, sync_to_async
+
 from rest_framework import status, viewsets
 from rest_framework.permissions import BasePermission, SAFE_METHODS
 from rest_framework.response import Response
 from rest_framework.settings import api_settings
 from rest_framework.decorators import action
 
 from django_filters.rest_framework import DjangoFilterBackend
+from chord_metadata_service.cleanup.run_all import run_all_cleanup
 
-from chord_metadata_service.cleanup import run_all_cleanup
 from chord_metadata_service.restapi.api_renderers import PhenopacketsRenderer, JSONLDDatasetRenderer, RDFDatasetRenderer
 from chord_metadata_service.restapi.pagination import LargeResultsSetPagination
 
-from .models import Project, Dataset, ProjectJsonSchema, TableOwnership, Table
+from .models import Project, Dataset, ProjectJsonSchema
 from .permissions import OverrideOrSuperUserOnly
 from .serializers import (
     ProjectJsonSchemaSerializer,
     ProjectSerializer,
-    DatasetSerializer,
-    TableOwnershipSerializer,
-    TableSerializer
+    DatasetSerializer
 )
 from .filters import AuthorizedDatasetFilter
 
 logger = logging.getLogger(__name__)
 
 
-__all__ = ["ProjectViewSet", "DatasetViewSet", "TableOwnershipViewSet", "TableViewSet"]
+__all__ = ["ProjectViewSet", "DatasetViewSet"]
 
 
 class ReadOnly(BasePermission):
@@ -69,13 +69,14 @@ class DatasetViewSet(CHORDPublicModelViewSet):
 
     filter_backends = [DjangoFilterBackend]
     filterset_class = AuthorizedDatasetFilter
+    lookup_url_kwarg = "dataset_id"
 
     serializer_class = DatasetSerializer
    renderer_classes = tuple(CHORDModelViewSet.renderer_classes) + (JSONLDDatasetRenderer, RDFDatasetRenderer,)
     queryset = Dataset.objects.all().order_by("title")
 
     @action(detail=True, methods=['get'])
-    def dats(self, request, pk=None):
+    def dats(self, _request, *_args, **_kwargs):
         """
         Retrieve a specific DATS file for a given dataset.
         """
@@ -84,48 +85,16 @@ def dats(self, request, pk=None):
         dataset = self.get_object()
         return Response(json.loads(dataset.dats_file))
 
+    @async_to_sync
+    async def destroy(self, request, *args, **kwargs):
+        get_obj_async = sync_to_async(self.get_object)
 
-class TableOwnershipViewSet(CHORDPublicModelViewSet):
-    """
-    get:
-    Return a list of table-(dataset|dataset,biosample) relationships
-    post:
-    Create a new relationship between a dataset (and optionally a specific biosample) and a table
-    in a data service
-    """
-
-    queryset = TableOwnership.objects.all().order_by("table_id")
-    serializer_class = TableOwnershipSerializer
-
-
-class TableViewSet(CHORDPublicModelViewSet):
-    """
-    get:
-    Return a list of tables
-    post:
-    Create a new table
-    """
+        dataset = await get_obj_async()
+        await dataset.adelete()
 
-    # TODO: Create TableOwnership if needed - here or model?
-
-    queryset = Table.objects.all().prefetch_related("ownership_record").order_by("ownership_record_id")
-    serializer_class = TableSerializer
-
-    def destroy(self, request, *args, **kwargs):
-        # First, delete the table record itself
-        # - use the cascade from the ownership record rather than the default DRF behaviour
-        table = self.get_object()
-        table_id = table.ownership_record_id
-        table.ownership_record.delete()
-        table.delete()
-
-        # Then, run cleanup
-        logger.info(f"Running cleanup after deleting table {table_id} via DRF API")
-        n_removed = run_all_cleanup()
+        logger.info(f"Running cleanup after deleting dataset {dataset.identifier} via DRF API")
+        n_removed = await run_all_cleanup()
         logger.info(f"Cleanup: removed {n_removed} objects in total")
 
         return Response(status=status.HTTP_204_NO_CONTENT)


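Note on the new destroy(): DRF dispatch is synchronous, so the async body is wrapped back into a sync callable with async_to_sync, while the synchronous self.get_object() is bridged the other way with sync_to_async, and the delete itself uses Django's async adelete() (Django 4.1+). A minimal runnable sketch of the same wrap-both-ways pattern using plain asgiref; load_record and delete_record are illustrative stand-ins, not part of this commit:

import asyncio
from asgiref.sync import async_to_sync, sync_to_async

def load_record():  # stands in for the synchronous self.get_object()
    return {"id": "ds-1"}

async def delete_record(record):  # stands in for dataset.adelete()
    await asyncio.sleep(0)  # placeholder for async database work

@async_to_sync
async def destroy():  # callable from sync code, like a DRF view method
    record = await sync_to_async(load_record)()  # sync -> async bridge
    await delete_record(record)
    return record["id"]

print(destroy())  # runs the async body on an event loop and prints "ds-1"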
6 changes: 3 additions & 3 deletions chord_metadata_service/chord/export/cbioportal.py
@@ -90,15 +90,15 @@ def study_export(get_path: Callable[[str], str], dataset_id: str):
     # Export patients.
     with open(get_path(PATIENT_DATA_FILENAME), "w", newline="\n") as file_patient:
         # Note: plural in `phenopackets` is intentional (related_name property in model)
-        indiv = Individual.objects.filter(phenopackets__table__ownership_record__dataset_id=dataset.identifier)
+        indiv = Individual.objects.filter(phenopackets__dataset_id=dataset.identifier)
         individual_export(indiv, file_patient)
 
     with open(get_path(PATIENT_META_FILENAME), "w", newline="\n") as file_patient_meta:
         clinical_meta_export(cbio_study_id, PATIENT_DATATYPE, file_patient_meta)
 
     # Export samples
     with open(get_path(SAMPLE_DATA_FILENAME), "w", newline="\n") as file_sample:
-        sampl = pm.Biosample.objects.filter(phenopacket__table__ownership_record__dataset_id=dataset.identifier)
+        sampl = pm.Biosample.objects.filter(phenopacket__dataset_id=dataset.identifier)
         sample_export(sampl, file_sample)
 
     with open(get_path(SAMPLE_META_FILENAME), "w", newline="\n") as file_sample_meta:
@@ -109,7 +109,7 @@ def study_export(get_path: Callable[[str], str], dataset_id: str):
             open(get_path(CASE_LIST_SEQUENCED), "w", newline="\n") as file_case_list:
         exp_res = (
             ExperimentResult.objects
-            .filter(experiment__table__ownership_record__dataset_id=dataset.identifier, file_format="MAF")
+            .filter(experiment__dataset_id=dataset.identifier, file_format="MAF")
             .annotate(biosample_id=F("experiment__biosample"))
         )

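The query changes above all collapse the same lookup path: with phenopackets, biosamples, and experiments now holding a direct dataset foreign key, the traversal through Table and TableOwnership disappears. A comment-only sketch of the before/after lookups (assumes a configured Django project; not runnable on its own):

# Before: three relation hops, each one a SQL JOIN:
#   Individual.objects.filter(
#       phenopackets__table__ownership_record__dataset_id=dataset.identifier)
# After: a single hop through the new direct foreign key:
#   Individual.objects.filter(phenopackets__dataset_id=dataset.identifier)
# Each "__" segment before the final field traverses a relation, so the
# rewrite drops two JOINs from each of the three export queries above.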
7 changes: 1 addition & 6 deletions chord_metadata_service/chord/export/metadata.py
@@ -1,12 +1,11 @@
-from chord_metadata_service.chord.models import Dataset, Project, Table
+from chord_metadata_service.chord.models import Dataset, Project
 from chord_metadata_service.chord.workflows.metadata import WORKFLOW_CBIOPORTAL
 
 from .cbioportal import study_export as export_cbioportal_workflow
 
 __all__ = [
     "OBJECT_TYPE_PROJECT",
     "OBJECT_TYPE_DATASET",
-    "OBJECT_TYPE_TABLE",
 
     "EXPORT_OBJECT_TYPE",
     "EXPORT_FORMATS",
@@ -17,7 +16,6 @@
 
 OBJECT_TYPE_PROJECT = "project"
 OBJECT_TYPE_DATASET = "dataset"
-OBJECT_TYPE_TABLE = "table"
 
 EXPORT_OBJECT_TYPE = {
     OBJECT_TYPE_PROJECT: {
@@ -26,9 +24,6 @@
     OBJECT_TYPE_DATASET: {
         "model": Dataset
     },
-    OBJECT_TYPE_TABLE: {
-        "model": Table
-    },
 }
 
 EXPORT_FORMATS = {WORKFLOW_CBIOPORTAL}
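With the table entry gone, EXPORT_OBJECT_TYPE resolves only projects and datasets. A hedged sketch of how such a registry is consumed; resolve_model and the stand-in classes are illustrative (the real lookup happens in export/views.py, shown next):

class Project: ...   # stand-ins so the sketch runs outside Django
class Dataset: ...

EXPORT_OBJECT_TYPE = {
    "project": {"model": Project},
    "dataset": {"model": Dataset},
}

def resolve_model(object_type: str):
    # unknown types (including the removed "table") now raise KeyError
    return EXPORT_OBJECT_TYPE[object_type]["model"]

print(resolve_model("dataset"))  # <class '__main__.Dataset'>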
4 changes: 2 additions & 2 deletions chord_metadata_service/chord/export/views.py
@@ -53,7 +53,7 @@ def export(request: Request):
     )
 
     object_id = request.data["object_id"]
-    object_type: str = request.data["object_type"]  # 'dataset', 'table',...
+    object_type: str = request.data["object_type"]  # 'project', 'dataset',...
 
     model = EXPORT_OBJECT_TYPE[object_type]["model"]
     if not model.objects.filter(identifier=object_id).exists():
@@ -86,7 +86,7 @@ def export(request: Request):
 
     # If no output path parameter has been provided, the generated export
     # is returned as an attachment to the Response and everything will
-    # be cleaned afterwards.
+    # be cleaned afterward.
     # Otherwise, the provided local path is under the responsibility of
     # the caller
     if not output_path:
4 changes: 2 additions & 2 deletions chord_metadata_service/chord/filters.py
@@ -45,10 +45,10 @@ def authorize_datasets(qs, name, value):
 
 class AuthorizedDatasetFilter(django_filters.rest_framework.FilterSet):
     datasets = django_filters.CharFilter(
-        method=filter_datasets, field_name="table_ownership__dataset__title",
+        method=filter_datasets, field_name="dataset__title",
         label="Datasets"
     )
     authorized_datasets = django_filters.CharFilter(
-        method=authorize_datasets, field_name="table_ownership__dataset__title",
+        method=authorize_datasets, field_name="dataset__title",
         label="Authorized datasets"
     )
27 changes: 7 additions & 20 deletions chord_metadata_service/chord/ingest/experiments.py
@@ -1,10 +1,8 @@
 from __future__ import annotations
 
-import json
 import uuid
 
-from chord_metadata_service.chord.data_types import DATA_TYPE_EXPERIMENT
-from chord_metadata_service.chord.models import Table, TableOwnership
+from chord_metadata_service.chord.models import Dataset
 from chord_metadata_service.experiments import models as em
 from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, EXPERIMENT_RESULT_SCHEMA
 from chord_metadata_service.phenopackets import models as pm
@@ -14,7 +12,6 @@
 from .logger import logger
 from .resources import ingest_resource
 from .schema import schema_validation
-from .utils import get_output_or_raise, workflow_file_output_to_path
 
 __all__ = [
     "create_instrument",
@@ -69,7 +66,7 @@ def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
 
 def ingest_experiment(
     experiment_data: dict,
-    table_id: str,
+    dataset_id: str,
     validate: bool = True,
     idx: Optional[int] = None,
 ) -> em.Experiment:
@@ -132,7 +129,7 @@ def ingest_experiment(
         biosample=biosample,
         instrument=instrument_db,
         extra_properties=extra_properties,
-        table=Table.objects.get(ownership_record_id=table_id, data_type=DATA_TYPE_EXPERIMENT)
+        dataset=Dataset.objects.get(identifier=dataset_id)
     )
 
     # create m2m relationships
@@ -141,13 +138,8 @@ def ingest_experiment(
     return new_experiment
 
 
-def ingest_experiments_workflow(workflow_outputs, table_id: str) -> list[em.Experiment]:
-    with workflow_file_output_to_path(get_output_or_raise(workflow_outputs, "json_document")) as json_doc_path:
-        logger.info(f"Attempting ingestion of experiments from path: {json_doc_path}")
-        with open(json_doc_path, "r") as jf:
-            json_data: dict = json.load(jf)
-
-    dataset = TableOwnership.objects.get(table_id=table_id).dataset
+def ingest_experiments_workflow(json_data, dataset_id: str) -> list[em.Experiment]:
+    dataset = Dataset.objects.get(identifier=dataset_id)
 
     for rs in json_data.get("resources", []):
         dataset.additional_resources.add(ingest_resource(rs))
@@ -159,7 +151,7 @@ def ingest_experiments_workflow(workflow_outputs, table_id: str) -> list[em.Experiment]:
         validate_experiment(exp, idx)
 
     # Then, if everything passes, ingest the experiments. Don't re-do the validation in this case.
-    return [ingest_experiment(exp, table_id, validate=False) for exp in exps]
+    return [ingest_experiment(exp, dataset_id, validate=False) for exp in exps]
@@ -212,10 +204,5 @@ def ingest_derived_experiment_results(json_data: list[dict]) -> list[em.ExperimentResult]:
 # The table_id is required to fit the bento_ingest.schema.json in bento_lib,
 # but it is unused. It can be set to any valid table_id or to one of the override
 # values defined in view_ingest.py
-def ingest_maf_derived_from_vcf_workflow(workflow_outputs, table_id: str) -> list[em.ExperimentResult]:
-    with workflow_file_output_to_path(get_output_or_raise(workflow_outputs, "json_document")) as json_doc_path:
-        logger.info(f"Attempting ingestion of MAF-derived-from-VCF JSON from path: {json_doc_path}")
-        with open(json_doc_path, "r") as fh:
-            json_data = json.load(fh)
-
+def ingest_maf_derived_from_vcf_workflow(json_data, dataset_id: str) -> list[em.ExperimentResult]:
    return ingest_derived_experiment_results(json_data)
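Both workflow entry points in this file now receive an already-parsed JSON document plus a dataset ID; the file handling that used to live here (workflow_file_output_to_path, get_output_or_raise) moves up to the caller. A hypothetical caller sketch under that assumption (the path handling shown is illustrative, not the commit's actual call site):

import json

from chord_metadata_service.chord.ingest.experiments import ingest_experiments_workflow

def run_experiment_ingest(json_path: str, dataset_id: str):
    # parsing now happens before the workflow function is invoked
    with open(json_path, "r") as jf:
        json_data = json.load(jf)
    return ingest_experiments_workflow(json_data, dataset_id)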
23 changes: 5 additions & 18 deletions chord_metadata_service/chord/ingest/mcode.py
@@ -1,26 +1,13 @@
-import json
-
 from chord_metadata_service.mcode.mcode_ingest import ingest_mcodepacket
 from chord_metadata_service.mcode.parse_fhir_mcode import parse_bundle
 
-from .logger import logger
-from .utils import get_output_or_raise, map_if_list, workflow_file_output_to_path
+from .utils import map_if_list
 
 
-def ingest_mcode_fhir_workflow(workflow_outputs, table_id):
-    with workflow_file_output_to_path(get_output_or_raise(workflow_outputs, "json_document")) as json_doc_path:
-        logger.info(f"Attempting ingestion of MCODE FIHR from path: {json_doc_path}")
-        with open(json_doc_path, "r") as jf:
-            json_data = json.load(jf)
-
+def ingest_mcode_fhir_workflow(json_data, dataset_id):
     mcodepacket = parse_bundle(json_data)
-    return ingest_mcodepacket(mcodepacket, table_id)
+    return ingest_mcodepacket(mcodepacket, dataset_id)
 
 
-def ingest_mcode_workflow(workflow_outputs, table_id):
-    with workflow_file_output_to_path(get_output_or_raise(workflow_outputs, "json_document")) as json_doc_path:
-        logger.info(f"Attempting ingestion of MCODE from path: {json_doc_path}")
-        with open(json_doc_path, "r") as jf:
-            json_data = json.load(jf)
-
-    return map_if_list(ingest_mcodepacket, json_data, table_id)
+def ingest_mcode_workflow(json_data, dataset_id):
+    return map_if_list(ingest_mcodepacket, json_data, dataset_id)
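map_if_list, the one helper these workflows keep, applies an ingest function either to a single document or to each item of a list. A small sketch of its behavior as inferred from the call sites here (the real implementation lives in .utils and may differ):

def map_if_list(fn, data, *args, **kwargs):
    # list payload: ingest each item; single object: ingest it once
    if isinstance(data, list):
        return [fn(item, *args, **kwargs) for item in data]
    return fn(data, *args, **kwargs)

print(map_if_list(str.upper, ["a", "b"]))  # ['A', 'B']
print(map_if_list(str.upper, "a"))         # 'A'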
22 changes: 7 additions & 15 deletions chord_metadata_service/chord/ingest/phenopackets.py
@@ -1,12 +1,10 @@
 from __future__ import annotations
 
-import json
 import uuid
 
 from dateutil.parser import isoparse
 from decimal import Decimal
-from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET
-from chord_metadata_service.chord.models import Project, ProjectJsonSchema, Table
+from chord_metadata_service.chord.models import Project, ProjectJsonSchema, Dataset
 from chord_metadata_service.phenopackets import models as pm
 from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA
 from chord_metadata_service.patients.values import KaryotypicSex
@@ -15,10 +13,9 @@
 from chord_metadata_service.restapi.utils import iso_duration_to_years
 
 from .exceptions import IngestError
-from .logger import logger
 from .resources import ingest_resource
 from .schema import schema_validation
-from .utils import get_output_or_raise, map_if_list, query_and_check_nulls, workflow_file_output_to_path
+from .utils import map_if_list, query_and_check_nulls
 
 from typing import Any, Dict, Iterable, Optional, Union
 
@@ -200,7 +197,7 @@ def get_or_create_hts_file(hts_file) -> pm.HtsFile:
 
 
 def ingest_phenopacket(phenopacket_data: dict[str, Any],
-                       table_id: str,
+                       dataset_id: str,
                        json_schema: dict = PHENOPACKET_SCHEMA,
                        validate: bool = True,
                        idx: Optional[int] = None) -> pm.Phenopacket:
@@ -288,7 +285,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any],
         id=new_phenopacket_id,
         subject=subject_obj,
         meta_data=meta_data_obj,
-        table=Table.objects.get(ownership_record_id=table_id, data_type=DATA_TYPE_PHENOPACKET),
+        dataset=Dataset.objects.get(identifier=dataset_id),
     )
 
     # ... save it to the database...
@@ -304,13 +301,8 @@
     return new_phenopacket
 
 
-def ingest_phenopacket_workflow(workflow_outputs, table_id) -> Union[list[pm.Phenopacket], pm.Phenopacket]:
-    with workflow_file_output_to_path(get_output_or_raise(workflow_outputs, "json_document")) as json_doc_path:
-        logger.info(f"Attempting ingestion of phenopackets from path: {json_doc_path}")
-        with open(json_doc_path, "r") as jf:
-            json_data = json.load(jf)
-
-    project_id = Project.objects.get(datasets__table_ownership=table_id)
+def ingest_phenopacket_workflow(json_data, dataset_id) -> Union[list[pm.Phenopacket], pm.Phenopacket]:
+    project_id = Project.objects.get(datasets=dataset_id)
     project_schemas: Iterable[ExtensionSchemaDict] = ProjectJsonSchema.objects.filter(project_id=project_id).values(
         "json_schema",
         "required",
@@ -328,4 +320,4 @@ def ingest_phenopacket_workflow(json_data, dataset_id) -> Union[list[pm.Phenopacket], pm.Phenopacket]:
         map_if_list(validate_phenopacket, json_data, json_schema)
 
     # Then, actually try to ingest them (if the validation passes); we don't need to re-do validation here.
-    return map_if_list(ingest_phenopacket, json_data, table_id, json_schema=json_schema, validate=False)
+    return map_if_list(ingest_phenopacket, json_data, dataset_id, json_schema=json_schema, validate=False)
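The project lookup above also simplifies: instead of joining datasets to table ownerships, the owning project is found directly through the Project-Dataset relation. A comment sketch of the two lookups (assumes a configured Django project):

# Before: Project.objects.get(datasets__table_ownership=table_id)
#   - join Project -> Dataset -> TableOwnership, matching on the table ID
# After:  Project.objects.get(datasets=dataset_id)
#   - match the dataset's primary key on the reverse "datasets" relation,
#     i.e. "the project whose datasets include this dataset ID"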
