In [1]:
import pandas as pd
import yaml

with open("./local_configs/notebook_config.yaml", "r") as f:
    config = yaml.safe_load(f)

# paths to import files
schematic_config = config["paths"]["schematic"]
csv_model = config["file_names"]["csv_model"]
json_model = config["file_names"]["json_model"]

print(
    "Schematic config: ",
    schematic_config,
    "\n",
    "CSV model: ",
    csv_model,
    "\n",
    "JSON LD Model: ",
    json_model,
)


Schematic config:  ./config.yml 
 CSV model:  EL.data.model.csv 
 JSON LD Model:  EL.data.model.jsonld


In [8]:
print(f'schematic schema convert ../{csv_model} --output_jsonld ../{json_model}')

!schematic schema convert ../{csv_model} --output_jsonld ../{json_model}

schematic schema convert ../EL.data.model.csv --output_jsonld ../EL.data.model.jsonld
Starting schematic...
Done adding requirements and value ranges to attributes
The Data Model was created and saved to '../EL.data.model.jsonld' location.


## Get manifest names to generate manifests

In [2]:
import json

with open("../" + json_model, "r") as jf:
    jo = json.load(jf)

In [3]:
jo

{'@context': {'bts': 'http://schema.biothings.io/',
  'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
  'schema': 'http://schema.org/',
  'xsd': 'http://www.w3.org/2001/XMLSchema#'},
 '@graph': [{'@id': 'schema:Text',
   '@type': ['schema:DataType', 'rdfs:Class'],
   'rdfs:comment': 'Data type: Text.',
   'rdfs:label': 'Text'},
  {'@id': 'schema:Number',
   '@type': ['schema:DataType', 'rdfs:Class'],
   'rdfs:comment': 'Data type: Number.',
   'rdfs:label': 'Number'},
  {'@id': 'schema:Integer',
   '@type': 'rdfs:Class',
   'rdfs:comment': 'Data type: Integer.',
   'rdfs:label': 'Integer',
   'rdfs:subClassOf': {'@id': 'schema:Number'}},
  {'@id': 'schema:Thing',
   '@type': 'rdfs:Class',
   'rdfs:comment': 'Thing',
   'rdfs:label': 'Thing',
   'schema:isPartOf': {'@id': 'http://schema.org'}},
  {'@id': 'bts:BiologicalEntity',
   '@type': 'rdfs:Class',
   'rdfs:comment': None,
   'rdfs:label': 'BiologicalEntity',
   'rdfs:subClas

In [5]:
# Manifest names in data model
manifest_names_extracted = []

for i in jo["@graph"]:
    try: 
        if i['rdfs:subClassOf'][0]['@id'] == "bts:Template":
            print
            manifest_names_extracted.append(
                i["@id"].replace("bts:", "")
            )
    except: 
        pass 
# display names extracted
manifest_display_names_extracted = []

for i in jo["@graph"]:
    if i["@id"].replace("bts:", "") in (manifest_names_extracted):
        manifest_display_names_extracted.append(i["sms:displayName"])
manifest_display_names_extracted

# Create dictionary for lookup later
manifest_name_relationships = dict(
    zip(manifest_names_extracted, manifest_display_names_extracted)
)

manifest_name_relationships

{'IndividualHuman': 'Individual Human',
 'IndividualnonHuman': 'Individual nonHuman'}

In [None]:
# # Create the manifest templates based on the tables
# manifests = (
#     dm2.query('Properties == "dataProperty"')
#     .groupby("Parent")
#     .agg({"Attribute": list})
#     .reset_index()
# )

# manifests["Attribute"] = manifests["Attribute"].apply(lambda x: x + ["Component"])

# manifests["Required"] = "True"
# manifests["Properties"] = "dataType"

# manifests = manifests.rename({"Attribute": "DependsOn", "Parent": "Attribute"}, axis=1)
# manifests["DependsOn"] = manifests["DependsOn"].apply(lambda x: ",".join(x))


# # Store the data types in a list
# dataTypes = manifests["Attribute"].tolist()

In [50]:
# Template configuration
def manifest_template(k, v, t="file"):
    manifest_template = {"display_name": v, "schema_name": k, "type": t}
    return manifest_template


dca_template = {
    "manifest_schemas": [],
    "service_version": "v23.1.1",
    "schema_version": "v1.2",
}

records = [
    "IndividualHuman",
    "IndividualnonHuman",
    "Biospecimenhuman",
    "BiospecimennonHuman",
]


for k, v in manifest_name_relationships.items():
    if k in records:
        t = "records"
    else:
        t = "file"
    dca_template["manifest_schemas"].append(manifest_template(k, v, t))

json_formatted_str = json.dumps(dca_template, indent=2)
print(json_formatted_str)

with open(
    "../_data/dca_template.json",
    "w",
) as f:
    f.write(json_formatted_str)

{
  "manifest_schemas": [
    {
      "display_name": "Biospecimen human",
      "schema_name": "Biospecimenhuman",
      "type": "records"
    },
    {
      "display_name": "Biospecimen nonHuman",
      "schema_name": "BiospecimennonHuman",
      "type": "records"
    },
    {
      "display_name": "Individual Human",
      "schema_name": "IndividualHuman",
      "type": "records"
    },
    {
      "display_name": "Individual nonHuman",
      "schema_name": "IndividualnonHuman",
      "type": "records"
    },
    {
      "display_name": "Metabolomics Human",
      "schema_name": "MetabolomicsHuman",
      "type": "file"
    },
    {
      "display_name": "Microbiome",
      "schema_name": "Microbiome",
      "type": "file"
    },
    {
      "display_name": "RNAseq",
      "schema_name": "RNAseq",
      "type": "file"
    },
    {
      "display_name": "Whole Genome Sequencing",
      "schema_name": "WholeGenomeSequencing",
      "type": "file"
    },
    {
      "display_name": "bs

## Generate Manifests


In [52]:
import os
for k,v in manifest_name_relationships.items(): 
    os.system(f'sh ../scripts/regenerate_model_and_manifest.sh {k}')

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

../_scripts/regenerate_model_and_manifest.sh
-- ../_scripts/regenerate_model_and_manifest.sh


Starting schematic...
Traceback (most recent call last):
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/bin/schematic", line 8, in <module>
    sys.exit(main())
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/nlee/Library/Caches/pypoetry/virtualenvs/elite-data-models-LjmgyG4K-py3.10/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/Users/nlee/Libr

In [None]:
manifest = 'IndividualHuman'

print(f"schematic manifest --config {schematic_config} get -dt {manifest} --output_csv {os.path.join(output_path, manifest + '.csv')} --title {'EL_Manifest_' + manifest} --sheet_url 2>&1 >> manifest_generation_results.txt")

!echo '{manifest}:' >> manifest_generation_results.txt
!schematic manifest --config {schematic_config} get -dt {manifest} --output_csv {os.path.join(output_path, manifest + '.xlsx')} --title {'EL_Manifest_' + manifest} --sheet_url 2>&1 >> manifest_generation_results.txt

In [None]:
!echo ----- newly_generated_manifests ------- 2>&1 >> manifest_generation_links.txt
for manifest in manifest_names_extracted:
    print(manifest)
    !echo {manifest}: >> manifest_generation_results.txt
    !schematic manifest --config {schematic_config} get -dt {manifest} --output_xlsx {os.path.join(output_path, manifest + '.xlsx')} --title {'EL_Manifest_' + manifest} --sheet_url 2>&1 >> manifest_generation_results.txt
    !echo ----------------------------------------------------------------