In [2]:
import json
import pandas as pd
import re
from datetime import datetime

In [3]:
# Read the JSON-LD data model from disk and parse it into `dm_json`.
with open('../../EL.data.model.jsonld', 'r', encoding='UTF-8') as f:
    dm_json = json.loads(f.read())

In [4]:
# get template names
#
# Scan the graph for nodes whose '@id' contains the substring 'template' and
# collect every value found in that node's "schema:domainIncludes" entries.
# As before, when several nodes match, the last one that carries
# "schema:domainIncludes" wins.
templates = []  # always bound, so a re-run can never pick up a stale kernel value

for node in dm_json['@graph']:
    # Nodes without an '@id' cannot match and are skipped.
    if not re.search('template', node.get('@id', '')):
        continue

    domain_includes = node.get("schema:domainIncludes")
    if domain_includes is None:
        # Matching node without domain information: skipped, as before.
        continue

    # NOTE(review): JSON-LD can serialize a single reference as a bare object
    # instead of a list; presumably that can happen in this model too, so
    # normalize it rather than iterating over the dict's keys. TODO confirm.
    if isinstance(domain_includes, dict):
        domain_includes = [domain_includes]

    templates = [value for entry in domain_includes for value in entry.values()]

if not templates:
    # Fail here with a clear message instead of a NameError / KeyError in a
    # later cell.
    raise ValueError(
        "No template names found: no node with 'template' in its '@id' "
        "provides a non-empty 'schema:domainIncludes'."
    )

In [5]:
# re-create template
# get 'sms:displayName' for "display_name" and '@id' for "schema_name"
#
# For every graph node whose '@id' is listed in `templates`, build one manifest
# entry with:
#   display_name - the node's 'sms:displayName'
#   schema_name  - the node's '@id' without its leading 'bts:' prefix
#   type         - 'record' when the '@id' contains 'individual' or
#                  'biospecimen' (case-insensitive), otherwise 'file'

SCHEMA_PREFIX = 'bts:'

manifest_schemas = []
skipped_templates = []  # template nodes that have no 'sms:displayName'

for node in dm_json['@graph']:
    node_id = node.get('@id')
    if node_id not in templates:
        continue

    if 'sms:displayName' not in node:
        # Still skipped, but remembered so the omission is reported below
        # instead of being silently swallowed.
        skipped_templates.append(node_id)
        continue

    if re.search('individual|biospecimen', node_id, flags=re.IGNORECASE):
        record_type = 'record'
    else:
        record_type = 'file'

    # str.strip('bts:') removes any of the characters 'b', 't', 's', ':' from
    # BOTH ends (e.g. 'bts:AssayMetabolomics' -> 'AssayMetabolomic'), so drop
    # the exact prefix instead.
    if node_id.startswith(SCHEMA_PREFIX):
        schema_name = node_id[len(SCHEMA_PREFIX):]
    else:
        schema_name = node_id

    manifest_schemas.append(
        {
            'display_name': node['sms:displayName'],
            "schema_name": schema_name,
            "type": record_type,
        }
    )

manifest_schemas_df = (
    pd.DataFrame(manifest_schemas)
    .sort_values(["type", "display_name"], ascending=True)
    .reset_index(drop=True)
)
print(manifest_schemas_df.to_markdown(index=False))
print()
print("Shape: ", manifest_schemas_df.shape)

if skipped_templates:
    print("Skipped templates without 'sms:displayName': ", skipped_templates)

| display_name                           | schema_name                        | type   |
|:---------------------------------------|:-----------------------------------|:-------|
| assay_RNAseq_template                  | AssayRNAseqTemplate                | file   |
| assay_bsSeq_template                   | AssayBsSeqTemplate                 | file   |
| assay_metabolomics_template            | AssayMetabolomicsTemplate          | file   |
| assay_metagenomics_template            | AssayMetagenomicsTemplate          | file   |
| assay_microbiome_template              | AssayMicrobiomeTemplate            | file   |
| assay_phenotype_human_template         | AssayPhenotypeHumanTemplate        | file   |
| assay_proteomics_template              | AssayProteomicsTemplate            | file   |
| assay_scRNAseq_template                | AssayScRNAseqTemplate              | file   |
| assay_whole_genome_sequencing_template | AssayWholeGenomeSequencingTemplate | file   |
| file_annotation_tem

In [21]:
from IPython.display import Markdown

In [24]:
# Read the existing DCA template configuration from disk.
with open("../../dca-template-config.json", "r", encoding="UTF-8") as f:
    template_config = json.loads(f.read())

# Difference between the freshly derived templates and the ones already in the
# config: outer join on display_name, with a `_merge` indicator column showing
# which side each row comes from.
m = pd.merge(
    manifest_schemas_df,
    pd.DataFrame(template_config["manifest_schemas"]),
    how="outer",
    on="display_name",
    suffixes=["_new", "_previous"],
    indicator=True,
)

# Show the comparison with its columns in alphabetical order.
print(m[sorted(m.columns)].to_markdown(index=False))

# Replace the manifest list and stamp the schema version with today's date.
template_config["manifest_schemas"] = manifest_schemas
template_config["schema_version"] = f"v{datetime.now():%Y.%m.%d}"

| _merge     | display_name                           | schema_name_new                    | schema_name_previous   | type_new   | type_previous   |
|:-----------|:---------------------------------------|:-----------------------------------|:-----------------------|:-----------|:----------------|
| right_only | Biospecimen_human                      | nan                                | BiospecimenHuman       | nan        | record          |
| right_only | Biospecimen_non_Human                  | nan                                | BiospecimenNonHuman    | nan        | record          |
| right_only | Genotyping_Human                       | nan                                | GenotypingHuman        | nan        | file            |
| right_only | Individual_Human                       | nan                                | IndividualHuman        | nan        | record          |
| right_only | Individual_non_Human                   | nan                                | IndividualNon

In [25]:
# Show the updated configuration as indented JSON under a heading.
print('New Schema: \n', json.dumps(template_config, indent=4), sep='\n')

New Schema: 

{
    "manifest_schemas": [
        {
            "display_name": "biospecimen_human_template",
            "schema_name": "BiospecimenHumanTemplate",
            "type": "record"
        },
        {
            "display_name": "biospecimen_non_human_template",
            "schema_name": "BiospecimenNonHumanTemplate",
            "type": "record"
        },
        {
            "display_name": "assay_bsSeq_template",
            "schema_name": "AssayBsSeqTemplate",
            "type": "file"
        },
        {
            "display_name": "genotyping_human_template",
            "schema_name": "GenotypingHumanTemplate",
            "type": "file"
        },
        {
            "display_name": "individual_human_template",
            "schema_name": "IndividualHumanTemplate",
            "type": "record"
        },
        {
            "display_name": "individual_non_human_template",
            "schema_name": "IndividualNonHumanTemplate",
            "type": "record"

In [27]:
# Write the updated configuration back to disk.
#
# Serialize first: opening the file in "w" mode truncates it immediately, so a
# serialization error raised inside the `with` block would leave the existing
# config empty or half-written.
serialized_config = json.dumps(template_config, ensure_ascii=False, indent=4)

with open("../../dca-template-config.json", "w", encoding="UTF-8") as f:
    f.write(serialized_config)