In [None]:
import logging
import subprocess
from tqdm import tqdm
import pandas as pd
import yaml
import os
from pathlib import Path
import re

cwd = Path(os.getcwd())

root_dir_name = "ELITE-data-models"

# Locate the repository root by walking upward from the working directory.
# - The working directory itself is a candidate, so the notebook also works
#   when launched from the repository root.
# - The nearest match wins, so a checkout nested inside a same-named parent
#   directory resolves to the inner directory.
# - A directory matches when its name ends with `root_dir_name`, which is the
#   same suffix rule the previous regex-on-path check applied.
ROOT_DIR = next(
    (p for p in (cwd, *cwd.parents) if p.name.endswith(root_dir_name)),
    None,
)

# Fail here with a clear message instead of a NameError in a later cell.
if ROOT_DIR is None:
    raise FileNotFoundError(
        f"Could not find a directory ending in '{root_dir_name}' at or above {cwd}"
    )

print(ROOT_DIR)

## Get manifest names to generate manifests

In [4]:
import json

# Load the JSON-LD data model from the repository root into `jo`.
# The encoding is given explicitly (as the other open() calls in this notebook do)
# so the result does not depend on the platform's default text encoding.
with open(Path(ROOT_DIR, 'EL.data.model.jsonld'), "r", encoding="UTF-8") as jf:
    jo = json.load(jf)

In [5]:
# Manifest names in data model
# find templates: every "@graph" node with at least one `rdfs:subClassOf`
# entry whose values mention "Component" (case-insensitive).


def _mentions_component(parent) -> bool:
    """Return True if any value of one subClassOf entry contains "Component"."""
    return isinstance(parent, dict) and any(
        re.search("Component", str(value), flags=re.IGNORECASE)
        for value in parent.values()
    )


templates = []

for node in jo["@graph"]:
    if not isinstance(node, dict):
        continue

    parents = node.get("rdfs:subClassOf", [])
    # A single parent may be stored as a bare dict rather than a list of dicts.
    if isinstance(parents, dict):
        parents = [parents]
    elif not isinstance(parents, (list, tuple)):
        continue

    # `any` adds each node at most once, even if several parents match.
    if not any(_mentions_component(parent) for parent in parents):
        continue

    # Nodes missing either field are skipped, as before, but other errors
    # are no longer hidden by a bare `except`.
    if "rdfs:label" not in node or "sms:displayName" not in node:
        continue

    templates.append(
        {"label": node["rdfs:label"], "displayName": node["sms:displayName"]}
    )

# Explicit columns keep `templates_df['label']` valid even when nothing matched.
templates_df = pd.DataFrame(templates, columns=["label", "displayName"])

templates_df

Unnamed: 0,label,displayName
0,Genotyping,genotyping
1,Biospecimenhuman,Biospecimen human
2,BiospecimennonHuman,Biospecimen nonHuman
3,BsSeq,bsSeq
4,Metabolomics,Metabolomics
5,Proteomics,proteomics
6,FileAnnotationTemplate,file_annotation_template
7,IndividualHuman,Individual Human
8,IndividualnonHuman,Individual nonHuman
9,Microbiome,Microbiome


In [7]:
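# # Template configuration (NOTE: this entire cell is commented out and does not run;
# # it references `manifest_name_relationships`, which no visible cell defines - TODO confirm before re-enabling)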
# def manifest_template(k, v, t="file"):
#     manifest_template = {"display_name": v, "schema_name": k, "type": t}
#     return manifest_template


# dca_template = {
#     "manifest_schemas": [],
#     "service_version": "v23.1.1",
#     "schema_version": "v1.2",
# }

# records = [
#     "IndividualHuman",
#     "IndividualnonHuman",
#     "Biospecimenhuman",
#     "BiospecimennonHuman",
# ]


# for k, v in manifest_name_relationships.items():
#     if k in records:
#         t = "records"
#     else:
#         t = "file"
#     dca_template["manifest_schemas"].append(manifest_template(k, v, t))

# json_formatted_str = json.dumps(dca_template, indent=2)
# print(json_formatted_str)

# with open(
#     "../_data/dca_template.json",
#     "w",
# ) as f:
#     f.write(json_formatted_str)

## Generate Manifests


In [29]:
from datetime import datetime

# Today's date as YYYY-MM-DD; used as a prefix for the log and result file names.
timestamp = f"{datetime.now():%Y-%m-%d}"

In [44]:
# `import logging` alone does not load the `logging.config` submodule; import it
# explicitly so this cell does not depend on another package having loaded it.
import logging.config

# create logger for reports: read the YAML logging configuration from the repo
with open(Path(ROOT_DIR, '_logs', 'logging.yaml'), 'r', encoding = 'UTF-8') as f:
    yaml_config = yaml.safe_load(f)

# Apply the configuration once the file has been read and closed.
logging.config.dictConfig(yaml_config)

# Get logger
logger = logging.getLogger("default")

In [53]:
# Attach a dated file handler to `logger` so this run is also written to
# tests/logs/<date>_manifest_generation.log.
log_path = Path(ROOT_DIR, 'tests', 'logs', timestamp + '_manifest_generation.log')

# FileHandler does not create missing directories.
log_path.parent.mkdir(parents=True, exist_ok=True)

# Reuse a handler already pointing at this file so that re-running the cell
# does not stack duplicate handlers (each duplicate repeats every record).
fh = next(
    (
        h
        for h in logger.handlers
        if isinstance(h, logging.FileHandler)
        and h.baseFilename == os.path.abspath(log_path)
    ),
    None,
)

if fh is None:
    fh = logging.FileHandler(filename=log_path)
    # Mirror the format of the first configured handler, if there is one.
    if logger.handlers:
        fh.setFormatter(logger.handlers[0].formatter)
    logger.addHandler(fh)

In [54]:
# Inspect the logger's attributes (level, handlers, propagate, ...).
vars(logger)

{'filters': [],
 'name': 'default',
 'level': 10,
 'parent': <RootLogger root (DEBUG)>,
 'propagate': True,
 'handlers': [<StreamHandler stdout (DEBUG)>,
  <FileHandler /Users/nlee/Documents/Projects/ELITE/ELITE-data-models/tests/logs/2024-04-10_manifest_generation.log (NOTSET)>],
 'disabled': False,
 '_cache': {},
 'manager': <logging.Manager at 0x101492c80>}

In [55]:
# Run `schematic manifest ... get` once per template label and record whether
# the command exited successfully. Produces a list of
# {"template_name": ..., "generation_test": bool} records.
manifest_generation_results = []

for t in tqdm(templates_df['label'], total = len(templates_df['label']), miniters=1):
    command = f""" schematic manifest --config config.yml get -dt {t} -s"""

    logger.info(f'Running command for {t}')
    proc = subprocess.run(
        command,
        cwd=ROOT_DIR,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # With text=True the captured streams are plain strings, not file objects,
    # so split them into lines rather than calling .readlines() on them.
    for line in (proc.stdout or "").splitlines():
        logger.info(line)

    for line in (proc.stderr or "").splitlines():
        logger.debug(line)

    test_result = proc.returncode == 0

    if test_result:
        logger.info(f"{t} has PASSED")
    else:
        # Log the exact command so the failure can be reproduced by hand.
        logger.debug(command)
        logger.error(f"{t} has FAILED")

    manifest_generation_results.append(
        {"template_name": t, "generation_test": test_result}
    )

manifest_generation_results

  0%|          | 0/13 [00:00<?, ?it/s]

Running command for Genotyping
Waiting...
2024-04-10 16:48:54,676 - default - INFO - Genotyping has PASSED
2024-04-10 16:48:54,676 - default - INFO - Genotyping has PASSED


  8%|▊         | 1/13 [00:29<05:52, 29.37s/it]

Finished process
--------------------
Running command for Biospecimenhuman
Waiting...
2024-04-10 16:49:23,139 - default - INFO - Biospecimenhuman has PASSED
2024-04-10 16:49:23,139 - default - INFO - Biospecimenhuman has PASSED


 15%|█▌        | 2/13 [00:57<05:17, 28.84s/it]

Finished process
--------------------
Running command for BiospecimennonHuman
Waiting...
2024-04-10 16:49:55,438 - default - INFO - BiospecimennonHuman has PASSED
2024-04-10 16:49:55,438 - default - INFO - BiospecimennonHuman has PASSED


 23%|██▎       | 3/13 [01:30<05:04, 30.42s/it]

Finished process
--------------------
Running command for BsSeq
Waiting...
2024-04-10 16:50:26,233 - default - INFO - BsSeq has PASSED
2024-04-10 16:50:26,233 - default - INFO - BsSeq has PASSED


 31%|███       | 4/13 [02:00<04:35, 30.57s/it]

Finished process
--------------------
Running command for Metabolomics
Waiting...
2024-04-10 16:51:02,793 - default - INFO - Metabolomics has PASSED
2024-04-10 16:51:02,793 - default - INFO - Metabolomics has PASSED


 38%|███▊      | 5/13 [02:37<04:21, 32.73s/it]

Finished process
--------------------
Running command for Proteomics
Waiting...
2024-04-10 16:51:38,228 - default - INFO - Proteomics has PASSED
2024-04-10 16:51:38,228 - default - INFO - Proteomics has PASSED


 46%|████▌     | 6/13 [03:12<03:55, 33.65s/it]

Finished process
--------------------
Running command for FileAnnotationTemplate
Waiting...
2024-04-10 16:52:28,925 - default - INFO - FileAnnotationTemplate has PASSED
2024-04-10 16:52:28,925 - default - INFO - FileAnnotationTemplate has PASSED


 54%|█████▍    | 7/13 [04:03<03:55, 39.22s/it]

Finished process
--------------------
Running command for IndividualHuman
Waiting...
2024-04-10 16:52:57,995 - default - INFO - IndividualHuman has PASSED
2024-04-10 16:52:57,995 - default - INFO - IndividualHuman has PASSED


 62%|██████▏   | 8/13 [04:32<02:59, 35.99s/it]

Finished process
--------------------
Running command for IndividualnonHuman
Waiting...
2024-04-10 16:53:26,356 - default - INFO - IndividualnonHuman has PASSED
2024-04-10 16:53:26,356 - default - INFO - IndividualnonHuman has PASSED


 69%|██████▉   | 9/13 [05:01<02:14, 33.61s/it]

Finished process
--------------------
Running command for Microbiome
Waiting...
2024-04-10 16:53:56,236 - default - INFO - Microbiome has PASSED
2024-04-10 16:53:56,236 - default - INFO - Microbiome has PASSED


 77%|███████▋  | 10/13 [05:30<01:37, 32.45s/it]

Finished process
--------------------
Running command for RNAseq
Waiting...
2024-04-10 16:54:27,186 - default - INFO - RNAseq has PASSED
2024-04-10 16:54:27,186 - default - INFO - RNAseq has PASSED


 85%|████████▍ | 11/13 [06:01<01:03, 31.99s/it]

Finished process
--------------------
Running command for ScRNAseq
Waiting...
2024-04-10 16:54:58,084 - default - INFO - ScRNAseq has PASSED
2024-04-10 16:54:58,084 - default - INFO - ScRNAseq has PASSED


 92%|█████████▏| 12/13 [06:32<00:31, 31.66s/it]

Finished process
--------------------
Running command for WholeGenomeSequencing
Waiting...
2024-04-10 16:55:31,801 - default - INFO - WholeGenomeSequencing has PASSED
2024-04-10 16:55:31,801 - default - INFO - WholeGenomeSequencing has PASSED


100%|██████████| 13/13 [07:06<00:00, 32.81s/it]

Finished process
--------------------





[{'template_name': 'Genotyping', 'generation_test': True},
 {'template_name': 'Biospecimenhuman', 'generation_test': True},
 {'template_name': 'BiospecimennonHuman', 'generation_test': True},
 {'template_name': 'BsSeq', 'generation_test': True},
 {'template_name': 'Metabolomics', 'generation_test': True},
 {'template_name': 'Proteomics', 'generation_test': True},
 {'template_name': 'FileAnnotationTemplate', 'generation_test': True},
 {'template_name': 'IndividualHuman', 'generation_test': True},
 {'template_name': 'IndividualnonHuman', 'generation_test': True},
 {'template_name': 'Microbiome', 'generation_test': True},
 {'template_name': 'RNAseq', 'generation_test': True},
 {'template_name': 'ScRNAseq', 'generation_test': True},
 {'template_name': 'WholeGenomeSequencing', 'generation_test': True}]

In [56]:
# Turn the per-template result records into a table (one row per template).
manifest_generation_results = pd.DataFrame(data=manifest_generation_results)

In [57]:
# Display the results table: template name and whether manifest generation passed.
manifest_generation_results

Unnamed: 0,template_name,generation_test
0,Genotyping,True
1,Biospecimenhuman,True
2,BiospecimennonHuman,True
3,BsSeq,True
4,Metabolomics,True
5,Proteomics,True
6,FileAnnotationTemplate,True
7,IndividualHuman,True
8,IndividualnonHuman,True
9,Microbiome,True


In [58]:
    # Write the dated results table without the DataFrame index.
    # `index=False` is an argument of `to_csv`; it was previously passed to
    # `Path(...)`, where it has no effect on the written file.
    manifest_generation_results.to_csv(
        Path(ROOT_DIR, "tests", 'manifest-templates', timestamp + "_manifest_generation_results.csv"),
        index=False,
    )

In [None]:
# load current DCA config
with open(Path(ROOT_DIR, "dca-template-config.json"), "r", encoding="UTF-8") as f:
    dca_template_config = json.load(f)

print(json.dumps(dca_template_config, indent=2))

In [None]:
# check against new data model