In [3]:
import dmpchef
import os
import json
from pathlib import Path

# Location of the request definition consumed by the pipeline cells below
input_path = Path("data") / "inputs" / "input.json"

# Parse the whole file in one step (UTF-8, same as the pipeline input)
data = json.loads(input_path.read_text(encoding="utf-8"))

# Pretty-print the parsed request; keep non-ASCII characters readable
print(json.dumps(data, indent=2, ensure_ascii=False))



{
  "config": {
    "funding": {
      "agency": "NIH",
      "subagency": "NIMH"
    },
    "pipeline": {
      "rag": true,
      "llm": {
        "provider": "ollama",
        "model_name": "llama3.3:latest"
      }
    },
    "export": {
      "md": true,
      "docx": true,
      "pdf": true,
      "dmptool_json": true
    }
  },
  "inputs": {
    "research_area": "Autism and social communication",
    "research_context": "This study examines social communication and neurodevelopmental profiles in youth with autism compared to typically developing peers using behavioral, clinical, and neurobiological measures. NDA GUID elements will be collected for submission to the NIMH Data Archive (NDA).",
    "data_types": "Demographic and clinical datasets; behavioral and cognitive assessments; neuroimaging and/or physiological measures; derived variables; documentation (protocols, data dictionary, README).",
    "data_source": "IRB-approved human participant study using standardized assessm

In [4]:
# ============================
# Generate the plan WITHOUT retrieval (use_rag=False)
# ============================
from dmpchef.api import generate

result = generate(input_path, use_rag=False)

# Report only where each export was written, one "name: path" line per entry
exported = result["outputs"]
print("\n Export files:")
for name, path in exported.items():
    print(f"{name}: {path}")



{"timestamp": "2026-02-12T18:42:06.058999Z", "level": "info", "event": "Config loaded successfully"}
{"llm": "llama3.3:latest", "embed": "sentence-transformers/all-MiniLM-L6-v2", "hf_cache_dir": "data/cache/hf", "local_files_only": true, "allow_download_if_missing": true, "temperature": null, "num_predict": null, "num_ctx": null, "top_p": null, "top_k": null, "timestamp": "2026-02-12T18:42:06.061001Z", "level": "info", "event": "ModelLoader initialized"}
{"llm": "llama3.3:latest", "rag_default": true, "data_pdfs": "data\\database", "index_dir": "data\\index", "debug_dir": "data\\outputs\\debug", "force_rebuild_index": false, "timestamp": "2026-02-12T18:42:06.069997Z", "level": "info", "event": "DMPPipeline initialized"}
{"funding_agency": "NIH", "prompt_type": "nih_dmp", "template_md": "data\\inputs\\dmp-template.md", "retrieval_hint": "NIH Data Management and Sharing Plan (DMSP) guidance", "timestamp": "2026-02-12T18:42:06.071001Z", "level": "info", "event": "Funding agency selected"}


 Export files:
markdown: C:\Users\Nahid\dmpchef\data\outputs\markdown\req_8f03ec1fc5__104304__norag__llama3.3_latest.md
dmptool_json: C:\Users\Nahid\dmpchef\data\outputs\json\req_8f03ec1fc5__104304__norag__llama3.3_latest.dmptool.json
docx: C:\Users\Nahid\dmpchef\data\outputs\docx\req_8f03ec1fc5__104304__norag__llama3.3_latest.docx
pdf: C:\Users\Nahid\dmpchef\data\outputs\pdf\req_8f03ec1fc5__104304__norag__llama3.3_latest.pdf





In [5]:
# ============================
# Generate the plan WITH retrieval (use_rag=True)
# ============================
from dmpchef.api import generate

result = generate(input_path, use_rag=True)

# Report only where each export was written, one "name: path" line per entry
exported = result["outputs"]
print("\n Export files:")
for name, path in exported.items():
    print(f"{name}: {path}")


{"timestamp": "2026-02-12T18:43:08.347932Z", "level": "info", "event": "Config loaded successfully"}
{"llm": "llama3.3:latest", "embed": "sentence-transformers/all-MiniLM-L6-v2", "hf_cache_dir": "data/cache/hf", "local_files_only": true, "allow_download_if_missing": true, "temperature": null, "num_predict": null, "num_ctx": null, "top_p": null, "top_k": null, "timestamp": "2026-02-12T18:43:08.349931Z", "level": "info", "event": "ModelLoader initialized"}
{"llm": "llama3.3:latest", "rag_default": true, "data_pdfs": "data\\database", "index_dir": "data\\index", "debug_dir": "data\\outputs\\debug", "force_rebuild_index": false, "timestamp": "2026-02-12T18:43:08.357935Z", "level": "info", "event": "DMPPipeline initialized"}
{"funding_agency": "NIH", "prompt_type": "nih_dmp", "template_md": "data\\inputs\\dmp-template.md", "retrieval_hint": "NIH Data Management and Sharing Plan (DMSP) guidance", "timestamp": "2026-02-12T18:43:08.359934Z", "level": "info", "event": "Funding agency selected"}


 Export files:
markdown: C:\Users\Nahid\dmpchef\data\outputs\markdown\req_8f03ec1fc5__104402__rag__k6__llama3.3_latest.md
dmptool_json: C:\Users\Nahid\dmpchef\data\outputs\json\req_8f03ec1fc5__104402__rag__k6__llama3.3_latest.dmptool.json
docx: C:\Users\Nahid\dmpchef\data\outputs\docx\req_8f03ec1fc5__104402__rag__k6__llama3.3_latest.docx
pdf: C:\Users\Nahid\dmpchef\data\outputs\pdf\req_8f03ec1fc5__104402__rag__k6__llama3.3_latest.pdf





In [8]:

# Show the DMPTool JSON produced by the no-RAG run.
#
# The export file name carries run-specific parts (a request id and what
# appears to be a per-run time stamp), so a hard-coded name goes stale or
# missing as soon as the pipeline is run again. Look the file up instead.
json_dir = Path("data") / "outputs" / "json"
norag_exports = list(json_dir.glob("*__norag__*.dmptool.json"))
if not norag_exports:
    raise FileNotFoundError(
        f"No no-RAG DMPTool JSON export found in {json_dir}; run the no-RAG cell first."
    )

# When several runs are on disk, use the most recently written one.
output_path = max(norag_exports, key=lambda candidate: candidate.stat().st_mtime)

with output_path.open("r", encoding="utf-8") as f:
    data = json.load(f)

print(json.dumps(data, indent=2, ensure_ascii=False))

{
  "dmptool": {
    "provenance": "dmpchef",
    "narrative": {
      "title": "NIH Data Management and Sharing Plan",
      "section": [
        {
          "order": 1,
          "title": "Element 1: Data Type",
          "description": "",
          "question": [
            {
              "order": 1,
              "text": "Types and amount of scientific data expected to be generated in the project",
              "answer": {
                "json": {
                  "answer": "The project 'req_8f03ec1fc5' aims to examine social communication and neurodevelopmental profiles in youth with autism compared to typically developing peers. The types of scientific data expected to be generated include demographic and clinical datasets, behavioral and cognitive assessments, neuroimaging and/or physiological measures, derived variables, and documentation such as protocols, data dictionary, and README files. The estimated amount of data is substantial, given the multi-modal nature of the d

In [10]:
# Show the DMPTool JSON produced by the RAG run.
#
# The export file name carries run-specific parts (a request id and what
# appears to be a per-run time stamp), so a hard-coded name goes stale or
# missing as soon as the pipeline is run again. Look the file up instead.
# The "__rag__" marker does not match no-RAG exports, which use "__norag__".
json_dir = Path("data") / "outputs" / "json"
rag_exports = list(json_dir.glob("*__rag__*.dmptool.json"))
if not rag_exports:
    raise FileNotFoundError(
        f"No RAG DMPTool JSON export found in {json_dir}; run the RAG cell first."
    )

# When several runs are on disk, use the most recently written one.
output_path = max(rag_exports, key=lambda candidate: candidate.stat().st_mtime)

with output_path.open("r", encoding="utf-8") as f:
    data = json.load(f)

print(json.dumps(data, indent=2, ensure_ascii=False))

{
  "dmptool": {
    "provenance": "dmpchef",
    "narrative": {
      "title": "NIH Data Management and Sharing Plan",
      "section": [
        {
          "order": 1,
          "title": "Element 1: Data Type",
          "description": "",
          "question": [
            {
              "order": 1,
              "text": "Types and amount of scientific data expected to be generated in the project",
              "answer": {
                "json": {
                  "answer": "This study on autism and social communication will generate multi-modal datasets from youth participants, including demographic and clinical datasets, behavioral and cognitive assessments, neuroimaging and/or physiological measures, derived variables, and associated documentation. The estimated volume of data is substantial, with neurobiological data representing the largest portion of storage.",
                  "type": "textArea"
                }
              }
            },
            {
           