### Workflow Automation Script Overview

This script orchestrates the execution of multiple Python scripts stored in a designated `scripts` folder. It is designed to automate a batch processing workflow—each script is run with the `--output-dir` argument pointing to a shared `outputs` folder. The script does the following:

- Uses the same Python interpreter that is running this notebook (`sys.executable`), so each script runs in the currently active (virtual) environment
- Scans the `scripts` directory for Python files (excluding any starting with `_`)
- Runs each script sequentially using `subprocess.run()`
- Logs the success or failure of each run to both the console and a `workflow_log.txt` file with timestamps
- Ensures the output directory exists before running the scripts

This approach is useful for managing reproducible, auditable, multi-step data pipelines such as those used in reporting, validation, or export processes.

**Paths used (relative to the `ROOT_DIR` set in the code below):**
- Scripts directory: `scripts/`
- Output directory: `outputs/`
- Log file: `workflow_log.txt`


In [None]:
import subprocess
import os
import datetime
import sys

# Child scripts are launched with the interpreter running this workflow.
PYTHON_EXECUTABLE = sys.executable

# Project root; every other path below is derived from it.
ROOT_DIR = "C:/Users/DanielGodden/Documents/MCHLG/collecting_and_managing_data/"

# Scripts folder, shared output folder and workflow log file, in that order.
SCRIPTS_DIR, OUTPUT_DIR, LOG_FILE = (
    os.path.join(ROOT_DIR, leaf)
    for leaf in ("scripts", "outputs", "workflow_log.txt")
)

def log(message):
    """
    Logs a message to both the console and a log file, with a timestamp.

    The message is prefixed with ``[YYYY-MM-DD HH:MM:SS]`` (local time),
    printed, and then appended as one line to ``LOG_FILE``.

    Args:
        message (str): The message to log.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    full_msg = f"[{timestamp}] {message}"
    print(full_msg)
    # Write UTF-8 explicitly: open()'s default encoding is locale-dependent,
    # so a non-ASCII message (e.g. a failed script's captured stderr) could
    # otherwise raise UnicodeEncodeError while logging.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(full_msg + "\n")

def run_script(script_path, output_dir):
    """
    Executes a Python script as a subprocess, passing the output directory
    as a command-line argument.

    The script is run with ``PYTHON_EXECUTABLE`` and its stdout/stderr are
    captured as text. The outcome is reported through ``log``; failures are
    logged rather than raised, so the caller can carry on with other scripts.

    Args:
        script_path (str): Path to the Python script to run.
        output_dir (str): Directory to pass to the script via --output-dir.

    Returns:
        bool: True if the script exited with status 0, False if it exited
        with a non-zero status or could not be started.
    """
    command = [PYTHON_EXECUTABLE, script_path, "--output-dir", output_dir]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        log(f"FAIL: {script_path}")
        # Prefer stderr, but fall back to stdout so that scripts which print
        # their diagnostics there do not produce an empty error entry.
        details = (e.stderr or "").strip() or (e.stdout or "").strip()
        log(f"Error:\n{details}")
        return False
    except Exception as e:
        # Deliberately broad: e.g. the interpreter could not be launched.
        # The problem is recorded and the workflow keeps going.
        log(f"ERROR: {script_path} - {str(e)}")
        return False

    log(f"SUCCESS: {script_path}")
    return True

def main():
    """
    Main workflow function. Finds all eligible Python scripts in the scripts directory
    and runs them sequentially, logging the outcome of each.

    Eligible scripts are the entries of ``SCRIPTS_DIR`` whose names end in
    ``.py`` and do not start with ``_``; they are run in sorted name order,
    each receiving ``OUTPUT_DIR`` as its output directory. The function
    returns early (after logging why) if the scripts directory is missing
    or contains no eligible scripts.
    """
    log("Starting workflow...")

    # Verify the scripts directory exists. isdir (rather than exists) also
    # rejects a regular file at that path, which would otherwise make
    # os.listdir() below raise NotADirectoryError.
    if not os.path.isdir(SCRIPTS_DIR):
        log(f"Scripts directory not found: {SCRIPTS_DIR}")
        return

    # Ensure output directory exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # List all .py files that don't start with "_" for execution;
    # sorting keeps the run order the same from one run to the next.
    py_files = sorted(
        f for f in os.listdir(SCRIPTS_DIR)
        if f.endswith(".py") and not f.startswith("_")
    )

    if not py_files:
        log("No Python scripts found in scripts directory.")
        return

    # Run each script and log the outcome
    for py_file in py_files:
        full_path = os.path.join(SCRIPTS_DIR, py_file)
        log(f"Running: {py_file}")
        run_script(full_path, OUTPUT_DIR)

    log("Workflow complete.")


if __name__ == "__main__":
    main()


[2025-06-03 11:21:24] Starting workflow...
[2025-06-03 11:21:24] Running: endpoints_missing_doc_urls.py
[2025-06-03 11:21:26] SUCCESS: C:/Users/DanielGodden/Documents/MCHLG/collecting_and_managing_data/scripts\endpoints_missing_doc_urls.py
[2025-06-03 11:21:26] Running: full_datasette_tables.py
[2025-06-03 11:21:27] SUCCESS: C:/Users/DanielGodden/Documents/MCHLG/collecting_and_managing_data/scripts\full_datasette_tables.py
[2025-06-03 11:21:27] Running: generate_odp_conformance_csv.py
[2025-06-03 11:21:28] SUCCESS: C:/Users/DanielGodden/Documents/MCHLG/collecting_and_managing_data/scripts\generate_odp_conformance_csv.py
[2025-06-03 11:21:28] Running: generate_odp_issues_csv.py
[2025-06-03 11:21:30] SUCCESS: C:/Users/DanielGodden/Documents/MCHLG/collecting_and_managing_data/scripts\generate_odp_issues_csv.py
[2025-06-03 11:21:30] Running: generate_odp_status_csv.py
[2025-06-03 11:21:31] SUCCESS: C:/Users/DanielGodden/Documents/MCHLG/collecting_and_managing_data/scripts\generate_odp_stat