In [None]:
import time
import subprocess
import os
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from apscheduler.schedulers.background import BackgroundScheduler
import logging

# Setup logging: timestamped, INFO-level records for the whole script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# User-defined variables
# DIRECTORY must end with exactly one path separator because the script builds
# full paths by plain concatenation (f"{DIRECTORY}{NAME}").  A raw string
# cannot end in a single backslash, and r"...\\" yields TWO literal
# backslashes, so the trailing separator is appended as a regular string.
DIRECTORY = r"C:\Users\JohnDeHart\Documents\GitHub\Notebooks\twc_interactions" + "\\"
NOTEBOOK_FILE = "pull_vendor_api.ipynb"  # notebook executed by run_notebook()
JSON_FILE = "VendorA.json"  # file whose modification triggers the pipeline
PIPELINE_FILE = "update_box.pipeline"  # Elyra pipeline run by check_and_run_pipeline()

# Enable test mode: when True, the main block simulates a JSON file change
# shortly after start-up instead of waiting for a real modification.
TEST_MODE = False

class ChangeHandler(FileSystemEventHandler):
    """Watchdog handler that records when the watched JSON file is modified.

    The handler only raises a flag; it is up to the consumer (the scheduled
    pipeline check) to read ``file_changed`` and reset it.
    """

    def __init__(self):
        super().__init__()
        # True once a modification of DIRECTORY/JSON_FILE has been observed;
        # reset to False by whoever consumes the notification.
        self.file_changed = False

    @staticmethod
    def _canonical(path):
        """Return *path* in a form suitable for equality comparison.

        Decodes bytes paths, collapses duplicate separators and applies the
        platform's case rules, so that equivalent spellings of the same path
        (e.g. a doubled backslash before the file name) compare equal.
        """
        return os.path.normcase(os.path.normpath(os.fsdecode(path)))

    def on_modified(self, event):
        """Set ``file_changed`` when the event refers to the watched JSON file.

        Args:
            event: The watchdog file-system event; its ``src_path`` is
                compared against ``DIRECTORY``/``JSON_FILE``.
        """
        if event.is_directory:
            # Modifying a file also touches its parent directory; ignore that.
            return

        # Compare normalised paths instead of raw strings: an exact string
        # match depends on how the backend happens to format src_path.
        watched = self._canonical(os.path.join(DIRECTORY, JSON_FILE))
        if self._canonical(event.src_path) == watched:
            logging.info(f"File {event.src_path} has been modified")
            self.file_changed = True

def run_notebook():
    """Execute the API-pull notebook in place via ``jupyter nbconvert``.

    Runs ``DIRECTORY``/``NOTEBOOK_FILE`` with ``--execute --inplace`` and logs
    the outcome.  Success or failure is decided by the process exit code
    rather than by searching stderr for a particular word, so a failed run is
    never reported as completed.

    Returns:
        None.  All results are reported through ``logging``.
    """
    logging.info("Executing notebook to pull API data...")
    notebook_path = f"{DIRECTORY}{NOTEBOOK_FILE}"
    command = ["jupyter", "nbconvert", "--to", "notebook", "--execute", "--inplace", notebook_path]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # Raised when the ``jupyter`` executable cannot be started at all.
        logging.error(f"Notebook Errors: {exc}")
        return

    if result.returncode != 0:
        logging.error(f"Notebook execution failed with exit code {result.returncode}.")
        if result.stderr:
            logging.error(f"Notebook Errors: {result.stderr}")
        return

    logging.info("Notebook execution completed.")
    if result.stderr:
        # On a successful run stderr carries nbconvert's own status output
        # (such as its "Writing ..." line), so it is informational.
        logging.info(f"Notebook Update: {result.stderr}")

def check_and_run_pipeline():
    """Run the Elyra pipeline if the JSON file changed since the last check.

    Reads the module-level ``event_handler.file_changed`` flag (created in the
    ``__main__`` block) and, when it is set and no run is in progress,
    executes ``elyra-pipeline run DIRECTORY/PIPELINE_FILE`` and logs its
    stdout, stderr and failure status.

    The ``is_running`` function attribute guards against overlapping runs and
    is always cleared on exit, even if launching the pipeline raises, so one
    failure cannot block every later run.

    Returns:
        None.  All results are reported through ``logging``.
    """
    if not event_handler.file_changed or check_and_run_pipeline.is_running:
        logging.info("No changes detected in JSON file since last check or pipeline is still running.")
        return

    check_and_run_pipeline.is_running = True
    # Consume the notification *before* running: a modification that arrives
    # while the pipeline executes then stays flagged for the next check
    # instead of being wiped out when this run finishes.
    event_handler.file_changed = False
    try:
        logging.info("Change detected in JSON file, initiating Elyra pipeline...")

        pipeline_path = f"{DIRECTORY}{PIPELINE_FILE}"
        if not os.path.exists(pipeline_path):
            logging.error(f"Pipeline file not found: {pipeline_path}")
            return

        command = ["elyra-pipeline", "run", pipeline_path]
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError as exc:
            # Raised when the ``elyra-pipeline`` executable cannot be started.
            logging.error(f"Pipeline Errors: {exc}")
            logging.error("Failed to execute the pipeline.")
            return

        if result.stdout:
            logging.info(f"Pipeline Output: {result.stdout}")
        if result.stderr:
            logging.error(f"Pipeline Errors: {result.stderr}")
        if result.returncode != 0:
            logging.error("Failed to execute the pipeline.")
    finally:
        check_and_run_pipeline.is_running = False


# Attach a flag to the function to monitor its running state
check_and_run_pipeline.is_running = False

if __name__ == "__main__":
    # ``event_handler`` must stay a module-level name: check_and_run_pipeline()
    # reads it as a global.
    event_handler = ChangeHandler()
    observer = Observer()
    observer.schedule(event_handler, DIRECTORY, recursive=False)
    observer.start()

    scheduler = BackgroundScheduler()
    scheduler.add_job(run_notebook, 'interval', minutes=2)  # Adjusted to run every 2 minutes
    scheduler.add_job(check_and_run_pipeline, 'interval', minutes=2)
    scheduler.start()

    try:
        # Simulate a file change for testing purposes
        if TEST_MODE:
            time.sleep(5)  # Wait a few seconds before simulating
            logging.info("Test Mode: Simulating file change...")
            event_handler.file_changed = True  # Manually set the flag to simulate file change

        # Keep the main thread alive; the observer and scheduler do their
        # work on background threads.
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        # Normal way to stop the script (e.g. Ctrl+C); fall through to cleanup.
        pass
    finally:
        # Always release the watcher and the scheduler, including when the
        # interrupt arrives during the TEST_MODE delay above.
        observer.stop()
        scheduler.shutdown()
        observer.join()

2024-05-14 17:38:51,031 - INFO - Adding job tentatively -- it will be properly scheduled when the scheduler starts
2024-05-14 17:38:51,032 - INFO - Adding job tentatively -- it will be properly scheduled when the scheduler starts
2024-05-14 17:38:51,032 - INFO - Added job "run_notebook" to job store "default"
2024-05-14 17:38:51,033 - INFO - Added job "check_and_run_pipeline" to job store "default"
2024-05-14 17:38:51,034 - INFO - Scheduler started
2024-05-14 17:40:51,033 - INFO - Running job "run_notebook (trigger: interval[0:02:00], next run at: 2024-05-14 17:40:51 EDT)" (scheduled at 2024-05-14 17:40:51.030063-04:00)
2024-05-14 17:40:51,036 - INFO - Executing notebook to pull API data...
2024-05-14 17:40:51,035 - INFO - Running job "check_and_run_pipeline (trigger: interval[0:02:00], next run at: 2024-05-14 17:42:51 EDT)" (scheduled at 2024-05-14 17:40:51.032081-04:00)
2024-05-14 17:40:51,040 - INFO - No changes detected in JSON file since last check or pipeline is still running.
20