In [None]:
## INITIALIZE ##
from dotenv import load_dotenv
import os
from langgraph_app import app_graph
from IPython.display import Markdown, display, HTML
import pprint

# Load environment variables from .env file in the current directory
# NOTE(review): presumably this supplies the API credentials used by
# langgraph_app — confirm against that module.
load_dotenv()

# Enable automatic reloading
# autoreload mode 2 re-imports modules before each cell runs, so edits to
# imported project code take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
## SELECT FILES ##
# Lets the user choose the images to OCR. A native tkinter dialog is used when
# one can be shown; otherwise every .jpeg/.jpg/.png in images/ is selected.
# The result is published as the global `selected_images` (always a list).
import os
import glob
import warnings

# Suppress tkinter warnings on macOS
warnings.filterwarnings("ignore", category=DeprecationWarning)


def to_relative_paths(paths, base_dir):
    """Return `paths` rewritten relative to `base_dir` where possible.

    os.path.relpath raises ValueError when no relative path exists (for
    example, a different drive on Windows); such paths are kept unchanged.
    """
    relative = []
    for path in paths:
        try:
            relative.append(os.path.relpath(path, base_dir))
        except ValueError:
            relative.append(path)
    return relative


def pick_images_with_dialog(initial_dir):
    """Ask the user to choose image files through a tkinter file dialog.

    Returns the chosen paths as a list (empty when the dialog is cancelled),
    or None when no dialog can be shown: tkinter is not installed, or the Tk
    root window cannot be created (tkinter.TclError, e.g. no display inside a
    container or over SSH). The caller uses None to switch to the fallback.
    """
    try:
        import tkinter as tk
        from tkinter import filedialog
    except ImportError:
        print("tkinter not available. Using fallback file selection...")
        return None

    from contextlib import redirect_stderr
    from io import StringIO

    try:
        # Creating the root window is the step that fails without a display.
        root = tk.Tk()
    except tk.TclError as exc:
        print(f"Cannot open a file dialog ({exc}). Using fallback file selection...")
        return None

    try:
        root.withdraw()  # Hide the main window
        root.attributes('-topmost', True)  # Bring dialog to front

        print("Opening file selection dialog...")
        print("You can select multiple files by holding Cmd (Mac) or Ctrl (Windows/Linux)")

        # Capture stderr to suppress warnings emitted while the dialog is open
        with redirect_stderr(StringIO()):
            chosen = filedialog.askopenfilenames(
                title="Select Discharge Summary Images",
                initialdir=initial_dir,
                filetypes=[
                    ("Image files", "*.jpg *.jpeg *.png *.gif *.bmp *.tiff"),
                    ("JPEG files", "*.jpg *.jpeg"),
                    ("PNG files", "*.png"),
                    ("All files", "*.*")
                ]
            )
    finally:
        # Always release the hidden root window, even if the dialog raised.
        root.destroy()

    return list(chosen)


def list_available_images():
    """Return the sorted .jpeg/.jpg/.png files found in the images folder."""
    found = glob.glob("images/*.jpeg") + glob.glob("images/*.jpg") + glob.glob("images/*.png")
    return sorted(found)


selected_images = pick_images_with_dialog(os.path.abspath("images"))

if selected_images is None:
    # No dialog could be shown: use all available files as default
    selected_images = list_available_images()

    print("Available image files:")
    for position, image_path in enumerate(selected_images, 1):
        print(f"{position}. {image_path}")

    print(f"\nUsing all {len(selected_images)} available files:")
    for image_path in selected_images:
        print(f"  - {image_path}")
elif selected_images:
    # Show and store paths relative to the working directory where possible
    selected_images = to_relative_paths(selected_images, os.getcwd())

    print(f"\nSelected {len(selected_images)} files:")
    for image_path in selected_images:
        print(f"  - {image_path}")
    print("\nYou can now run the OCR cell with these selected files.")
else:
    print("\nNo files selected.")

In [None]:
## OCR ##
# Runs the OCR node on the chosen images, strips any wrapping code fence from
# the returned text, saves it to discharge.md, then offers/opens/renders it.
# Nothing is written, opened or rendered when OCR does not return a string.
import os
import shutil
import subprocess
import sys


def strip_code_fence(text):
    """Remove one wrapping ```markdown / ``` fence from `text`, if present.

    At most one opening and one closing fence are removed, so a fenced block
    that is genuinely the first thing in the document body is left intact.
    """
    for opener in ("```markdown\n", "```\n"):
        if text.startswith(opener):
            text = text[len(opener):]
            break
    for closer in ("\n```", "```"):
        if text.endswith(closer):
            text = text[:-len(closer)]
            break
    return text


def open_with_default_app(path):
    """Open `path` with the operating system's default application.

    Raises whatever the platform launcher raises (for example
    FileNotFoundError when `open`/`xdg-open` is missing, or
    subprocess.CalledProcessError on a non-zero exit status).
    """
    if sys.platform == "darwin":  # macOS
        subprocess.run(["open", path], check=True)
    elif sys.platform == "win32":  # Windows
        # `start` through the shell treats a quoted path (one containing
        # spaces) as the window title; os.startfile has no such quirk.
        os.startfile(path)
    else:  # Linux
        subprocess.run(["xdg-open", path], check=True)


# Use the selected images from the file selection above
if 'selected_images' in globals() and selected_images:
    images = selected_images
    print(f"Processing {len(images)} selected files:")
else:
    # Fallback to default files if none selected
    images = ["images/3.1.jpeg", "images/3.2.jpeg", "images/3.3.jpeg", "images/3.4.jpeg"]
    print("No files selected, using default files:")
for img in images:
    print(f"  - {img}")

print("\nRunning OCR...")
markdown = app_graph.run_node("OCR", images, "gpt-4o")

if isinstance(markdown, str):
    # Clean up markdown text - remove code block wrappers if present
    markdown = strip_code_fence(markdown)

    # Save the cleaned markdown to a file
    out_path = os.path.abspath("discharge.md")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(markdown)
    print(f"Markdown saved to {out_path}")

    # Try to expose a download link in the notebook UI
    try:
        from IPython.display import FileLink, display
        display(FileLink(out_path))
    except Exception:
        print("Could not create a download link inside the notebook. The file path is printed above.")

    # Also copy to /app/output if that directory exists (common in Docker image)
    output_dir = "/app/output"
    if os.path.isdir(output_dir):
        copy_path = os.path.join(output_dir, "discharge.md")
        try:
            shutil.copy(out_path, copy_path)
            print(f"Also copied to {copy_path}")
        except OSError as exc:
            # Best effort only, but report the failure instead of hiding it.
            print(f"Could not copy to {copy_path}: {exc}")

    # Launch the markdown file in the default application
    try:
        open_with_default_app(out_path)
        print("Opened discharge.md in default application")
    except Exception as e:
        print(f"Could not open discharge.md automatically: {e}")

    # Display the markdown in the notebook
    try:
        display(Markdown(markdown))
    except Exception as exc:
        print(f"Could not render the markdown in the notebook: {exc}")
else:
    print("OCR did not return text. No file written.")

No files selected, using default files:
  - images/3.1.jpeg
  - images/3.2.jpeg
  - images/3.3.jpeg
  - images/3.4.jpeg

Running OCR...


NameError: name 'app_graph' is not defined

In [None]:
## CONFIGURE PROCESSING STEPS ##
# Switches for the stages that follow OCR; each later cell checks its own flag
# and skips itself when the flag is False.
run_extract_diagnoses = True  # False -> skip diagnoses extraction
run_extract_medications = True  # False -> skip medication extraction
run_fix_medications = True  # False -> skip the medication matching step
run_generate_summary = True  # False -> skip HTML summary generation

# Report the current configuration as one block of text.
print(
    "\n".join(
        [
            "Processing Steps Configuration:",
            f"📊 Extract Diagnoses: {'✅ Enabled' if run_extract_diagnoses else '❌ Disabled'}",
            f"💊 Extract Medications: {'✅ Enabled' if run_extract_medications else '❌ Disabled'}",
            f"🔗 Fix Medications (PharmeEasy): {'✅ Enabled' if run_fix_medications else '❌ Disabled'}",
            f"📋 Generate Summary: {'✅ Enabled' if run_generate_summary else '❌ Disabled'}",
            "",
            "💡 Tip: Change the variables above to True/False to enable/disable steps",
            "📝 Note: Some steps depend on previous ones (e.g., Fix Medications needs Extract Medications)",
        ]
    )
)

In [None]:
## EXTRACT DIAGNOSES ##
# A missing flag (configuration cell not run) is treated like a disabled one.
if not globals().get('run_extract_diagnoses'):
    print("⏭️ Skipping diagnoses extraction (disabled in configuration)")
    diagnoses = None
else:
    print("🩺 Extracting diagnoses...")
    diagnoses = app_graph.run_node("ExtractDiagnoses", model="gpt-4o-mini")
    print("✅ Diagnoses extraction completed")

In [None]:
## EXTRACT MEDICATIONS ##
# A missing flag (configuration cell not run) is treated like a disabled one.
if not globals().get('run_extract_medications'):
    print("⏭️ Skipping medications extraction (disabled in configuration)")
    medications = None
else:
    print("💊 Extracting medications...")
    medications = app_graph.run_node("ExtractMedications", model="gpt-4o-mini")
    print("✅ Medications extraction completed")

In [None]:
## FIX MEDICATIONS ##
# Three outcomes: step disabled, step enabled but no extracted medications to
# work on, or step enabled and run.
if not globals().get('run_fix_medications'):
    print("⏭️ Skipping medication fixing (disabled in configuration)")
    fixed_medications = None
elif globals().get('medications') is None:
    # Covers both "medications was never defined" and "medications is None".
    print("⚠️ Cannot fix medications - medications extraction was skipped or failed")
    print("💡 Enable 'run_extract_medications' to use this step")
    fixed_medications = None
else:
    print("🔗 Matching medications with PharmeEasy...")
    fixed_medications = app_graph.run_node("FixMedications", model="gpt-4o")
    print("✅ Medication fixing completed")

In [1]:
## GENERATE SUMMARY ##
# A missing flag (configuration cell not run) is treated like a disabled one.
if not globals().get('run_generate_summary'):
    print("⏭️ Skipping HTML summary generation (disabled in configuration)")
    html_summary = None
else:
    print("📋 Generating interactive HTML summary...")
    html_summary = app_graph.run_node("AddSummaryPills")

    # Persist the generated HTML next to the notebook
    with open("summary.html", "w", encoding="utf-8") as f:
        f.write(html_summary)
    print("Summary saved to summary.html")

    # Hand the file to the platform's default opener
    import subprocess
    import sys
    try:
        if sys.platform == "darwin":  # macOS
            launch_cmd, via_shell = ["open", "summary.html"], False
        elif sys.platform == "win32":  # Windows: `start` is a shell builtin
            launch_cmd, via_shell = ["start", "summary.html"], True
        else:  # Linux
            launch_cmd, via_shell = ["xdg-open", "summary.html"], False
        subprocess.run(launch_cmd, shell=via_shell, check=True)
        print("Opened summary.html in default browser")
    except subprocess.CalledProcessError as e:
        print(f"Could not open summary.html automatically: {e}")
    except FileNotFoundError:
        print("Could not find system command to open summary.html")

    # Render the same HTML inline in the notebook
    display(HTML(html_summary))
    print("✅ HTML summary generation completed")

⏭️ Skipping HTML summary generation (disabled in configuration)
