In [9]:
# from IPython.display import display
# display(images[0])

In [None]:
# !pip install pytesseract

In [None]:
# !pip install pdf2image pillow pandas camelot-py[cv] PyMuPDF

In [11]:
import os
import re
import fitz  # PyMuPDF
import camelot
import pytesseract
import pandas as pd
from pdf2image import convert_from_path
from PIL import Image
import json
from io import BytesIO

import warnings
warnings.filterwarnings("ignore")

# --- CONFIGURE EXECUTABLE PATHS ---
# The defaults below match a standard Windows install. To run the notebook on
# another machine without editing it, set the POPPLER_PATH and/or
# TESSERACT_PATH environment variables before starting the kernel.
POPLER_PATH = os.environ.get(
    "POPPLER_PATH", "C:/Program Files/poppler-24.08.0/Library/bin"
)  # use forward slashes!
TESSERACT_PATH = os.environ.get(
    "TESSERACT_PATH", "C:/Program Files/Tesseract-OCR/tesseract.exe"
)

# --- ENSURE TESSERACT PATH IS SET ---
# Fail early with a clear message instead of letting the first OCR call fail.
if not os.path.isfile(TESSERACT_PATH):
    raise RuntimeError(f"Tesseract not found at {TESSERACT_PATH}. Check the path!")
pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH

def extract_table_images_ocr(pdf_path, page_num, dpi=300):
    """Render one PDF page, crop candidate table regions, and OCR each crop.

    Candidate regions are chosen heuristically: every hit of the text "Table"
    on the page, or, when there is no hit, every text block wider than 150 and
    taller than 50 page units.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        page_num: Zero-based index of the page to process.
        dpi: Resolution used to rasterise the page before cropping.

    Returns:
        A list with the stripped OCR text of every region whose OCR output is
        not blank (possibly empty).
    """
    ocr_results = []
    doc = fitz.open(pdf_path)
    try:
        page = doc[page_num]
        pix = page.get_pixmap(dpi=dpi)
        img = Image.open(BytesIO(pix.tobytes("png")))

        # Rectangles returned by PyMuPDF are expressed in page units, while the
        # rendered image is in pixels at `dpi`. Derive the conversion factors
        # from the actual pixmap size so the crops land on the right region.
        page_rect = page.rect
        scale_x = pix.width / page_rect.width
        scale_y = pix.height / page_rect.height

        # Heuristic: Find rectangles likely to be tables (try 'Table' text and big blocks)
        # NOTE(review): search_for yields the boxes of the matched text itself,
        # so these crops cover the "Table" hit rather than a whole table body.
        rects = page.search_for("Table")
        if not rects:
            rects = []
            for block in page.get_text("dict")["blocks"]:
                if block['type'] == 0 and block.get("lines"):
                    r = fitz.Rect(block['bbox'])
                    if r.width > 150 and r.height > 50:
                        rects.append(r)

        for r in rects:
            # Convert to pixel coordinates and clamp to the image bounds.
            left = max(0, int((r.x0 - page_rect.x0) * scale_x))
            top = max(0, int((r.y0 - page_rect.y0) * scale_y))
            right = min(img.width, int((r.x1 - page_rect.x0) * scale_x) + 1)
            bottom = min(img.height, int((r.y1 - page_rect.y0) * scale_y) + 1)
            if right <= left or bottom <= top:
                # Degenerate or fully off-page rectangle: nothing to OCR.
                continue
            cropped = img.crop((left, top, right, bottom))
            ocr_text = pytesseract.image_to_string(cropped)
            if ocr_text.strip():
                ocr_results.append(ocr_text.strip())
    finally:
        # Release the file handle even when rendering or OCR raises.
        doc.close()
    return ocr_results

def pdf_to_markdown_json(pdf_path, output_base, poppler_path=POPLER_PATH):
    """Convert one PDF into a Markdown file and a JSON file.

    For every page the function collects, in this order:
      1. the embedded text (PyMuPDF),
      2. text-based tables (Camelot, "stream" flavor),
      3. OCR of heuristic table regions (``extract_table_images_ocr``),
      4. OCR of the whole rendered page (pdf2image + Tesseract).

    Each of stages 2-4 is best-effort: a failure is printed and the remaining
    stages and pages are still processed.

    Args:
        pdf_path: Path of the PDF to convert.
        output_base: Output path without extension; ``.md`` and ``.json`` are
            appended to it.
        poppler_path: Directory with the Poppler binaries, forwarded to
            ``convert_from_path``.

    Returns:
        None. Writes ``{output_base}.md`` and ``{output_base}.json`` (UTF-8).
    """
    pdf_name = os.path.basename(pdf_path)
    md_lines = []
    pages_data = []

    doc = fitz.open(pdf_path)
    try:
        for i in range(len(doc)):
            page = doc[i]
            page_text = page.get_text()
            md_lines.append(f"\n\n# Page {i+1}\n")
            md_lines.append(page_text if page_text else "")

            page_data = {
                "page": i+1,
                "text": page_text,
                "tables": [],
                "ocr_tables": [],
                "ocr_full": ""
            }

            # --- Table extraction with Camelot (text-based tables only)
            try:
                tables = camelot.read_pdf(pdf_path, pages=str(i+1), flavor="stream")
                for t in tables:
                    df = t.df
                    if len(df) > 1:
                        # Promote the first row to column headers.
                        try:
                            df.columns = [str(col).strip() for col in df.iloc[0]]
                            df = df[1:]
                        except Exception as header_err:
                            # Keep the table as extracted, but report the
                            # problem instead of hiding it.
                            print(f"[Table header error] PDF: {pdf_name} p{i+1}: {header_err}")
                    table_md = df.to_markdown(index=False)
                    md_lines.append(f"\n[TABLE]\n{table_md}")
                    # NOTE(review): once the first row is promoted to headers it
                    # is no longer part of df.values, so the JSON rows omit it.
                    page_data["tables"].append(df.values.tolist())
            except Exception as e:
                print(f"[Table error] PDF: {pdf_name} p{i+1}: {e}")

            # --- Table-region image OCR (for image-based tables)
            try:
                ocr_tables = extract_table_images_ocr(pdf_path, i)
                for t_ocr in ocr_tables:
                    md_lines.append(f"\n[OCR_TABLE]\n{t_ocr}")
                    page_data["ocr_tables"].append(t_ocr)
            except Exception as e:
                print(f"[OCR_TABLE error] PDF: {pdf_name} p{i+1}: {e}")

            # --- Full-page OCR (for scanned or hard-to-read text)
            try:
                images = convert_from_path(
                    pdf_path, first_page=i+1, last_page=i+1, poppler_path=poppler_path
                )
                ocr_full = pytesseract.image_to_string(images[0])
                if ocr_full.strip():
                    md_lines.append(f"\n[OCR]\n{ocr_full.strip()}")
                    page_data["ocr_full"] = ocr_full.strip()
            except Exception as e:
                print(f"[OCR error] PDF: {pdf_name} p{i+1}: {e}")

            pages_data.append(page_data)
    finally:
        # Always release the PDF handle; otherwise a batch run keeps one open
        # handle per processed file.
        doc.close()

    # Write markdown
    with open(f"{output_base}.md", "w", encoding="utf-8") as f_md:
        f_md.write("\n".join(md_lines))
    # Write JSON
    with open(f"{output_base}.json", "w", encoding="utf-8") as f_json:
        json.dump(pages_data, f_json, indent=2, ensure_ascii=False)

def batch_pdf_folder_to_markdown_json(pdf_folder, output_folder, poppler_path=POPLER_PATH):
    """Run ``pdf_to_markdown_json`` on every ``*.pdf`` entry of a folder.

    Args:
        pdf_folder: Directory whose entries are scanned (not recursively).
        output_folder: Directory that receives the outputs; created if missing.
        poppler_path: Poppler binary directory handed through to the converter.

    Returns:
        None. Progress is reported with ``print``.
    """
    os.makedirs(output_folder, exist_ok=True)

    def _forward_slashes(path):
        # Always use forward slashes for all paths
        return path.replace("\\", "/")

    for entry in os.listdir(pdf_folder):
        # Extension match is case-insensitive; everything else is skipped.
        if not entry.lower().endswith(".pdf"):
            continue

        source_pdf = _forward_slashes(os.path.join(pdf_folder, entry))
        # Drop only the final extension; earlier dots in the name are kept.
        stem = entry.rpartition(".")[0]
        target_base = _forward_slashes(os.path.join(output_folder, stem))

        print(f"Processing {entry} ...")
        pdf_to_markdown_json(source_pdf, target_base, poppler_path=poppler_path)
        print(f"Written: {target_base}.md and .json")

# === USAGE ===
# Convert every PDF found in the "endo_suite_pdfs" folder and write one .md and
# one .json file per PDF into "converted_pdfs" (both paths are relative to the
# current working directory; the output folder is created if it is missing).
batch_pdf_folder_to_markdown_json("endo_suite_pdfs", "converted_pdfs")


Processing CHEST Standardized Curriculum - Essential Skills Segment.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/CHEST Standardized Curriculum - Essential Skills Segment.md and .json
Processing Diagnostic Bronchoscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/Diagnostic Bronchoscopy Module Book.md and .json
Processing Diagnostic Hysteroscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/Diagnostic Hysteroscopy Module Book.md and .json
Processing Emergency Bronchoscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/Emergency Bronchoscopy Module Book.md and .json
Processing EMR-ESD Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/EMR-ESD Module Book.md and .json
Processing ENDO GI User Guide .pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/ENDO GI User Guide .md and .json
Processing ENDO HYST User Guide.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/ENDO HYST User Guide.md and .json
Processing ENDO Mentor Suite Service Guide.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/ENDO Mentor Suite Service Guide.md and .json
Processing ENDO Suite - BRONCH Mentor Simulator.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/ENDO Suite - BRONCH Mentor Simulator.md and .json
Processing ENDO URO Mentor User Guide .pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/ENDO URO Mentor User Guide .md and .json
Processing Essential Bronchoscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/Essential Bronchoscopy Module Book.md and .json
Processing Essential EBUS Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/Essential EBUS Module Book.md and .json
Processing EUS Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/EUS Module Book.md and .json
Processing EUS Task Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/EUS Task Module Book.md and .json
Processing First Module for Bleeding Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/First Module for Bleeding Module Book.md and .json
Processing First Module for Colonoscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/First Module for Colonoscopy Module Book.md and .json
Processing First Module for ERCP Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/First Module for ERCP Module Book.md and .json
Processing First Module for Gastroscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/First Module for Gastroscopy Module Book.md and .json
Processing First Module for Sigmoidoscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/First Module for Sigmoidoscopy Module Book.md and .json
Processing GI Endoscopy Fundamental Skills Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generat

Written: converted_pdfs/GI Endoscopy Fundamental Skills Module Book.md and .json
Processing Hysteroscopic Myomectomy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/Hysteroscopic Myomectomy Module Book.md and .json
Processing Hysteroscopic Polypectomy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/Hysteroscopic Polypectomy Module Book.md and .json
Processing Second Module for Colonoscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/Second Module for Colonoscopy Module Book.md and .json
Processing Second Module for ERCP Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/Second Module for ERCP Module Book.md and .json
Processing Second Module for Gastroscopy Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/Second Module for Gastroscopy Module Book.md and .json
Processing Stone Manipulation Module Books.pdf ...
Written: converted_pdfs/Stone Manipulation Module Books.md and .json
Processing Strictures Module Book.pdf ...
Written: converted_pdfs/Strictures Module Book.md and .json
Processing TURBT Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/TURBT Module Book.md and .json
Processing TURP Basic Skills Proctor Book.pdf ...
Written: converted_pdfs/TURP Basic Skills Proctor Book.md and .json
Processing TURP Procedure Proctor Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/TURP Procedure Proctor Book.md and .json
Processing URO Basic Tasks Module Book.pdf ...


  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)
  cols, rows, v_s, h_s = self._generate_columns_and_rows(bbox, user_cols)


Written: converted_pdfs/URO Basic Tasks Module Book.md and .json


In [None]:
# Poppler setup on Windows:
# 1. Download a release from https://github.com/oschwartz10612/poppler-windows/releases/
#    and place the unzipped folder under C:\Program Files.
# 2. Add C:\Program Files\poppler-24.08.0\Library\bin to the PATH environment variable.
# 3. Restart the kernel and set the same path in the notebook as well.