In [None]:
import glob
import sys
import os
from pathlib import Path

# Make the local word_to_markdown.py importable: resolve the current
# directory to an absolute path and give it top priority on sys.path.
project_dir = os.path.abspath(os.curdir)
sys.path[:0] = [project_dir]

# Where the converter module is expected to live.
module_path = os.path.join(project_dir, "word_to_markdown.py")

# Diagnostics: show the paths involved so an import failure below is easy
# to trace back to a wrong working directory.
print(f"Current working directory: {os.getcwd()}")
print(f"Added to path: {project_dir}")
print(f"Python path: {sys.path}")
print(f"Module path exists: {os.path.exists(module_path)}")

# Pull in the converter class from the local script.
from word_to_markdown import WordToMarkdownConverter

volume_path = "test_files"

# Collect the DOCX inputs. glob returns matches in arbitrary order, so sort
# them for a reproducible processing order, and skip the "~$name.docx"
# owner/lock files Word leaves next to open documents (they match *.docx but
# are not real documents and would only produce spurious failures).
all_docx = sorted(
    path
    for path in glob.glob(f"{volume_path}/*.docx")
    if not os.path.basename(path).startswith("~$")
)
if not all_docx:
    # Make an empty run explicit instead of silently reporting 0 conversions.
    print(f"No .docx files found in {volume_path}")

# Initialize the converter
converter = WordToMarkdownConverter(
    preserve_tables=True,
    preserve_images=True,
    preserve_lists=True,
)

# Create an output directory for the converted markdown files
output_dir = "markdown_files"
os.makedirs(output_dir, exist_ok=True)

# Convert each DOCX file to Markdown, collecting whatever convert_file
# returns for each successful conversion.
converted_files = []
for docx_file in all_docx:
    # Output path: same base name as the input, with a .md extension,
    # placed inside output_dir (kept as a plain string path).
    docx_filename = os.path.basename(docx_file)
    output_path = os.path.join(output_dir, f"{Path(docx_filename).stem}.md")

    try:
        # Only the conversion itself is guarded.
        converted_file = converter.convert_file(docx_file, output_path)
    except Exception as e:
        # Per-file boundary of a batch job: report the failure and continue
        # with the remaining documents rather than aborting the whole run.
        print(f"Failed to convert {docx_file}: {e}")
    else:
        converted_files.append(converted_file)
        print(f"Successfully converted: {docx_file}")

print(f"Converted {len(converted_files)} files. Output files are in the {output_dir} directory.")



Converted: test_files/test.docx → markdown_files/test.md
Successfully converted: test_files/test.docx
Converted 1 files. Output files are in the markdown_files directory.
