In [1]:
import re
import json

In [4]:
# Same heading pattern as before: one or more '#', whitespace, then the title text.
_HEADING_RE = re.compile(r'^(#+)\s+(.+)$')

# A table delimiter row: cells made only of dashes (with optional alignment
# colons), e.g. "|---|---|", "| --- | :--: |" or "--- | ---".
_TABLE_SEPARATOR_RE = re.compile(
    r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?\s*$'
)


def _is_table_separator(line):
    """Return True if ``line`` is a Markdown table delimiter row."""
    # Require a pipe so that a plain "---" horizontal rule is not mistaken
    # for a delimiter row.
    return '|' in line and _TABLE_SEPARATOR_RE.match(line) is not None


def _split_table_row(line):
    """Split one table line into stripped cell strings, keeping empty cells."""
    stripped = line.strip()
    # Drop only the optional outer pipes; inner empty cells must survive so
    # that every row keeps the same number of columns as the header.
    if stripped.startswith('|'):
        stripped = stripped[1:]
    if stripped.endswith('|'):
        stripped = stripped[:-1]
    return [cell.strip() for cell in stripped.split('|')]


def _parse_markdown(content):
    """Convert Markdown text into a nested dict keyed by heading text.

    Each heading becomes a dict stored under its title inside the nearest
    preceding heading of a shallower level (or the document root). Within a
    section, non-empty text lines are collected under ``'paragraphs'`` and
    table rows under ``'table'`` as a list of ``{header: cell}`` dicts.
    A later table in the same section replaces the earlier ``'table'`` entry.
    """
    document = {}
    # Stack of (heading_level, section_dict). The root uses level 0 so it is
    # never popped. Tracking the real level (instead of the stack depth) keeps
    # nesting correct when a document skips heading levels.
    stack = [(0, document)]
    current_section = document

    headers = []
    table = None
    skip_separator = False

    lines = content.split('\n')
    for index, line in enumerate(lines):
        # The delimiter row directly after a table header carries no data.
        if skip_separator:
            skip_separator = False
            continue

        heading_match = _HEADING_RE.match(line)
        if heading_match:
            level = len(heading_match.group(1))
            text = heading_match.group(2)

            # Close every open section at the same or a deeper level.
            while stack[-1][0] >= level:
                stack.pop()

            new_section = {}
            stack[-1][1][text] = new_section
            stack.append((level, new_section))
            current_section = new_section

            # A heading always ends any table that was in progress.
            table = None
            headers = []
            continue

        # Look at the actual following line (by position, not by value) to
        # decide whether this line is a table header.
        next_line = lines[index + 1] if index + 1 < len(lines) else ''
        if '|' in line and _is_table_separator(next_line):
            headers = _split_table_row(line)
            table = []
            current_section['table'] = table
            skip_separator = True
            continue

        # Table rows: only rows whose cell count matches the header are kept.
        if '|' in line and table is not None and headers:
            row_data = _split_table_row(line)
            if len(row_data) == len(headers):
                table.append(dict(zip(headers, row_data)))
            continue

        # Regular text content.
        if line.strip():
            current_section.setdefault('paragraphs', []).append(line.strip())

        # Any line that reaches this point is outside a table.
        table = None
        headers = []

    return document


def markdown_to_json(markdown_file, json_file):
    """Read a Markdown file and write its heading/table structure as JSON.

    Args:
        markdown_file: Path of the UTF-8 Markdown file to read.
        json_file: Path of the JSON file to write (overwritten if present).

    Returns:
        None. The result is written to ``json_file`` with 2-space indentation
        and non-ASCII characters preserved.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    document = _parse_markdown(content)

    # Write the JSON to a file
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(document, f, indent=2, ensure_ascii=False)

In [5]:
# Usage: convert one protocol document and write the result next to the notebook.
# NOTE(review): the input path is absolute and specific to one user's machine;
# adjust it before running elsewhere.
PROTOCOL_MARKDOWN = '/Users/luke.thompson/git/protocols/protocol_PCR_16S_V4V5.md'
OUTPUT_JSON = 'test.json'
markdown_to_json(PROTOCOL_MARKDOWN, OUTPUT_JSON)
