In [39]:
import subprocess
import re

def get_clipboard_data():
    """Return the current clipboard contents as text.

    Runs the ``pbpaste`` command and decodes everything it writes to
    standard output as UTF-8.

    ``subprocess.run`` is used instead of a bare ``Popen`` so that the child
    process is waited on and its stdout pipe is closed before returning;
    otherwise every call leaves an un-reaped process and an open file
    descriptor behind until garbage collection.

    Returns:
        str: The decoded output of ``pbpaste``.

    Raises:
        OSError: If the ``pbpaste`` executable cannot be started.
        UnicodeDecodeError: If the command's output is not valid UTF-8.
    """
    # The exit status is deliberately not checked: whatever the command
    # printed is returned as-is.
    completed = subprocess.run(['pbpaste'], stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8')

def set_clipboard_data(data):
    """Replace the clipboard contents with ``data``.

    Args:
        data: Text to copy. It is encoded as UTF-8 and piped to the
            standard input of the ``pbcopy`` command.
    """
    clipboard_writer = subprocess.Popen(['pbcopy'], stdin=subprocess.PIPE)
    encoded = data.encode('utf-8')
    # communicate() sends the bytes, closes stdin and waits for the command to exit.
    clipboard_writer.communicate(input=encoded)

In [40]:
def convert_table(table_html):
    """Convert the inner HTML of a table into a Markdown table.

    The first ``<tr>`` row is treated as the header when it contains ``<th>``
    cells; a ``---`` separator row is emitted right after it. Every other row
    contributes its ``<td>`` cells. Only bare tags (no attributes) are
    recognised.

    Args:
        table_html: The markup found between ``<table>`` and ``</table>``.

    Returns:
        str: One Markdown line per row, joined with real newline characters
        and followed by a blank line. An empty string is returned when no
        row or no cell could be extracted.
    """
    rows = re.findall(r'<tr>(.*?)</tr>', table_html, flags=re.DOTALL)
    if not rows:
        return ''

    def extract_cells(tag, row_html):
        # Cells may span several lines (DOTALL); a Markdown table row must
        # stay on a single line, so runs of whitespace are collapsed.
        found = re.findall(r'<{0}>(.*?)</{0}>'.format(tag), row_html, flags=re.DOTALL)
        return [' '.join(cell.split()) for cell in found]

    def as_row(cells):
        return '| ' + ' | '.join(cells) + ' |'

    markdown_rows = []
    header_cells = extract_cells('th', rows[0])
    if header_cells:
        markdown_rows.append(as_row(header_cells))
        markdown_rows.append(as_row(['---'] * len(header_cells)))
        rows = rows[1:]

    for row in rows:
        cells = extract_cells('td', row)
        if cells:
            markdown_rows.append(as_row(cells))

    if not markdown_rows:
        return ''

    # Use actual newlines: '\\n' would insert a literal backslash followed by
    # 'n' into the output instead of breaking the line.
    return '\n'.join(markdown_rows) + '\n\n'

In [41]:
def _convert_list_items(list_html, marker):
    """Rewrite every ``<li>`` element in ``list_html`` as a line starting with ``marker``."""
    return re.sub(
        r'<li>(.*?)</li>',
        lambda m: marker + m.group(1).strip() + '\n',
        list_html,
        flags=re.DOTALL,
    )


def _html_to_markdown(text):
    """Apply the regex-based HTML-to-Markdown rewrites to ``text`` and return the result."""
    # Pre-process: remove newlines between tags
    text = re.sub(r'>\s*\n\s*<', '><', text)

    # Convert headers (h1 to h6): one '#' per heading level
    text = re.sub(
        r'<h([1-6])>(.*?)</h\1>',
        lambda m: '#' * int(m.group(1)) + ' ' + m.group(2) + '\n\n',
        text,
    )

    # Convert code blocks, then the remaining inline code
    text = re.sub(
        r'<pre><code>(.*?)</code></pre>',
        lambda m: '```\n' + m.group(1) + '\n```\n\n',
        text,
        flags=re.DOTALL,
    )
    text = re.sub(r'<code>(.*?)</code>', r'`\1`', text)

    # Convert bold
    text = re.sub(r'<(?:b|strong)>(.*?)</(?:b|strong)>', r'**\1**', text)

    # Convert italic
    text = re.sub(r'<(?:i|em)>(.*?)</(?:i|em)>', r'*\1*', text)

    # Convert links
    text = re.sub(r'<a href="(.*?)".*?>(.*?)</a>', r'[\2](\1)', text)

    # Convert tables
    text = re.sub(
        r'<table>(.*?)</table>',
        lambda m: convert_table(m.group(1)),
        text,
        flags=re.DOTALL,
    )

    # Convert lists. Items are rewritten inside their own list so ordered and
    # unordered lists get different markers; converting every <li> in one
    # global pass would turn ordered items into bullets as well.
    text = re.sub(
        r'<ol>(.*?)</ol>',
        lambda m: _convert_list_items(m.group(1), '1. ') + '\n',
        text,
        flags=re.DOTALL,
    )
    text = re.sub(
        r'<ul>(.*?)</ul>',
        lambda m: _convert_list_items(m.group(1), '- ') + '\n',
        text,
        flags=re.DOTALL,
    )
    # Items that were not wrapped in a list element fall back to bullets.
    text = _convert_list_items(text, '- ')

    # Convert blockquotes: prefix every line of the quote with '> '
    text = re.sub(
        r'<blockquote>(.*?)</blockquote>',
        lambda m: '> ' + m.group(1).strip().replace('\n', '\n> ') + '\n\n',
        text,
        flags=re.DOTALL,
    )

    # Convert paragraphs (DOTALL so paragraphs spanning several lines match)
    text = re.sub(r'<p>(.*?)</p>', r'\1\n\n', text, flags=re.DOTALL)

    # Convert line breaks
    text = re.sub(r'<br\s*/?>', '\n', text)

    # Remove remaining HTML tags
    text = re.sub(r'<[^>]+>', '', text)

    # Clean up extra whitespace: collapse runs of blank lines into one
    text = re.sub(r'\n\s*\n', '\n\n', text)
    return text.strip()


def convert_to_markdown():
    """Convert the HTML on the clipboard to Markdown and copy the result back.

    The text is read with ``get_clipboard_data()``, rewritten with a series of
    regular-expression substitutions (headers, code, bold/italic, links,
    tables, lists, blockquotes, paragraphs, line breaks), stripped of any
    remaining tags, and written back with ``set_clipboard_data()``.

    Limitations visible from the patterns used: apart from ``<a href="...">``,
    only tags without attributes are recognised (others are simply removed by
    the final tag-stripping step), and nested lists are not handled.

    Returns:
        str: The converted Markdown text (also placed on the clipboard).
    """
    # Get text from clipboard and convert it
    text = _html_to_markdown(get_clipboard_data())

    # Copy converted text back to clipboard
    set_clipboard_data(text)
    return text

In [42]:
# Smoke test: convert whatever is currently on the clipboard and show the result
result = convert_to_markdown()
print("Text converted to Markdown and copied to clipboard:", "-" * 50, result, sep="\n")

Text converted to Markdown and copied to clipboard:
--------------------------------------------------
