<a href="https://colab.research.google.com/github/gngai/Supernote-Exporter/blob/main/Supernote_Converter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# --- IMPORTS ---
import os
import glob
import hashlib
import re
import time
import subprocess
from datetime import datetime
from google.colab import drive

# --- MOUNT GOOGLE DRIVE ---
# Mount Drive at /content/drive; BASE_PATH in the configuration cell points
# below this mount point. force_remount=True requests a fresh mount even when
# one is already present (e.g. when this cell is re-run).
drive.mount('/content/drive', force_remount=True)

# --- INSTALL/UPGRADE PACKAGES ---
# Quietly (-q) install or upgrade supernotelib and Pillow in the runtime.
# NOTE(review): versions are not pinned, so behavior can drift between sessions.
# NOTE(review): the `supernote-tool` command run by convert_note_file is
# presumably provided by supernotelib — confirm.
!pip install --upgrade -q supernotelib Pillow

print("✅ Cell 1: Imports, Drive mount, and package installation complete!")

Mounted at /content/drive
✅ Cell 1: Imports, Drive mount, and package installation complete!


In [7]:
# --- CONFIGURATION ---

# Root of the mounted Drive, the folder the .note files are read from, and the
# folder the exported PDFs are written to.
BASE_PATH = '/content/drive/MyDrive'
INPUT_FOLDER = os.path.join(BASE_PATH, 'Supernote/Note')
OUTPUT_FOLDER = os.path.join(BASE_PATH, 'Supernote_Export')

# Bookkeeping folders kept inside the export folder; the leading '_' marks
# them as internal.
_ARCHIVE_FOLDER, _MD5_FOLDER = (
    os.path.join(OUTPUT_FOLDER, internal_name)
    for internal_name in ('_Archive', '_MD5_Hashes')
)

# Folder names that are skipped wherever they appear below INPUT_FOLDER.
EXCLUDE_SUBDIRECTORIES = [
    'Shapes',
    'EXPORT',
]

# How many superseded PDF versions are retained per note in the archive.
ARCHIVE_COPIES_TO_KEEP = 5

# Arguments passed to the 'supernote-tool' command-line utility:
#   convert  -> the conversion sub-command
#   -a       -> process all pages of the note file
#   -t pdf   -> produce PDF output
SUPERNOTE_TOOL_ARGS = ['convert', '-a', '-t', 'pdf']

print("✅ Cell 2: Configuration loaded!")

✅ Cell 2: Configuration loaded!


In [8]:
def initialize_environment():
    """Create the input, output, archive and hash folders when they are missing.

    Folders that already exist are left as they are.
    """
    print("Initializing folder structure...")
    required_dirs = (INPUT_FOLDER, OUTPUT_FOLDER, _ARCHIVE_FOLDER, _MD5_FOLDER)
    for directory in required_dirs:
        os.makedirs(directory, exist_ok=True)
    print("Folder structure verified.")

def _get_valid_note_files():
    """Return every .note file under INPUT_FOLDER that is not excluded.

    A file is dropped when any component of its path relative to INPUT_FOLDER
    equals one of the names in EXCLUDE_SUBDIRECTORIES.
    """
    search_pattern = os.path.join(INPUT_FOLDER, '**/*.note')
    kept = []
    for candidate in glob.glob(search_pattern, recursive=True):
        path_parts = os.path.relpath(candidate, INPUT_FOLDER).split(os.sep)
        if any(excluded in path_parts for excluded in EXCLUDE_SUBDIRECTORIES):
            continue
        kept.append(candidate)
    return kept

def _calculate_md5(file_path):
    """Return the hex MD5 digest of a file's contents.

    The file is read in 4096-byte blocks. If it cannot be read, a warning is
    printed and None is returned instead of raising.
    """
    digest = hashlib.md5()
    try:
        with open(file_path, "rb") as stream:
            while True:
                block = stream.read(4096)
                if not block:
                    break
                digest.update(block)
        return digest.hexdigest()
    except IOError as e:
        print(f"    ⚠️ Could not calculate MD5 for {os.path.basename(file_path)}: {e}")
        return None

def _prune_archives(archive_subdir, base_name):
    """Delete the oldest archived PDFs of one note beyond ARCHIVE_COPIES_TO_KEEP."""
    # Match this note's archives exactly. A bare `<base>_*.pdf` glob would also
    # pick up archives of other notes whose names merely start with `<base>_`,
    # and would misbehave on names containing glob metacharacters.
    name_pattern = re.compile(
        rf'^{re.escape(base_name)}_(\d{{8}}_\d{{6}})(_ARCHIVED)?\.pdf$'
    )
    stamped = []
    for entry in os.listdir(archive_subdir):
        match = name_pattern.match(entry)
        if match:
            stamped.append((match.group(1), entry))
    stamped.sort()  # YYYYmmdd_HHMMSS timestamps sort chronologically as text

    # Compute the surplus explicitly: a `[:-keep]` slice deletes nothing when
    # keep is 0, which is the opposite of what "keep 0 copies" means.
    keep = max(ARCHIVE_COPIES_TO_KEEP, 0)
    surplus = stamped[:max(len(stamped) - keep, 0)]
    for _, entry in surplus:
        os.remove(os.path.join(archive_subdir, entry))


def convert_note_file(note_file_path):
    """Converts a single .note file to PDF if it has changed, with archiving.

    The note's MD5 is compared with the hash stored from the previous run; the
    conversion is skipped when the hash matches and the PDF still exists.
    Otherwise any existing PDF is moved into the archive, the note is converted
    with `supernote-tool`, and the new hash is stored. If the conversion fails,
    the PDF that was just archived is moved back so the export folder is never
    left without the last good version.

    Args:
        note_file_path: Path of the .note file, located under INPUT_FOLDER.

    Returns:
        True if a PDF was (re)generated, None if the note was unchanged and
        skipped, False if an error occurred (the error is printed, not raised).
    """
    try:
        relative_path = os.path.relpath(note_file_path, INPUT_FOLDER)
        print(f"🔄 Processing: {relative_path}")

        # Calculate Paths (the input folder layout is mirrored in each tree)
        base_name, _ = os.path.splitext(os.path.basename(note_file_path))
        relative_dir = os.path.dirname(relative_path)
        output_subdir = os.path.join(OUTPUT_FOLDER, relative_dir)
        archive_subdir = os.path.join(_ARCHIVE_FOLDER, relative_dir)
        md5_subdir = os.path.join(_MD5_FOLDER, relative_dir)
        for folder in (output_subdir, archive_subdir, md5_subdir):
            os.makedirs(folder, exist_ok=True)

        output_pdf_path = os.path.join(output_subdir, f"{base_name}.pdf")
        hash_file_path = os.path.join(md5_subdir, f"{base_name}.md5")

        # Check for Changes
        current_hash = _calculate_md5(note_file_path)
        last_hash = None
        if os.path.exists(hash_file_path):
            # Context manager so the handle is closed deterministically.
            with open(hash_file_path, 'r') as f:
                last_hash = f.read().strip()
        if current_hash and current_hash == last_hash and os.path.exists(output_pdf_path):
            print("    ☑️ No changes detected. Skipping.")
            return None

        # Archive the current PDF, named after its last-modified time.
        archive_path = None
        if os.path.exists(output_pdf_path):
            timestamp = datetime.fromtimestamp(os.path.getmtime(output_pdf_path)).strftime("%Y%m%d_%H%M%S")
            archive_path = os.path.join(archive_subdir, f"{base_name}_{timestamp}.pdf")
            print("    📦 Archiving existing PDF...")
            os.rename(output_pdf_path, archive_path)

        # Convert
        print("    ⚙️ Converting to PDF...")
        command = ['supernote-tool'] + SUPERNOTE_TOOL_ARGS + [note_file_path, output_pdf_path]
        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except Exception:
            # Put the previous PDF back (overwriting any partial output) so a
            # failed run does not remove the last good export.
            if archive_path is not None and os.path.exists(archive_path):
                os.replace(archive_path, output_pdf_path)
                print("    ↩️ Conversion failed; restored the previous PDF.")
            raise

        if current_hash:
            with open(hash_file_path, 'w') as f:
                f.write(current_hash)

        # Prune only after a successful conversion, and only when this run
        # actually added an archive entry.
        if archive_path is not None:
            _prune_archives(archive_subdir, base_name)

        print(f"    ✅ Success! Saved to: {os.path.relpath(output_pdf_path, BASE_PATH)}")
        return True

    except subprocess.CalledProcessError as e:
        print(f"    ❌ ERROR during conversion: {(e.stderr or '').strip()}")
        return False
    except Exception as e:
        print(f"    ❌ An unexpected error occurred with {os.path.basename(note_file_path)}: {e}")
        return False

# --- INITIALIZE THE ENVIRONMENT ---
# Runs when this cell is executed, so the input, output, archive and hash
# folders exist before any conversion or cleanup function is called.
initialize_environment()
print("\n✅ Cell 3: Core logic loaded and environment initialized!")

Initializing folder structure...
Folder structure verified.

✅ Cell 3: Core logic loaded and environment initialized!


In [9]:
def convert_all_files():
    """Run convert_note_file over every valid note and print a summary.

    Notes that have not changed are skipped by convert_note_file; they count as
    processed but not as converted. Notes that fail count as neither.
    """
    print("--- 🚀 Starting Full Conversion Process ---")
    note_paths = _get_valid_note_files()
    total_files = len(note_paths)
    if total_files == 0:
        print("No .note files found to process.")
        return

    print(f"Found {total_files} files to check for conversion.")

    # Each outcome is True (converted), None (unchanged) or False (error).
    outcomes = [convert_note_file(note_path) for note_path in note_paths]
    converted_count = sum(1 for outcome in outcomes if outcome is True)
    processed_count = sum(1 for outcome in outcomes if outcome is not False)

    print("\n--- ✨ Full Conversion Complete ---")
    print(f"📊 {converted_count} of {total_files} file(s) were newly converted or updated.")
    print(f"✅ Processed {processed_count} of {total_files} files without errors.")


def clean_export_folder():
    """Moves PDFs to the archive if their source .note file is gone.

    Every exported PDF outside the internal archive/hash folders is compared
    with the set of PDFs expected from the current valid .note files. A PDF
    with no matching note is moved to the archive under the name
    `<name>_<mtime>_ARCHIVED.pdf`, and its stored MD5 file is removed.
    """
    print("--- 🧹 Archiving Orphan Exports ---")
    expected_pdfs = {os.path.splitext(os.path.relpath(f, INPUT_FOLDER))[0] + '.pdf' for f in _get_valid_note_files()}

    # Only the real internal folders are skipped. Testing for a leading '_'
    # would also skip user notes or folders whose names start with '_',
    # so their orphans would never be archived.
    internal_roots = {os.path.relpath(folder, OUTPUT_FOLDER).split(os.sep)[0]
                      for folder in (_ARCHIVE_FOLDER, _MD5_FOLDER)}
    all_exported_pdfs = [f for f in glob.glob(os.path.join(OUTPUT_FOLDER, '**/*.pdf'), recursive=True)
                         if os.path.relpath(f, OUTPUT_FOLDER).split(os.sep)[0] not in internal_roots]

    archived_count = 0
    for pdf_path in all_exported_pdfs:
        relative_pdf = os.path.relpath(pdf_path, OUTPUT_FOLDER)
        if relative_pdf in expected_pdfs:
            continue

        print(f"📁 Archiving orphan: {relative_pdf}")
        base_name, _ = os.path.splitext(os.path.basename(pdf_path))
        relative_dir = os.path.dirname(relative_pdf)
        archive_subdir = os.path.join(_ARCHIVE_FOLDER, relative_dir)
        os.makedirs(archive_subdir, exist_ok=True)

        # The archive name carries the PDF's last-modified time.
        timestamp = datetime.fromtimestamp(os.path.getmtime(pdf_path)).strftime("%Y%m%d_%H%M%S")
        archive_filename = f"{base_name}_{timestamp}_ARCHIVED.pdf"
        archive_path = os.path.join(archive_subdir, archive_filename)

        os.rename(pdf_path, archive_path)
        # Drop the stored hash so a note that reappears is converted again.
        md5_path = os.path.join(_MD5_FOLDER, os.path.splitext(relative_pdf)[0] + '.md5')
        if os.path.exists(md5_path):
            os.remove(md5_path)
        archived_count += 1

    print(f"✅ Archiving complete. Archived {archived_count} file(s).")


def clean_archive_folder():
    """Deletes all archived versions of a note if its main PDF no longer exists.

    Archive files are recognised by the name `<name>_<YYYYmmdd_HHMMSS>.pdf`,
    optionally with an `_ARCHIVED` suffix before the extension. For each one,
    the primary PDF is expected at the same relative location under
    OUTPUT_FOLDER; the archive file is deleted when that PDF is absent.
    """
    print("--- 🧹 Cleaning Archive Folder ---")

    # Only the real internal folders are excluded from the primary-PDF set.
    # Testing for a leading '_' would also drop user PDFs whose top-level name
    # starts with '_', and their archives would then be deleted as orphans.
    internal_roots = {os.path.relpath(folder, OUTPUT_FOLDER).split(os.sep)[0]
                      for folder in (_ARCHIVE_FOLDER, _MD5_FOLDER)}
    primary_pdfs = {os.path.normpath(f)
                    for f in glob.glob(os.path.join(OUTPUT_FOLDER, '**/*.pdf'), recursive=True)
                    if os.path.relpath(f, OUTPUT_FOLDER).split(os.sep)[0] not in internal_roots}
    all_archives = glob.glob(os.path.join(_ARCHIVE_FOLDER, '**/*_*.pdf'), recursive=True)

    deleted_count = 0
    archive_pattern = re.compile(r'^(.*?)_(\d{8}_\d{6})(_ARCHIVED)?\.pdf$')

    for archive_path in all_archives:
        match = archive_pattern.match(os.path.basename(archive_path))
        if not match:
            continue  # not an archive file name; leave it alone

        base_name = match.group(1)
        relative_dir = os.path.relpath(os.path.dirname(archive_path), _ARCHIVE_FOLDER)
        # normpath collapses the '.' that relpath yields for top-level archives,
        # so both sides of the membership test use the same path form.
        primary_pdf_path = os.path.normpath(os.path.join(OUTPUT_FOLDER, relative_dir, f"{base_name}.pdf"))

        if primary_pdf_path not in primary_pdfs:
            print(f"🗑️ Deleting orphan archive: {os.path.relpath(archive_path, _ARCHIVE_FOLDER)}")
            os.remove(archive_path)
            deleted_count += 1

    print(f"✅ Archive cleanup complete. Deleted {deleted_count} orphan archive(s).")

# Confirmation that the definitions in this cell have been loaded.
print("✅ Cell 4: High-level and cleanup functions loaded!")

✅ Cell 4: High-level and cleanup functions loaded!


In [10]:
# ==============================================================================
# --- ⚙️ EXECUTION HUB ---
#
# Instructions:
# 1. Run Cells 1 through 4 to load everything (imports, configuration,
#    core logic, and the high-level/cleanup functions called below).
# 2. Leave exactly ONE of the function calls below uncommented
#    (convert_all_files() is the one active by default).
# 3. Run this cell to perform the action.
# ==============================================================================


# --- CONVERT ALL FILES ---
# Checks all notes and converts only those that have been added or changed.
convert_all_files()


# --- CLEANUP FUNCTIONS ---
# Use these to organize your exported files and archives.

# Archives PDFs if the source .note file is gone.
# clean_export_folder()

# Deletes old archived versions for notes that no longer have a primary PDF.
# clean_archive_folder()


# ------------------------------------------------------------------------------
# print("✅ Execution Hub is ready. Uncomment a command to run.")

--- 🚀 Starting Full Conversion Process ---
Found 25 files to check for conversion.
🔄 Processing: Medicaid.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Wish List Purchases.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Heating Schedule.note
    ☑️ No changes detected. Skipping.
🔄 Processing: House Projects.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Clothing Inventory.note
    ☑️ No changes detected. Skipping.
🔄 Processing: 20241001_141333.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Frivolous Spend 2025.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Estate Planning.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Food Recipe Mods.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Divorce Checklist.note
    ☑️ No changes detected. Skipping.
🔄 Processing: @Priorities@.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Travel.note
    ☑️ No changes detected. Skipping.
🔄 Processing: Gift Ideas.note
    ☑️ No chang