Remove line breaks custom tool

jaap-karssenberg edited this page Oct 25, 2013 · 1 revision
Clone this wiki locally

A Great Example To Show The Usage Of The "Custom Tools"

Created by nomnex

Case: How to remove the manual line breaks from plain-text emails (e.g. newsgroup posts, private messages, etc.) that you paste into Zim.

  • Download and put the script in your "/home/user/bin" directory or wherever you like
  • Make it executable.
  • Go into Zim Tools --> Custom Tools, and add the script with the "%f" commandline argument.

This will add the script to the Tools menu, so you can process a whole page in one go.

file "remove_line_breaks.py":

#!/usr/bin/python

import sys
import re


def split_headers(text):
    '''Split the zim headers from the body text and return both separately.

    A page source that starts with 'Content-Type:' is treated as having
    mail style headers, which end at the first empty line.

    Returns a 2-tuple (headers, body). headers is '' when the text has
    no zim headers; body is '' when the text consists of headers only,
    i.e. no empty line follows them.
    '''
    if not text.startswith('Content-Type:'):
        # no zim headers
        return '', text

    # mail style headers: split on first empty line.
    # partition() rather than split() + unpacking, so a header-only page
    # without an empty line does not raise ValueError.
    headers, _sep, body = text.partition('\n\n')
    return headers, body


def join_headers(headers, text):
    '''Combine zim headers and body text into a single page source.

    Leading whitespace of the body is always dropped. When headers are
    given, their trailing whitespace is dropped as well and exactly one
    empty line separates them from the body.
    '''
    body = text.lstrip()
    if not headers:
        return body
    return '\n\n'.join((headers.rstrip(), body))


def remove_line_breaks(text):
    '''Remove line breaks within paragraphs, but keep paragraph breaks.

    Paragraphs are separated by one or more empty lines (lines holding
    only spaces or tabs count as empty). Within a paragraph each line
    break is replaced by a single space, except a line break directly
    after '==', which ends a heading in zim wiki syntax and is kept.

    Returns the paragraphs joined by exactly one empty line, without
    leading or trailing newlines.
    '''
    blank_line = re.compile(r'^[ \t]+\n', re.M) # pattern for empty lines
    text = blank_line.sub('\n', text) # fix empty lines to be really empty

    # Drop newlines at the very start and end, otherwise they would be
    # turned into stray leading / trailing spaces below
    text = text.strip('\n')

    # Split on runs of empty lines. Splitting on a literal '\n\n' leaves a
    # dangling '\n' for odd-length runs, which would become a leading
    # space in the next paragraph.
    paragraphs = re.split(r'\n{2,}', text)

    # pattern for newline not at end of heading in zim wiki syntax
    line_break = re.compile(r'(?<!==)\n')
    joined = [
        line_break.sub(' ', para) # replace line breaks with space
        for para in paragraphs
        if para.strip() # remove empty para
    ]
    return '\n\n'.join(joined) # join with empty lines


def remove_line_breaks_in_zim_page(file):
    '''Remove line breaks in a zim page file, rewriting the file in place.

    Reads the whole file, splits off the zim headers, removes the line
    breaks from the body text only, and writes headers plus processed
    body back to the same path.

    file is the path of the page file to process.
    '''
    # 'with' closes the handle even when reading or writing raises
    with open(file) as fh:
        text = fh.read()

    headers, text = split_headers(text)
    text = remove_line_breaks(text)
    text = join_headers(headers, text)

    with open(file, 'w') as fh:
        fh.write(text)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Exit with a usage hint (printed to stderr, non-zero exit status)
        # instead of an IndexError traceback when no file is given
        sys.exit('Usage: %s PAGE_FILE' % sys.argv[0])
    remove_line_breaks_in_zim_page(sys.argv[1]) # first commandline argument