Notecase import script

jaap-karssenberg edited this page Oct 25, 2013 · 1 revision

file "notecase2zim.py":

#!/usr/bin/python

# Simple script to convert NoteCase Document to a Zim notebook folder
#
# NoteCase reference: 
#    http://notecase.sourceforge.net/  (Free version, discontinued)
#    http://www.virtual-sky.com/   (Pro version)
#
# Based on BeautifulSoup (you need to install it before running notecase2zim): 
#    http://www.crummy.com/software/BeautifulSoup/
#
# Adapted to my use of NoteCase and Zim => others may want to adapt it
# For instance:
#   Color "red" in NoteCase => I use "italic" in Zim
#   Background Color "grey" in NoteCase => Title 3 in Zim
#
# Usage :
# -------
# 1. Save NoteCase document to .ncd format (plain text, no compression)
# 2. This script assumes the name is "notecase.ncd". This can be changed below
# 3. Run: python notecase2zim.py
# 4. Get a Folder named "notecase.zim" with the main file "notebook.zim" inside
#
# v1.1
# Jigho 2011
# Contact: https://launchpad.net/~jigho
#

import os
import shutil
import sys
import re
import datetime
sys.path.append('./BeautifulSoup')

from BeautifulSoup import BeautifulSoup

# Path of the NoteCase document to convert (read relative to the directory the
# script is started from). The output folder name is derived from it in the
# __main__ section by turning the ".ncd" ending into ".zim".
notecasefile = 'notecase.ncd'

def create_file_zim():
    """Write the Zim notebook descriptor "notebook.zim" in the current directory.

    The notebook name and the end-of-line mode are hard-coded in the text
    written below; change them there if needed.
    """
    # The context manager closes the file even when the write fails.
    with open('notebook.zim', 'w') as fileZim:
        fileZim.write('[Notebook]\nname=Notes\nversion=0.4\nendofline=dos')
    
def process_title(titre, date):
    # Some titles are plain, but some have information that we do not use in Zim
    if (titre.span):
        m = titre.span.contents
        titre2 = str(m[1])
    elif (titre.string):
        titre2 = titre.string
    else:
        m = titre.contents
        titre2 = str(m[1])

    # Delete white space, / and " in the filename
    output1 = str(titre2 + '.txt').replace(' ', '_')
    output2 = output1.replace('/', '')
    output3 = output2.replace('\"', '')
    output = unicode(output3, 'utf-8', errors='ignore')

    # Some verbose, usefull on large contents
    # to be aware that the program is still processing...
    print 'Creating file: ', output
    
    fileOut = open(output, 'w')

    # Standard information at the start of any Zim file
    fileOut.write('Content-Type: text/x-zim-wiki\n')
    fileOut.write('Wiki-Format: zim 0.4\n')
    fileOut.write('Creation-Date: ' + str(date) + '\n')
    
    fileOut.write('\n====== ' + titre2 + ' ======\n')
    fileOut.write('\n')

    return fileOut

def create_subdir(repertoire):
    """Create the sub-folder that holds the children of a page and enter it.

    ``repertoire`` is the open page file; the folder is named after the file
    with its ".txt" extension removed. The current working directory is
    changed to the new folder, so the caller must ``chdir('..')`` afterwards.
    """
    rep = repertoire.name
    # Strip only the trailing extension: a blanket replace() would also eat a
    # ".txt" inside the title and the folder would no longer match its page.
    if rep.endswith('.txt'):
        rep = rep[:-len('.txt')]
    os.mkdir(rep)
    os.chdir(rep)

def process_format(c, fichier, formatString):
    """Write the content of tag ``c`` wrapped in the marker ``formatString``.

    Used for the basic formatting tags (underline, bold, italic, ...):
    the marker is written, the children of ``c`` are converted recursively,
    then the marker is written again to close the format.

    When the formatted content ends with a line break, the break is removed
    from the content and the new line is written after the closing marker,
    so the new line itself is not formatted.
    """
    newLine = False

    # Open Wiki format
    fichier.write(formatString)

    if len(c.contents) > 1:
        before_last = c.contents[-2]
        last = c.contents[-1]
        # Only treat the content as "ending with a newline" when what follows
        # the final <br> is blank text; otherwise real text after the break
        # (e.g. <b>foo<br/>bar</b>) would be thrown away.
        ends_with_break = (
            before_last.__class__.__name__ == 'Tag'
            and before_last.name == 'br'
            and last.__class__.__name__ == 'NavigableString'
            and not last.strip())
        if ends_with_break:
            before_last.extract()
            last.extract()
            newLine = True

    # Process content (recursively !)
    process_content(c, fichier, formatString)

    # Close Wiki format
    fichier.write(formatString)

    # The trailing line break removed above goes after the closing marker
    if newLine:
        fichier.write('\n')

def process_content(contenu, fichier, currentFormat):
    # "currentFormat" is a trick to close the Wiki format at end of each line
    # even if the format is applied to multi-lines
    # Nota: this trick would need to be be enhanced
    #       when multiple formats are nested

    for c in contenu:
        if (c.__class__.__name__ == 'Tag'):
            # <dl> tag stands for new note, ie new Zim file
            if c.name == 'dl':
                create_subdir(fichier)
                process_page(c)
                os.chdir('..')

            # <br> tag stands for new line
            # use the "currentFormat" trick to properly close format tag
            # and then reopen it on the the new line
            elif c.name == 'br':
                fichier.write(currentFormat)
                fichier.write('\n')
                fichier.write(currentFormat)

            # <u> tag stands for underline
            elif c.name == 'u':
                process_format(c, fichier, '__')

            # <b> tag stands for bold
            elif c.name == 'b':
                process_format(c, fichier, '**')

            # <i> tag stands fr italic
            elif c.name == 'i':
                process_format(c, fichier, '//')

            # <s> tag stands for strike-through
            elif c.name == 's':
                process_format(c, fichier, '~~')
                
            # <span> tag can have different purposes according to arguments
            elif c.name == 'span':
                # Color "red" in NoteCase => I use "italic" in Zim
                if (c['style'] == "color:#ff0000"):
                    process_format(c, fichier, '//')
                # Color "blue" in NoteCase => I use "bold" in Zim
                elif (c['style'] == "color:#0000ff"):
                    process_format(c, fichier, '**')
                # Color "green" in NoteCase => I use "bold" in Zim
                elif (c['style'] == "color:#00ff00"):
                    process_format(c, fichier, '**')
                # Background Color "grey" in NoteCase => Title 3 in Zim
                elif (c['style'] == "background-color:#bfbfbf"):
                    fichier.write('===== ')
                    # Don't not use the "currentFormat" trick,
                    # since title format is not symetrical
                    #(which add difficulty)
                    # and Zim seems to autoclose this format at the end of line
                    process_content(c, fichier, currentFormat)
                    currentFormat = ''
                # Other <span> contents are treated as plain text
                # You may add more cases according to your needs
                else:
                    print "WARNING : unknown SPAN type", c.attrs
                    process_content(c, fichier, currentFormat)

            # <p> tag is not taken into account
            elif c.name == 'p':
                process_content(c, fichier, currentFormat)

            # <a> tag stands for links
            elif c.name == 'a':
                fichier.write('[[')
                fichier.write(c['href'].encode('utf-8'))
                fichier.write('|')
                process_content(c, fichier, currentFormat)
                fichier.write(']]')

            # In case program encounter a Tag which is not dealt with
            # according to your needs, you can then add specific bloc
            else:    
                print 'WARNING, unknown tag: ', c.name
                fichier.write(
                    'TAG ' + c.name + ' / ' + c.string.encode("UTF-8"))

        else:
            ligne = c.string.encode("UTF-8")
            # Delete the new line symbol at start of the line
            # This happens when there was a <br> just before
            # but <br> is already taken into account
            fichier.write(re.sub("^\n", '', ligne))

def process_page(page):
    """Convert one NoteCase ``<dl>`` list into Zim pages in the current directory.

    The children of ``page`` are walked in order: each ``<dt>`` (note title)
    starts a new page file, each ``<dd>`` (note body) is converted into the
    page opened by the latest ``<dt>``, and a ``date_created`` property
    comment sets the creation date used for the page files created after it
    (today's date until one is seen).
    """
    creation = datetime.date.today()
    fileOut = None
    try:
        for a in page.contents:
            node_type = a.__class__.__name__
            if node_type == 'Tag':
                if a.name == 'dt':
                    # Close the previous page before starting the next one,
                    # so open files are bounded by nesting depth rather than
                    # by the number of notes.
                    if fileOut is not None:
                        fileOut.close()
                    fileOut = process_title(a, creation)
                elif a.name == 'dd':
                    if fileOut is None:
                        # A body with no title before it has no page to go to.
                        print('WARNING, <dd> without a preceding <dt>: skipped')
                        continue
                    process_content(a.contents, fileOut, '')
            elif node_type == 'Comment':
                m = re.match("<!--property:date_created=(.*)-->$", str(a))
                if m:
                    creation = datetime.date.fromtimestamp(float(m.group(1)))
    finally:
        if fileOut is not None:
            fileOut.close()

def main(repertoire):
    """Convert the NoteCase document ``notecasefile`` into folder ``repertoire``.

    The document is read and parsed first, then the working directory is
    changed to ``repertoire`` (which must already exist), where the notebook
    descriptor and all pages are written.
    """
    # Read through a context manager so the input file is closed right away.
    with open(notecasefile, 'r') as source:
        xml = source.read()
    soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.XML_ENTITIES)
    # The outermost <dl> of the document body holds the top-level notes.
    level0 = soup.html.body.dl
    os.chdir(repertoire)
    create_file_zim()
    process_page(level0)

if __name__ == '__main__':
    # Output folder: the input name with ".ncd" turned into ".zim". The dot is
    # escaped so that only a real ".ncd" extension matches.
    zimdir = re.sub(r"\.ncd$", ".zim", notecasefile)
    if zimdir == notecasefile:
        # No ".ncd" extension: append ".zim" instead, otherwise mkdir would
        # collide with the input file itself.
        zimdir = notecasefile + ".zim"
    # Uncomment to overwrite the result of a previous run:
    #shutil.rmtree(zimdir)
    os.mkdir(zimdir)
    main(zimdir)
Clone this wiki locally
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.
Press h to open a hovercard with more details.