In [90]:
import string

In [81]:
def get_content_level(strip_line):
    """Split a Markdown heading line into its title and zero-based level.

    Args:
        strip_line: A heading line with surrounding whitespace already
            removed; it is expected to start with one or more '#'.

    Returns:
        A ``(section_title, level)`` tuple, where ``level`` is the number of
        leading '#' characters minus one (so '#' is level 0, '##' is level 1).
    """
    # Count only the leading run of '#'.  Measuring the first space-delimited
    # token instead would miscount lines such as '#Title' or '#\tTitle', where
    # the token also contains title characters.
    num_of_sharp = len(strip_line) - len(strip_line.lstrip('#'))
    section_title = strip_line[num_of_sharp:].strip()
    return section_title, num_of_sharp - 1

def retrieve_headers(input_fn):
    """Build the header tree of a Markdown file.

    Every line whose first non-blank character is '#' is treated as a section
    heading, except for lines inside fenced code blocks (``` or ~~~), where a
    leading '#' is usually a code comment rather than a heading.

    Args:
        input_fn: Path of the Markdown file, read as UTF-8.

    Returns:
        The virtual root ``header`` (level -1) that holds all parsed headings.
    """
    headers = header('', -1, True) # the virtual root
    open_fence = None  # (fence character, fence length) while inside a fenced code block
    with open(input_fn, encoding='utf-8', mode='r') as md_f:
        for line in md_f:
            strip_line = line.strip()

            fence_char = strip_line[:1]
            if fence_char in ('`', '~'):
                fence_len = len(strip_line) - len(strip_line.lstrip(fence_char))
                info = strip_line[fence_len:]
                if fence_len >= 3:
                    if open_fence is None:
                        # The info string of a backtick fence may not contain
                        # backticks; such a line is inline code, not a fence.
                        if fence_char == '~' or '`' not in info:
                            open_fence = (fence_char, fence_len)
                            continue
                    elif (fence_char == open_fence[0]
                          and fence_len >= open_fence[1]
                          and not info.strip()):
                        # A closing fence uses the same character, is at least
                        # as long as the opening one and carries no info string.
                        open_fence = None
                        continue

            if open_fence is not None:
                continue  # still inside a code block

            if not strip_line.startswith('#'):
                continue

            section_title, level = get_content_level(strip_line)
            headers.absorb(header(section_title, level))

    return headers

In [88]:
# -*- coding: utf-8 -*-
class header():
    """One node in the tree of section headers extracted from a Markdown file.

    The tree hangs off a virtual root of level -1.  Real headers have levels
    starting at 0.  When a document jumps straight to a deeper level, the
    skipped levels are filled with virtual (title-less) nodes so that every
    node sits exactly one level below its parent.
    """

    def __init__(self, content='', level=-1, is_virtual=False):
        # A virtual header stands in for a level the document skipped, i.e. a
        # section title that starts directly at a deeper level.
        self.is_virtual = is_virtual
        self.header_title = content
        self.my_level = level    # level starts from 0; -1 is the virtual root.
        self.my_order = 0        # 1-based position among siblings, set by assign_order()
        self.child_headers = []
        self.anchor = None       # "[title](#link)" text, set by gen_gfm_anchor()

    def assign_order(self):
        """Number the children of every node 1, 2, 3, ... recursively."""
        for order, child in enumerate(self.child_headers, start=1):
            child.my_order = order
            child.assign_order()

    def add_child_header(self, new_section_header):
        """Append ``new_section_header`` as the last direct child."""
        self.child_headers.append(new_section_header)

    def absorb(self, new_header):
        """Insert ``new_header`` somewhere below this node.

        A header exactly one level deeper becomes a direct child.  A deeper
        one is handed to the most recently added child; if there is no child
        yet, a virtual child is created first to fill the skipped level.

        Raises:
            ValueError: If ``new_header`` is not deeper than this node.
        """
        if new_header.my_level <= self.my_level:
            raise ValueError("The new header has a higher level ({}) than the current level({}).".format(new_header.my_level, self.my_level))

        if new_header.my_level == self.my_level + 1:
            self.child_headers.append(new_header)
        else:
            if len(self.child_headers) == 0:
                self.child_headers.append(header('', self.my_level + 1, True))

            self.child_headers[-1].absorb(new_header)

    def get_level_symbol(self, sym):
        """Return ``sym`` repeated once per level of this header.

        Raises:
            ValueError: If ``sym`` is not a string.
        """
        if not isinstance(sym, str):
            raise ValueError("Given symbol format ({}) not correct, which must be a string.".format(sym))

        return  sym * self.my_level

    def conv_title(self, header):
        """Convert a header title into the base text of its anchor link.

        The title is lower-cased, ASCII punctuation other than the hyphen is
        removed, and each run of whitespace becomes a single hyphen.

        Raises:
            ValueError: If ``header`` is not a string.
        """
        if not isinstance(header, str):
            raise ValueError("Given header has type ({}). But a string is required.".format(type(header)))

        # str methods return new strings, so the result has to be kept, and
        # translate() (unlike strip()) also removes punctuation inside the title.
        # NOTE(review): GitHub appears to keep underscores in anchors as well;
        # confirm before relying on titles that contain '_'.
        removable = string.punctuation.replace('-', '')
        header = header.lower().translate(str.maketrans('', '', removable))
        return '-'.join(header.split())  # change any run of spaces into a hyphen

    def get_anchor_link(self, header, existing_anchor_links):
        """Return a unique anchor link for ``header``.

        ``existing_anchor_links`` lists the base anchors generated so far and
        is extended in place.  If the base anchor has already been produced N
        times (N > 0), "-N" is appended to the returned link.
        """
        new_anchor_link = self.conv_title(header)
        num_appearance = existing_anchor_links.count(new_anchor_link)
        existing_anchor_links.append(new_anchor_link)

        if num_appearance > 0:
            new_anchor_link = "{}-{}".format(new_anchor_link, str(num_appearance))

        return new_anchor_link

    def gen_gfm_anchor(self, existing_anchor_links=None):
        """Generate the Markdown link of this header and of all descendants.

        Each non-virtual node gets ``self.anchor`` set to "[title](#link)";
        virtual nodes get ``None``.

        Args:
            existing_anchor_links: Base anchors already in use; extended in
                place.  A fresh list is used when omitted, so separate calls
                do not influence each other's numbering.

        Returns:
            The generated links as a list, in document order.
        """
        if existing_anchor_links is None:
            existing_anchor_links = []

        self.anchor = None
        anchors = []
        if not self.is_virtual:
            # Generate the link to the current header
            anchor_link = self.get_anchor_link(self.header_title, existing_anchor_links)
            self.anchor = "[{:s}](#{:s})".format(self.header_title, anchor_link)
            anchors.append(self.anchor)

        # Generate the links to the child headers, sharing one list of used
        # anchors so that duplicates are numbered across the whole tree.
        for child_header in self.child_headers:
            anchors.extend(child_header.gen_gfm_anchor(existing_anchor_links))

        return anchors

    def gen_gfm_toc(self, existing_anchor_links=None):
        """Return the table of contents as text, one "- [title](#link)" per line.

        Covers this header (unless virtual) and all its descendants, in
        document order.  The entries are not indented by level.
        """
        return '\n'.join("- " + anchor for anchor in self.gen_gfm_anchor(existing_anchor_links))

    def print_headers(self, print_level=False, order_info=''):
        """Print this header and its descendants as a numbered outline.

        Args:
            print_level: Currently unused; it is only passed on to the children.
            order_info: Dotted section number of the parent, e.g. "2.1".
        """
        if self.my_level == -1:
            new_order_info = ''
        elif self.my_level == 0:
            new_order_info = str(self.my_order)
        else:
            new_order_info = order_info + '.' + str(self.my_order)

        # Virtual nodes contribute to the numbering but are not printed.
        if not self.is_virtual and self.my_level != -1:
            print("{} {} {}".format(self.get_level_symbol(' '), new_order_info, self.header_title))

        for child_header in self.child_headers:
            child_header.print_headers(print_level, new_order_info)

In [89]:
# Parse the headings of the example document into a header tree.
input_fn = r"Example.md"
headers = retrieve_headers(input_fn)
# Number the siblings first: print_headers() builds the section numbers
# from the order values that assign_order() stores on each node.
headers.assign_order()
headers.print_headers(True)
# Print the table of contents produced for the whole tree.
print(headers.gen_gfm_toc())


   1.1.1 h3 level
 2 h1 header
  2.1 Good h2 Header
  2.2 The Second h2 Header 中文
 3 h1 Header
   3.1.1 Direct h3 Header
    3.1.1.1 H4 header
  3.2 Third h2 header
- [h3 level](#h3-level)
- [h1 header](#h1-header)
- [Good h2 Header](#good-h2-header)
- [The Second h2 Header 中文](#the-second-h2-header-中文)
- [h1 Header](#h1-header-1)
- [Direct h3 Header](#direct-h3-header)
- [H4 header](#h4-header)
- [Third h2 header](#third-h2-header)



In [39]:
import unicodedata

# Sample string built from code points that belong to different Unicode categories.
u = ''.join(map(chr, (233, 0x0bf2, 3972, 6000, 13231)))

# One line per character: index, hex code point, general category, official name.
for i, c in enumerate(u):
    print(i, format(ord(c), '04x'), unicodedata.category(c), unicodedata.name(c))

# Get numeric value of second character
print(unicodedata.numeric(u[1]))

0 00e9 Ll LATIN SMALL LETTER E WITH ACUTE
1 0bf2 No TAMIL NUMBER ONE THOUSAND
2 0f84 Mn TIBETAN MARK HALANTA
3 1770 Lo TAGBANWA LETTER SA
4 33af So SQUARE RAD OVER S SQUARED
1000.0


In [35]:
# Print the numbered outline of the parsed headers again.
headers.print_headers(True)

hello world


In [6]:
# list.count() compares whole elements, so 'b' matches neither 'bb' nor 'a'
# and the result is 0.
str_list = ['a', 'aa', 'bb', 'a']
str_list.count('b')

0

In [None]:
def gen_toc(headers):
    

In [118]:
def retrieve_plain_headers(input_fn):
    """Collect the headings of a Markdown file as a flat list.

    Args:
        input_fn: Path of the Markdown file, read as UTF-8.

    Returns:
        A list of ``(num_of_sharp, title)`` tuples in document order, where
        ``num_of_sharp`` is the number of leading '#' characters of the line.

    Raises:
        OSError: If the file cannot be opened.
    """
    try:
        md_f = open(input_fn, 'r', encoding='utf-8')
    except OSError as err:
        # Keep the original cause attached and put the file name into the
        # message itself instead of passing it as a second OSError argument.
        raise OSError("Fail to open the file: {}".format(input_fn)) from err

    headers = []
    with md_f:  # make sure the file is closed again
        for line in md_f:
            strip_line = line.strip()
            if not strip_line.startswith('#'):
                continue

            # Count only the leading '#' run, so that '#Title' is not
            # measured as one long marker.
            num_of_sharp = len(strip_line) - len(strip_line.lstrip('#'))
            section_header = strip_line[num_of_sharp:].strip()

            # Store the parsed entry itself, as the (level, title) pair that
            # get_top_level() and shift_level() index into.
            headers.append((num_of_sharp, section_header))

    return headers

In [7]:
def get_top_level(headers):
    """Return the smallest level found among the ``(level, title)`` entries."""
    return min(entry[0] for entry in headers)

def shift_level(headers, top_level):
    """Renumber the levels so that ``top_level`` becomes level 1.

    Returns a new list of ``(level, title)`` tuples; the input is not modified.
    """
    shifted = []
    for entry in headers:
        new_level = entry[0] - top_level + 1
        shifted.append((new_level, entry[1]))
    return shifted

In [8]:
# Renumber the heading levels so that the shallowest one becomes level 1.
top_level = get_top_level(headers)
headers = shift_level(headers, top_level)

print(headers)

add_counter = True  # when True, list every top-level heading while counting

# Count the top-level headings.  The loop variables are deliberately not
# named `header`: this code runs at module scope, so rebinding that name
# would replace the `header` class and break later calls to retrieve_headers().
counter = 0
for level, title in headers:
    if level == 1:
        counter += 1
        if add_counter:
            print("- ", title)

[(1, 'Overview'), (2, 'Classical Pipeline'), (2, 'Timestamp Manager'), (2, 'VLIW Pipelane'), (3, 'Mask Manager'), (3, 'Microcode Unit'), (3, 'Address Resolver'), (2, 'Device Event Dispatcher'), (2, 'Timing Control Unit'), (2, 'Binary Control'), (1, 'Example'), (2, 'Quantum Wait (`QWAIT 10000`)'), (2, 'Set Mask (`SMIS` & `SMIT`)'), (2, 'Quantum Bundle'), (3, '_pre_interval_ (`2`)'), (3, 'Single-qubit operation (`X s8`)'), (3, 'Two-qubit Operation (`CZ t17`)')]
-  Overview
-  Example


In [None]:
def gen_toc(headers):
    top_level = get_top_level(headers)
    headers = shift_level(headers, top_level)
    for i in range(len(headers)):
        for 