# TOC Generation



In [7]:
import yaml
from yaml import load, dump
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

In [4]:
# NOTE(review): this value is not used by any cell visible here — `fn` is
# reassigned before the syllabus file is opened. Confirm whether this cell
# is still needed.
fn = "y12-syllabus-abb"

In [9]:
# Hand-written table of contents in YAML, used below to inspect the nesting
# we need to generate: parts -> chapters -> sections, each leaf being a `file`.
# NOTE(review): `format: jb-book` suggests this is a Jupyter Book `_toc.yml`
# — confirm.
toc_str = """
format: jb-book
root: index
parts:
  - caption: The course and its tools
    chapters:
    - file: setup/00_index
      sections:
      - file: setup/01_syllabus
      - file: setup/10_installation
      - file: setup/20_communication
      - file: setup/30_assessment
  - caption: Networks
    chapters:
    - file: networks/00_index
      sections:
      - file: networks/10_models
      - file: networks/20_components
      - file: networks/30_performance

  - caption: Data management
    chapters:
    - file: databases/data-management
      sections:
      - file: databases/dbms/00-index
      - file: databases/dbms/01-data-information.md
      - file: databases/dbms/02-flat-relational.md
      - file: databases/dbms/03-rdbms.md
    - file: databases/concepts/00-index.md
      sections:
      - file: databases/concepts/10-organisation
      - file: databases/concepts/11-datatypes
      - file: databases/concepts/12-hierarchy
      - file: databases/concepts/13-relationships
      - file: databases/concepts/20-primary-foreign-keys.md
      - file: databases/concepts/30-composite-keys.md
      - file: databases/concepts/40-data-anomalies.md
"""

In [10]:
# Parse the YAML text into nested dicts/lists and display the result.
toc = yaml.load(toc_str, Loader=Loader)
toc

{'format': 'jb-book',
 'root': 'index',
 'parts': [{'caption': 'The course and its tools',
   'chapters': [{'file': 'setup/00_index',
     'sections': [{'file': 'setup/01_syllabus'},
      {'file': 'setup/10_installation'},
      {'file': 'setup/20_communication'},
      {'file': 'setup/30_assessment'}]}]},
  {'caption': 'Networks',
   'chapters': [{'file': 'networks/00_index',
     'sections': [{'file': 'networks/10_models'},
      {'file': 'networks/20_components'},
      {'file': 'networks/30_performance'}]}]},
  {'caption': 'Data management',
   'chapters': [{'file': 'databases/data-management',
     'sections': [{'file': 'databases/dbms/00-index'},
      {'file': 'databases/dbms/01-data-information.md'},
      {'file': 'databases/dbms/02-flat-relational.md'},
      {'file': 'databases/dbms/03-rdbms.md'}]},
    {'file': 'databases/concepts/00-index.md',
     'sections': [{'file': 'databases/concepts/10-organisation'},
      {'file': 'databases/concepts/11-datatypes'},
      {'file'

In [11]:
type(toc)

dict

In [14]:
# Smallest useful example: one part, holding one chapter, holding one section.
t1_str = """
parts:
  - caption: The course and its tools
    chapters:
    - file: setup/00_index
      sections:
      - file: setup/01_syllabus
"""
# Parse it and display the resulting nested structure.
t1 = load(t1_str, Loader)
t1

{'parts': [{'caption': 'The course and its tools',
   'chapters': [{'file': 'setup/00_index',
     'sections': [{'file': 'setup/01_syllabus'}]}]}]}

In [16]:
type(t1['parts']) # Parts is a list of parts

list

In [19]:
t1

{'parts': [{'caption': 'The course and its tools',
   'chapters': [{'file': 'setup/00_index',
     'sections': [{'file': 'setup/01_syllabus'}]}]}]}

Each part has a caption and a list of chapters.

In [18]:
t1['parts'][0].keys()

dict_keys(['caption', 'chapters'])

Each chapter has a file and a list of sections.

In [20]:
t1['parts'][0]['chapters'][0].keys()

dict_keys(['file', 'sections'])

Each section is a single `file` entry, so a chapter's `sections` is effectively a list of files.

The abbreviated syllabus file (`y12-syllabus-abbrev-clean.txt`) begins:

    Unit 3 - Programming
    Programming skills and concepts - Knowledge
    *  program control structures, including:
    **  sequence
    **  selection
    **  iteration
    ***  post-test
    ***  pre-test
    *  characteristics of data types used in solutions, including:
    **  integer
    **  float
    **  string
    **  Boolean
    *  modular coding using functions, parameters and arguments
    *  scope of variables (Global, Local)
    *  characteristics of the following data structures:
    **  arrays
    ***  one-dimensional arrays
    ***  two-dimensional arrays
    **  dictionaries


Which we might read as

- Part :: Programming
- Chapter :: Programming skills and concepts - Knowledge
- Section :: program control structures
- Files :: sequence, selection, iteration and so on (each `**` line is one file/page; `***` lines are headings within it)


In [112]:
def is_part(s):
    """Return True when the text 'Unit' occurs anywhere in ``s``."""
    return s.find('Unit') != -1

def is_chapter(s):
    """Return True when ``s`` contains 'Skills' or 'Knowledge' (case-sensitive)."""
    return any(marker in s for marker in ('Skills', 'Knowledge'))

def is_section(s):
    """Return True when ``s`` opens with exactly one star followed by a space."""
    return s[:2] == '* '

def is_page(s):
    """Return True when ``s`` opens with exactly two stars followed by a space."""
    return s[:3] == '** '

def is_heading(s):
    """Return True when ``s`` opens with exactly three stars followed by a space."""
    return s[:4] == '*** '

def strip(s):
    """Remove lead markings and newlines.

    Drops the run of leading ``*`` bullet markers from ``s`` and then trims
    the surrounding whitespace, which includes the trailing newline.

    Parameters
    ----------
    s : str
        One raw line of the abbreviated syllabus.

    Returns
    -------
    str
        The line's text without bullet markers or surrounding whitespace.
    """
    return s.lstrip('*').strip()


def label(line):
    """Classify one syllabus line and return ``(label, text)``.

    Parameters
    ----------
    line : str
        One raw line, optionally ending in a newline.

    Returns
    -------
    tuple of (str, str)
        ``label`` is one of ``'part'``, ``'chapter'``, ``'section'``,
        ``'page'``, ``'heading'`` or ``'UNKNOWN'``. ``text`` is the cleaned
        line, except for ``'UNKNOWN'`` where the line is returned untouched.
    """
    # Bullet lines are tested first: a bullet whose text happens to contain
    # "Unit", "Skills" or "Knowledge" is still a bullet, not a part/chapter.
    # The marker is removed with strip() rather than a fixed-width slice, so
    # the result does not depend on how many spaces follow the stars.
    if is_section(line):
        return 'section', strip(line)
    if is_page(line):
        return 'page', strip(line)
    if is_heading(line):
        return 'heading', strip(line)
    if is_part(line):
        return 'part', line.strip()
    if is_chapter(line):
        return 'chapter', line.strip()
    # Unrecognised lines (blank lines included) are passed through as-is.
    return 'UNKNOWN', line

In [113]:
# One representative line for each level of the outline.
s1 = 'Unit 3 - Programming\n'
s2 = 'Programming skills and concepts - Knowledge\n'
s3 = "*  program control structures, including:\n"
s4 = "**  sequence\n"
s5 = "***  problem description\n"

# (predicate, a line it must accept, a line it must reject)
checks = [
    (is_part, s1, s2),
    (is_chapter, s2, s1),
    (is_section, s3, s4),
    (is_page, s4, s3),
    (is_heading, s5, s4),
]
for predicate, accepted, rejected in checks:
    assert predicate(accepted)
    assert not predicate(rejected)

In [114]:
s1.strip()

'Unit 3 - Programming'

In [115]:
s2.strip()

'Programming skills and concepts - Knowledge'

In [116]:
s3[3:].strip()

'program control structures, including:'

In [117]:
s4[4:].strip()

'sequence'

In [118]:
s5[5:].strip()

'problem description'

In [119]:
# Inline copy of the first lines of the abbreviated syllabus, used to try out
# label() without reading the file. The backslash after the opening quotes
# keeps the string from starting with an empty line.
short_toc_str = """\
Unit 3 - Programming
Programming skills and concepts - Knowledge
*  program control structures, including:
**  sequence
**  selection
**  iteration
***  post-test
***  pre-test
*  characteristics of data types used in solutions, including:
**  integer
**  float
**  string
**  Boolean
*  modular coding using functions, parameters and arguments
*  scope of variables (Global, Local)
*  characteristics of the following data structures:
**  arrays
***  one-dimensional arrays
***  two-dimensional arrays
**  dictionaries"""

In [120]:
# Label each line of the inline sample and print the (label, text) pairs.
lines = short_toc_str.split("\n")
for line in lines:
    print(label(line))

('part', 'Unit 3 - Programming')
('chapter', 'Programming skills and concepts - Knowledge')
('section', 'program control structures, including:')
('page', 'sequence')
('page', 'selection')
('page', 'iteration')
('heading', 'post-test')
('heading', 'pre-test')
('section', 'characteristics of data types used in solutions, including:')
('page', 'integer')
('page', 'float')
('page', 'string')
('page', 'Boolean')
('section', 'modular coding using functions, parameters and arguments')
('section', 'scope of variables (Global, Local)')
('section', 'characteristics of the following data structures:')
('page', 'arrays')
('heading', 'one-dimensional arrays')
('heading', 'two-dimensional arrays')
('page', 'dictionaries')


In [121]:
# Label every line of the cleaned syllabus file, collecting the
# (label, text) pairs in `codes` and echoing each one as it is read.
codes = []
fn = "y12-syllabus-abbrev-clean.txt"
# An explicit encoding keeps decoding independent of the platform default.
# NOTE(review): assumes the file is UTF-8 (or plain ASCII) — confirm.
with open(fn, encoding="utf-8") as src:
    for line in src:
        t = label(line)
        codes.append(t)
        print(t)

('part', 'Unit 3 - Programming')
('chapter', 'Programming skills and concepts - Knowledge')
('section', 'program control structures, including:')
('page', 'sequence')
('page', 'selection')
('page', 'iteration')
('heading', 'post-test')
('heading', 'pre-test')
('section', 'characteristics of data types used in solutions, including:')
('page', 'integer')
('page', 'float')
('page', 'string')
('page', 'Boolean')
('section', 'modular coding using functions, parameters and arguments')
('section', 'scope of variables (Global, Local)')
('section', 'characteristics of the following data structures:')
('page', 'arrays')
('heading', 'one-dimensional arrays')
('heading', 'two-dimensional arrays')
('page', 'dictionaries')
('chapter', 'Programming skills and concepts - Skills')
('section', 'apply, using pseudocode and a programming language, program control structures in solutions')
('section', 'apply, using pseudocode and a programming language, data types used in solutions as variables')
('section

In [122]:
len(codes)

389