# Development

- For each page, find and label the first instance of each glossary term from the appropriate glossary file
- Automatically create all indexes 


### Glossary Term Labels

Search through a glossary file and identify all terms defined therein.

In [32]:
# -------------------- GET TERMS FROM GLOSSARY -------------------- #

from pathlib import Path

def get_terms_from_glossary(glossary_path):
    """Return the terms found in a glossary file, in file order.

    A line counts as a term when it contains exactly one tab character and
    is non-empty once that tab has been removed.

    Args:
        glossary_path: Location of the glossary file (string or path-like).

    Returns:
        A list of term strings with the tab removed.
    """
    with open(Path(glossary_path)) as handle:
        lines = handle.read().splitlines()
    # The tab count is the only structural signal used; a line holding
    # nothing but a single tab is dropped by the emptiness check below.
    stripped = (line.replace("\t", "") for line in lines if line.count("\t") == 1)
    return [term for term in stripped if term != ""]
    
# Example run against the spiders glossary; the returned list is the cell output.
get_terms_from_glossary("source/spiders/glossary.rst")

['Anthropod',
 'Hemocoel',
 'Pedicle',
 'Cephalothorax',
 'Hemocyanin',
 'Haemolymph',
 'Spiracles',
 'Ectotherms',
 'Dragline',
 'Opisthosoma',
 'Fecund']

Parse a page to correctly find and label the first instance of each relevant glossary term.

In [31]:
# -------------------- FIND GLOSSARY TERMS -------------------- #

import re
from pathlib import Path

def _label_first_instance(content, variant):
    """Wrap the first unlabelled occurrence of ``variant`` in a ``:term:`` role."""
    label = f":term:`{variant}`"
    # Skip empty terms, and skip variants that are already labelled so that
    # re-running on an already processed page does not tag a second instance.
    if not variant or label in content:
        return content
    # re.escape keeps terms such as "C++" from being read as regex syntax.
    # The look-behind refuses matches that start inside a longer word or
    # directly after a backtick (i.e. inside an existing role); the
    # look-ahead is the original "followed by . , whitespace or end" rule.
    pattern = rf"(?<![\w`]){re.escape(variant)}(?=[.,\s]|$)"
    # A callable replacement inserts the label verbatim, so backslashes in a
    # term are never interpreted as group references.
    return re.sub(pattern, lambda _match: label, content, count=1)


def label_glossary_terms_in_page(page_path, terms):
    """Label the first instance of each glossary term in a page.

    Each term is searched for as given and in its lowercase form; the first
    match of each form is rewritten to ``:term:`<form>```. The page file is
    only read, never written.

    Args:
        page_path: Location of the page to scan (string or path-like).
        terms: Iterable of glossary term strings.

    Returns:
        The page text with the labels inserted.
    """
    with open(Path(page_path)) as page:
        content = page.read()
    for term in terms:
        # dict.fromkeys de-duplicates the pair when the term is already
        # lowercase, so such a term is labelled once rather than twice.
        for variant in dict.fromkeys((term, term.lower())):
            content = _label_first_instance(content, variant)
    return content
    
# Label the anatomy page with the spiders glossary terms and print the
# result; nothing is written back to disk in this cell.
terms = get_terms_from_glossary("source/spiders/glossary.rst")
content = label_glossary_terms_in_page("source/spiders/anatomy.rst", terms)
print(content)

**************************************************
Anatomy
**************************************************

Like other arthropods, spiders are coelomates in which the coelom is reduced to small areas around the reproductive and excretory systems. Its place is largely taken by a :term:`hemocoel`, a cavity that runs most of the length of the body and through which blood flows. The heart is a tube in the upper part of the body, with a few ostia that act as non-return valves allowing blood to enter the heart from the :term:`hemocoel` but prevent it from leaving before it reaches the front end. However, in spiders, it occupies only the upper part of the abdomen, and blood is discharged into the :term:`hemocoel` by one artery that opens at the rear end of the abdomen and by branching arteries that pass through the :term:`pedicle` and open into several parts of the :term:`cephalothorax`. Hence spiders have open circulatory systems. The blood of many spiders that have book lungs contains th

Write new content to a given page.

In [3]:
# -------------------- COMMIT PAGE CHANGES -------------------- #

from pathlib import Path

def commit_page_changes(page_path, content):
    """Overwrite the file at ``page_path`` with ``content``.

    The file is opened in text write mode, so it is created when missing
    and truncated otherwise.

    Args:
        page_path: Location of the file to write (string or path-like).
        content: Text to store in the file.
    """
    destination = Path(page_path)
    with destination.open(mode="w") as handle:
        handle.write(content)
    
# Label the glossary terms in the anatomy page and write the result back.
glossary_path = "source/spiders/glossary.rst"
terms = get_terms_from_glossary(glossary_path)

page_path = "source/spiders/anatomy.rst"
# The labelling helper is label_glossary_terms_in_page; the previously used
# name find_glossary_terms is not defined in the visible notebook and
# raised NameError on a fresh kernel.
content = label_glossary_terms_in_page(page_path, terms)

commit_page_changes(page_path, content)

### Index Creation

Remove trailing newlines at the end of a file.

In [30]:
# -------------------- PRUNE ENDING NEWLINES -------------------- #

def prune_ending_newlines(page_path):
    """Remove every trailing newline from a file and rewrite it in place.

    The file is always rewritten through ``commit_page_changes``, even when
    there was nothing to strip.

    Args:
        page_path: Location of the file to clean up.
    """
    with open(page_path, mode='r') as file:
        content = file.read()
    # rstrip copes with empty and newline-only files, where indexing
    # content[-1] in a loop would end in an IndexError.
    commit_page_changes(page_path, content.rstrip("\n"))
    
# Strip trailing newlines from the top-level index file in place.
page_path = "./source/index.rst"
prune_ending_newlines(page_path)

Remove the toctree directive and all text after it.

In [33]:
# -------------------- REMOVE INDEX -------------------- #

def remove_index(index_file_path):
    """Delete the toctree directive, and everything after it, from a file.

    Only the text before the first line that starts with ``.. toctree:`` is
    kept; trailing newlines of that remainder are stripped and the file is
    rewritten through ``commit_page_changes``.

    Args:
        index_file_path: Location of the index file to trim. The parameter
            used to be misspelled, which made the body operate on a global
            variable of this name instead of on the argument.
    """
    with open(index_file_path, mode='r') as file:
        content = file.read()
    # Stripping here, rather than re-reading the file in a second pass, also
    # covers the case where nothing at all precedes the toctree.
    remaining = content.split("\n.. toctree:")[0].rstrip("\n")
    commit_page_changes(index_file_path, remaining)

# Strip the toctree from the gilbert-sullivan index file in place.
index_file_path = "./source/gilbert-sullivan/0-index.rst"
remove_index(index_file_path)

In [34]:
# -------------------- GLOBAL INDEX -------------------- #

import os

# Root of the documentation sources, passed to the global-index helpers below.
directory = "./source/"

def render_global_index(directory):
    """Build the toctree block for the top-level index.

    Every sub-directory of ``directory`` whose name starts with neither "."
    nor "0-" contributes one ``<name>/0-index.rst`` entry.

    Args:
        directory: Root source directory, with or without a trailing slash.

    Returns:
        The toctree directive as text: three leading newlines, the
        directive header, one tab-indented entry per line, and a final
        blank line.
    """
    # os.listdir returns names in arbitrary order; sorting keeps the
    # generated index identical from run to run.
    names = sorted(os.listdir(directory))
    entries = [
        f"\t{name}/0-index.rst\n"
        for name in names
        if not name.startswith((".", "0-"))
        and os.path.isdir(os.path.join(directory, name))
    ]
    return "\n\n\n.. toctree::\n\t:maxdepth: 1\n\n" + "".join(entries) + "\n"

# global_index = render_global_index(directory)
# print(global_index)


def commit_global_index(directory):
    """Regenerate the toctree at the end of ``index.rst`` in ``directory``.

    The existing toctree is removed with ``remove_index``, a fresh one is
    rendered with ``render_global_index``, and the file is rewritten with
    the new toctree appended to what remained.

    Args:
        directory: Root source directory, with or without a trailing slash.
    """
    # os.path.join avoids depending on a trailing slash in ``directory``.
    index_file_path = os.path.join(directory, "index.rst")
    remove_index(index_file_path)
    index = render_global_index(directory)
    with open(index_file_path, mode='r') as file:
        content = file.read()
    commit_page_changes(index_file_path, content + index)

# Rewrite the toctree of the top-level index on disk.
commit_global_index(directory)

In [35]:
# -------------------- LOCAL INDEX -------------------- #

import os

# Section directory whose 0-index.rst is regenerated by the helpers below.
directory = "source/gilbert-sullivan"

def render_local_index(directory):
    """Build the toctree block for a section's ``0-index.rst``.

    Entries whose name starts with "." or "0-" are ignored. Each remaining
    sub-directory contributes ``<name>/0-index.rst`` and each remaining
    regular file contributes its own name; sub-directories are listed
    before files.

    Args:
        directory: Section directory to scan.

    Returns:
        The toctree directive as text: three leading newlines, the
        directive header, one tab-indented entry per line, and a final
        blank line.
    """
    subdirectories = []
    pages = []
    # os.listdir returns names in arbitrary order; sorting keeps the
    # generated index identical from run to run.
    for name in sorted(os.listdir(directory)):
        if name.startswith((".", "0-")):
            continue
        path = os.path.join(directory, name)
        if os.path.isdir(path):
            subdirectories.append(f"\t{name}/0-index.rst\n")
        elif os.path.isfile(path):
            pages.append(f"\t{name}\n")
    entries = "".join(subdirectories) + "".join(pages)
    return "\n\n\n.. toctree::\n\t:maxdepth: 1\n\n" + entries + "\n"
    
# local_index = render_local_index(directory)
# print(local_index)


def commit_local_index(directory):
    """Regenerate the toctree at the end of a section's ``0-index.rst``.

    The current toctree is stripped with ``remove_index``, a new one is
    rendered with ``render_local_index``, and the file is rewritten with
    the new toctree appended to the remaining text.

    Args:
        directory: Section directory, given without a trailing slash.
    """
    index_file_path = directory + "/0-index.rst"
    remove_index(index_file_path)
    toctree = render_local_index(directory)
    with open(index_file_path, mode="r") as handle:
        remaining = handle.read()
    commit_page_changes(index_file_path, remaining + toctree)

# Rewrite the toctree of the section index on disk.
commit_local_index(directory)

In [10]:
# -------------------- RECURSIVELY LIST DIRECTORIES -------------------- #

import os

def recursively_list_directories(root_directory, directories):
    """Collect every non-hidden directory below ``root_directory``.

    Entries whose name starts with "." are skipped and not descended into.
    Each directory is appended before its own sub-directories.

    Args:
        root_directory: Directory to start from.
        directories: List that receives the joined paths; it is modified in
            place and nothing is returned.
    """
    for entry in os.listdir(root_directory):
        if entry[0] == ".":
            continue
        candidate = os.path.join(root_directory, entry)
        if not os.path.isdir(candidate):
            continue
        directories.append(candidate)
        recursively_list_directories(candidate, directories)
            
    
# Collect every non-hidden directory under the source root into `directories`.
root_directory = "./source/"
directories = list()
recursively_list_directories(root_directory, directories)


In [40]:
# -------------------- COMMIT ALL LOCAL INDICES -------------------- #

# Only the global index is rebuilt here; the per-directory loop further down
# is commented out.
root_directory = "./source/"
# NOTE(review): commit_global_index itself calls remove_index on this same
# path, so this explicit call looks redundant — confirm before removing.
remove_index(root_directory+"index.rst")
commit_global_index(root_directory)

# directories = list()
# recursively_list_directories(root_directory, directories)
# for directory in directories:
#     print(directory+"/0-index.rst")
#     remove_index(directory+"/0-index.rst")
#     commit_local_index(directory)
#     print("Done.")



In [39]:
# Rebuild the toctree of the gilbert-sullivan section index.
directory = "./source/gilbert-sullivan"
# NOTE(review): commit_local_index itself calls remove_index on this same
# path, so this explicit call looks redundant — confirm before removing.
remove_index(directory+"/0-index.rst")
commit_local_index(directory)

In [27]:
# Rebuild the toctree of the spiders section index.
directory = "./source/spiders"

# NOTE(review): commit_local_index itself calls remove_index on this same
# path, so this explicit call looks redundant — confirm before removing.
remove_index(directory+"/0-index.rst")
# index_file_path = directory+"/0-index.rst"
# with open(index_file_path, mode='r') as file:
#     content = file.read()
#     content = content.split("\n.. toctree:")[0]
# commit_page_changes(index_file_path, content)
# prune_ending_newlines(index_file_path)

commit_local_index(directory)

In [None]:
%reset -f

from scripts.commit_