# References

+ [Data Analysis and Visualization in Python for Ecologists](https://datacarpentry.org/python-ecology-lesson/)
+ [Data Management with SQL for Ecologists](https://datacarpentry.org/sql-ecology-lesson/)

## Glossary

*The definitions below are taken from the [Carpentries Glosario](https://glosario.carpentries.org/), which is licensed under CC-BY-4.0.*

In [None]:
import os
import re
import shutil
import subprocess
import tempfile

import glosario
import IPython
from yaml import safe_load


# Location of the glossary file inside the installed glosario package.
# The package directory is taken with dirname() instead of joining ".." onto
# the module file: POSIX systems refuse to resolve ".." through a regular
# file (NotADirectoryError), so the ".." form only works where paths are
# normalised lexically.
GLOSSARY_PATH = os.path.join(
    os.path.dirname(glosario.__file__), "data", "glossary.yml"
)


def parse_links(text, ignored):
    """Convert markdown links to in-page anchors into HTML.

    Every ``[label](#slug)`` link found in ``text`` is rewritten as
    ``<a href="#slug">label</a>``. When ``slug`` is listed in ``ignored``
    the link is dropped and only the bare label is kept. Runs of spaces in
    the result are collapsed to a single space.
    """
    link_pattern = re.compile(r"\[(.*?)\]\((#.*?)\)")
    for label, target in link_pattern.findall(text):
        markdown = f"[{label}]({target})"
        is_ignored = target.lstrip("#") in ignored
        replacement = label if is_ignored else f'<a href="{target}">{label}</a>'
        text = text.replace(markdown, replacement)
    return re.sub(" +", " ", text)


def find_slugs(text):
    """Return the slug of each ``[label](#slug)`` link in ``text``, in order"""
    return [link.group(1) for link in re.finditer(r"\[.*?\]\(#(.*?)\)", text)]


def update_glossary():
    """Replace the local glossary with the current one from GitHub.

    Clones the carpentries/glosario repository into a temporary directory
    and copies its ``glossary.yml`` over the file at ``GLOSSARY_PATH``.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with a
            non-zero status.
        FileNotFoundError: if the ``git`` executable cannot be found.
    """
    with tempfile.TemporaryDirectory() as twd:
        # check=True stops here when the clone fails, instead of letting the
        # copy below fail on a missing file with a misleading error.
        # Only the latest revision is needed, hence the shallow clone.
        subprocess.run(
            [
                "git",
                "clone",
                "--depth",
                "1",
                "https://github.com/carpentries/glosario",
            ],
            cwd=twd,
            check=True,
        )
        shutil.copy2(os.path.join(twd, "glosario", "glossary.yml"), GLOSSARY_PATH)


# Glossary terms (identified by their glosario slug) that the lesson glossary
# starts from
slugs = [
    "argument",
    "attribute",
    "class",
    "cli",
    "data_frame",
    "function",
    "keyword_argument",
    "method",
    "object",
    "oop",
    "parameter",
    "psf",
    "python",
    "shell",
    "variable_program",
]

# Slugs that are left out of the lesson glossary
ignored = ["s3", "tibble", "tidyverse"]

# Slugs that have been given a definition so far
defined = []

# The glossary shipped inside the glosario package is outdated. A file of
# 100000 bytes or fewer is treated as that outdated copy and gets replaced.
# NOTE(review): the size threshold is a heuristic - confirm it still separates
# the bundled file from the current one.
glossary_size = os.path.getsize(GLOSSARY_PATH)
if glossary_size <= 100000:
    update_glossary()

# Load the glossary and index its entries by slug
with open(GLOSSARY_PATH, encoding="utf-8") as f:
    loaded_terms = safe_load(f)
glossary = {entry["slug"]: entry for entry in loaded_terms}

# Look up and define each term. Definitions can link to further glossary
# terms; those are collected and defined on the next pass, until a pass turns
# up nothing new.
html = ["<dl>"]
rendered = []  # one (sort key, HTML) pair per defined term
# Slugs that must not be queued again. A set keeps the membership test cheap
# and avoids rebuilding a combined list for every candidate slug.
skip = set(defined) | set(ignored)
while slugs:
    new = set()
    for slug in slugs:
        term = glossary[slug]["en"]
        rendered.append(
            (
                term["term"].casefold(),
                f'  <a id="{slug}"></a>\n'
                f"  <dt>{term['term']}</dt>\n"
                f"  <dd>{parse_links(term['def'].strip(), ignored)}</dd>",
            )
        )
        defined.append(slug)
        skip.add(slug)
        new.update(find_slugs(term["def"]))
    # Sorted so the processing order does not depend on set iteration order
    slugs = sorted(new - skip)

# Sort on the term text itself. Sorting on the generated "<dt>...</dt>" markup
# lets the closing "<" take part in the comparison, which places a term after
# any longer term it is a prefix of (e.g. "object-oriented programming" ahead
# of "object").
rendered.sort(key=lambda pair: pair[0])
defs = [markup for _, markup in rendered]
html.extend(defs)
html.append("</dl>")

# Display definitions as HTML
IPython.display.HTML("\n".join(html))