In [15]:
from collections import defaultdict
import jinja2
from dwca.read import DwCAReader

In [16]:
# Location of the Darwin Core archive (.zip) that the main cell hands to
# build_dataframe().
# NOTE(review): this is an absolute, machine-specific path; it must be adjusted
# before the notebook can run on another machine.
DWCA = '/home/aubrey/Downloads/0263016-220831081235567.zip'

In [17]:
def build_dataframe(dwca):
    """
    Builds a dataframe from the core data file of a Darwin Core archive.

    Parameters
    ----------
    dwca : str
        Path to the Darwin Core archive; handed directly to ``DwCAReader``.

    Returns
    -------
    pandas.DataFrame
        Rows of the archive's core data file, with columns that are entirely
        empty removed, sorted by ``order``, ``family`` and ``scientificName``.

    Raises
    ------
    KeyError
        If one of the three sort columns is missing (including the case where
        it was dropped because every value in it was empty).
    """
    # Open the archive named by the argument. Binding the reader to a separate
    # name keeps the ``dwca`` parameter from being shadowed and ignored.
    with DwCAReader(dwca) as archive:
        # Ask the archive descriptor which file is the core file instead of
        # assuming it is always called 'occurrence.txt'.
        core_file = archive.descriptor.core.file_location
        print("Core data file is: {}".format(core_file))
        df = archive.pd_read(core_file, parse_dates=True, low_memory=False)

    # Chained, non-mutating form: drop all-empty columns, then sort so that
    # downstream consumers see taxa grouped order -> family -> name.
    return (
        df.dropna(axis=1, how='all')
          .sort_values(by=['order', 'family', 'scientificName'])
    )

In [18]:
def build_taxonomy_json(df):
    """
    Builds a nested order -> family -> scientificName mapping from a dataframe.

    Despite the function's name, the result is a nested ``defaultdict`` (not a
    JSON string). Each level maps a taxon label to the mapping of its
    children; scientific names map to empty mappings.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``order``, ``family`` and ``scientificName``.

    Returns
    -------
    collections.defaultdict
        Three-level nested mapping. Keys appear in the order in which each
        distinct taxon is first encountered in ``df``.

    Raises
    ------
    KeyError
        If ``df`` has rows but lacks one of the three columns.
    """

    def tree():
        # Auto-vivifying mapping: indexing a missing key creates a sub-tree.
        return defaultdict(tree)

    taxonomy = tree()
    if len(df.index) == 0:
        return taxonomy

    levels = ['order', 'family', 'scientificName']
    # Only distinct triples can change the tree, so collapse duplicates in
    # pandas first instead of visiting every occurrence row in Python.
    # drop_duplicates keeps the first occurrence, so key order is unchanged.
    unique_taxa = df[levels].drop_duplicates()
    for order, family, sciname in unique_taxa.itertuples(index=False, name=None):
        # The bare subscript is intentional: indexing creates the nested nodes.
        taxonomy[order][family][sciname]
    return taxonomy

In [19]:
def build_treeview(taxonomy_json, indent):
    """
    Returns html code for an interactive hierarchical tree view.

    Parameters
    ----------
    taxonomy_json : dict
        Nested mapping. Every entry whose value is itself a dict becomes an
        ``<li>`` with a clickable caret label followed by a nested ``<ul>``
        for its children; entries whose value is not a dict are skipped.
        If a child dict has a ``"name"`` key, that value is used as the label
        instead of the entry's own key.
    indent : int
        Nesting depth; each level is indented by two spaces in the output.

    Returns
    -------
    str
        An HTML fragment: one ``<ul class ="nested">`` element containing the
        rendered entries. Label text is HTML-escaped.
    """
    # Imported locally on purpose: the notebook's main cell rebinds the global
    # name ``html`` to the rendered page, which would shadow a module import.
    from html import escape

    pad = '  ' * indent
    parts = [pad + '<ul class ="nested"> \n']
    for key, child in taxonomy_json.items():
        if not isinstance(child, dict):
            continue
        label = child["name"] if "name" in child else key
        # Escape the label so characters such as '&' or '<' in a taxon name
        # cannot break the generated markup.
        parts.append(
            pad + '<li><span class="caret">' + escape(str(label)) + ' : '
            + '</span> \n '
        )
        parts.append(build_treeview(child, indent + 1) + pad + '</li> \n ')
    parts.append(pad + '</ul> \n ')
    # Single join instead of repeated string concatenation.
    return ''.join(parts)

In [20]:
# HTML TEMPLATE
#
# Jinja2 template for the whole page. The only placeholder is
# ``{{ treeview }}``, which the main cell fills with the HTML fragment produced
# by build_treeview(). The inline script toggles the "active" class on the
# nested list next to a clicked caret, and "caret-down" on the caret itself.
# Note: this is a regular (non-raw) Python string, so "\u25B6" below is turned
# into the literal triangle character before the template is rendered.

template_string = """
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
ul, #myUL {
  list-style-type: none;
}

#myUL {
  margin: 0;
  padding: 0;
}

.caret {
  cursor: pointer;
  -webkit-user-select: none; /* Safari 3.1+ */
  -moz-user-select: none; /* Firefox 2+ */
  -ms-user-select: none; /* IE 10+ */
  user-select: none;
}

.caret::before {
  content: "\u25B6";
  color: black;
  display: inline-block;
  margin-right: 6px;
}

.caret-down::before {
  -ms-transform: rotate(90deg); /* IE 9 */
  -webkit-transform: rotate(90deg); /* Safari */
  transform: rotate(90deg);
}

.nested {
  display: none;
}

.active {
  display: block;
}
</style>
</head>
<body>
<h1>List of Insect Taxa with Guam Occurrence Records Stored in the Global Biodiversity Information Facility (GBIF)</h1>

<ul id="myUL">
<li><span class="caret">class <b>Insecta</b></span>

{{ treeview }}

</li>
</ul>

<script>
var toggler = document.getElementsByClassName("caret");
var i;

for (i = 0; i < toggler.length; i++) {
  toggler[i].addEventListener("click", function() {
    this.parentElement.querySelector(".nested").classList.toggle("active");
    this.classList.toggle("caret-down");
  });
}
</script>

</body>
</html>
"""

In [21]:
# MAIN
# Pipeline: archive -> dataframe -> nested taxonomy -> HTML fragment -> page.

df = build_dataframe(DWCA)
taxonomy_json = build_taxonomy_json(df)
treeview = build_treeview(taxonomy_json, indent=2)

# Fill the page template's single placeholder with the generated fragment.
environment = jinja2.Environment()
template = environment.from_string(source=template_string)
html = template.render({'treeview': treeview})

# Write the finished page next to the notebook.
with open('index.html', 'w', encoding='utf-8') as f:
    f.write(html)

print('FINISHED')

Core data file is: occurrence.txt
FINISHED
