# report_generator

In [43]:
from datetime import datetime
import pandas as pd

In [44]:
# Shared page shell: every generated page is html_top + content + html_bottom.
# The document metadata lives in <head> (not <header>, which is a body-level
# sectioning element), and the HTML5 doctype is emitted because the Bulma
# stylesheet linked below expects standards-mode rendering.
# NOTE(review): the <title> text "validator2" looks like a leftover from
# another project -- confirm the intended page title.
html_top = '''<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>validator2</title>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css">
    </head>
    <body>
'''
# Closes the <body> and <html> elements opened by html_top.
html_bottom = ''' 
    </body>
</html>
'''

In [45]:
def read_occurrence_table(path):
    '''Load the pipe-delimited occurrence table at path into a pandas dataframe.

    The eventDate column of the returned dataframe is converted to
    datetime values; all other columns are left as parsed by pandas.
    '''
    occurrences = pd.read_csv(path, sep='|')
    event_dates = pd.to_datetime(occurrences['eventDate'])
    return occurrences.assign(eventDate=event_dates)

In [46]:
def write_occurrence_html(df, path):
    '''Render a dataframe as an HTML table and write it to path as a full page.

    The table is generated without the index column and carries the CSS
    class string "table is-striped". It is wrapped between the module-level
    html_top and html_bottom page shell.

    Parameters:
        df: pandas dataframe to render.
        path: destination file path; an existing file is overwritten.
    '''
    table = df.to_html(index=False, classes=['table is-striped'])
    # html_top declares <meta charset="utf-8">, so the file must really be
    # written as UTF-8 instead of the platform default encoding; otherwise
    # non-ASCII cell values are garbled (or raise) on non-UTF-8 locales.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(f'{html_top}\n{table}\n{html_bottom}')

In [47]:
def generate_report_html():
    '''Build the project landing page and return it as one HTML string.

    The returned page is the module-level html_top shell, a single Bulma
    "section" describing the project and linking to its artifacts, and the
    module-level html_bottom shell. The generation time (UTC) is embedded
    in the page.

    The page shell is added exactly once, in the return statement; the body
    template itself must not interpolate html_top, or the output would
    contain two <html>/<body> openings.

    NOTE(review): the eml.xml link originally had an empty href; it now
    points at "DwCA/eml.xml" to match the occurrence.txt and meta.xml
    links -- confirm that this file is published at that location.
    '''
    # Function-scope import: the notebook's import cell only brings in
    # `datetime` from the datetime module.
    from datetime import timezone

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the rendered text keeps the same "YYYY-MM-DD HH:MM:SS.ffffff"
    # form (the template appends the " UTC" suffix itself).
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None)

    html = f'''
    <section class="section">
        <div class="container">

            <h1 class="title is-1">CRB Geographic Distribution Database</h1>

            <p>Generated by <b>crbdist/report_generator.ipynb</b> at {generated_at} UTC</p>
            <p>Darwin Core Archive (DwCA): https://github.com/aubreymoore/crbdist/DwCA.zip</p>
            <p>GitHub Pages: <a href="http://aubreymoore.github.io/crbdist">http://aubreymoore.github.io/crbdist</a></p>
            <p class="mb-4">GitHub Repository: <a href="https://github.com/aubreymoore/crbdist">https://github.com/aubreymoore/crbdist</a></p>


            <p class="mb-4">The objective of this project is to build a public database to facilitate sharing information on new geographical 
            occurrence records for the coconut rhinoceros beetle (CRB), <i>Oryctes rhinoceros</i>, a major pest of coconut and oil palm which 
            is currently spreading among Pacific Islands.</p>

            <p class="mb-4">Instead of building, maintaining, and hosting a custom database, I decided to publish data to the Global Biodiversity Information 
            Facility (GBIF) which already contains <a href="https://www.gbif.org/species/4995642">occurrence records for CRB</a>.
            My goal is to fill data gaps in the GBIF data for CRB and to provide code to facilitate easy access to all GBIF occurrence records for CRB.
            The first step is to build a <a href="https://ipt.gbif.org/manual/en/ipt/latest/dwca-guide">Darwin Core Archive</a> (DwCA) which is 
            a biodiversity informatics data used for uploading data to GBIF. The archive will contain first occurrence records of CRB for islands 
            in the Pacific and Indian Oceans and elsewhere.</p>

            <h2 class="title is-2">Darwin Core Archive (under construction)</h2>

            <p class="mb-4">The Darwin Core Archive <a href="DwCA.zip">DwCA.zip</a> is a compressed ZIP file containing the following text files.</p>

            <p><a href="DwCA/occurrence.txt">DwCA/occurrence.txt</a> A delimited text file containing first country and island occurrence records for CRB.</p>
            <p><a href="DwCA/meta.xml">DwCA/meta.xml</a> A XML text file containing definitions for fields in occurrence.txt.</p>
            <p><a href="DwCA/eml.xml">DwCA/eml.xml</a> A XML text file containing metadata describing the DwCA dataset.</p>

            <h2 class="title is-2 mt-4">Raw Data</h2>

            <p>Data dictionary</p>
            <p><a href="data_table.html">Data table</a></p>

            <h2 class="title is-2 mt-4">Visualization</h2>

            <p><a href="crb_distribution.gif">crb_distribution</a> A map in animated GIF format showing spread of CRB over time.</p>
            <p><a href="timeline.html">timeline.html</a> Timeline of new island records for CRB</p>

        </div>
    </section>
    '''
    return f'{html_top}\n{html}\n{html_bottom}'

In [48]:
# MAIN

# Convert the occurrence table into a standalone HTML page.
df_occ = read_occurrence_table('../DwCA/occurrence.txt')
write_occurrence_html(df_occ, '../occurrence.html')
# NOTE(review): the report page links to "data_table.html", but the table is
# written to "occurrence.html" here -- confirm which file name is intended.

# Build the landing page and write it out. The page shell declares
# charset="utf-8", so the file is written explicitly as UTF-8 rather than
# with the platform default encoding.
report_html = generate_report_html()
with open('../index.html', 'w', encoding='utf-8') as f:
    f.write(report_html)

print('FINISHED')

FINISHED
