In [71]:
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from urllib.parse import quote
import datetime
import pytz

In [72]:
# Title placed at the start of the page header built in the rendering cell.
TITLE = 'Bibliography of Coconut Rhinoceros Beetle in the Philippines'
# Bibliography data, read with pandas.
CSV_PATH = 'crb-pi-bib.csv'
# Jinja2 template the page is rendered from.
TEMPLATE_PATH = 'crb-pi-bib-template.html'
# Stylesheet path; not referenced by the cells shown here -- presumably linked
# from the template (TODO confirm).
CSS_PATH = 'crb-pi-bib.css'
# Destination of the rendered HTML page.
HTML_PATH = 'index.html'

In [73]:
# Load the bibliography in one pipeline:
#   1. read the CSV,
#   2. keep only the columns used further down, in this order,
#   3. order the rows by author, then by publication year,
#   4. renumber the rows 0..n-1 so the index follows the sorted order.

df = (
    pd.read_csv(CSV_PATH)[[
        'Key', 'Item Type', 'Author', 'Publication Year', 'Title',
        'Url', 'File Attachments', 'Notes', 'Abstract Note', 'Extra',
    ]]
    .sort_values(by=['Author', 'Publication Year'])
    .reset_index(drop=True)
)

In [74]:
# create new column which contains a formatted reference

def format_ref(row):
    """Build the HTML snippet that cites one bibliography entry.

    Parameters
    ----------
    row : pandas.Series
        One row of the bibliography table. The 'Author', 'Publication Year',
        'Title' and 'Url' entries are read.

    Returns
    -------
    str
        '<author> <year><br><b><title></b><br>', followed by an anchor whose
        target and text are both the URL when the row has one.

    Notes
    -----
    Missing values are replaced by placeholders: 'Unknown' for the author,
    0 for the year and 'Untitled' for the title. Values are inserted as-is,
    without HTML escaping, so any markup stored in a field is passed through.
    """
    author = 'Unknown' if pd.isna(row['Author']) else row['Author']

    # Cast to int so a float-valued year (e.g. 2019.0) prints as 2019;
    # presumably the column is read as float when some years are missing.
    year = 0 if pd.isna(row['Publication Year']) else int(row['Publication Year'])

    # Without this guard a missing title would be rendered as the text "nan".
    title = 'Untitled' if pd.isna(row['Title']) else row['Title']

    # Emit an anchor only when there is something to link to; an empty
    # href="" would point back at the page itself.
    if pd.isna(row['Url']):
        link = ''
    else:
        url = row['Url']
        link = f'<a href="{url}">{url}</a>'

    return f'{author} {year}<br><b>{title}</b><br>{link}'

# One formatted reference per row (format_ref is called with each row).
df['ref'] = df.apply(format_ref, axis='columns')

In [75]:
# create abstract_exists column

def abstract_exists(row):
    """Return True when the row's 'Abstract Note' entry is not missing (NA)."""
    abstract = row['Abstract Note']
    return pd.notna(abstract)
# Row-wise flag: does this entry have an abstract?
df['abstract_exists'] = df.apply(abstract_exists, axis='columns')

# create notes_exist column

def notes_exist(row):
    """Return True when the row's 'Notes' entry is not missing (NA)."""
    notes = row['Notes']
    return pd.notna(notes)

# Row-wise flag: does this entry have notes?
df['notes_exist'] = df.apply(notes_exist, axis='columns')

# create attachments_exist column

def attachments_exist(row):
    """Return True when the row's 'File Attachments' entry is not missing (NA)."""
    attachments = row['File Attachments']
    return pd.notna(attachments)

# Row-wise flag: does this entry list any file attachments?
df['attachments_exist'] = df.apply(attachments_exist, axis='columns')

# create a new column for formatted attachment urls

def format_attachment_urls(row):
    """Render a row's file attachments as a run of HTML links.

    Parameters
    ----------
    row : pandas.Series
        One row of the bibliography table. 'attachments_exist' is read first;
        'File Attachments' is only read when that flag is truthy.

    Returns
    -------
    str
        One '<a href="...">...</a><br>' per attachment, concatenated in the
        order they are listed, or '' when the row has no attachments.

    Notes
    -----
    'File Attachments' is split on ';'. Each entry is stripped of surrounding
    whitespace and percent-encoded with urllib.parse.quote (so a space becomes
    %20); the encoded form is used both as link target and as link text.
    Blank entries, such as the one left by a trailing ';', are skipped rather
    than rendered as an empty link.
    """
    if not row['attachments_exist']:
        return ''

    links = []
    for entry in row['File Attachments'].split(';'):
        path = entry.strip()
        if not path:
            # Nothing between two separators: no link to emit.
            continue
        encoded = quote(path)
        links.append(f'<a href="{encoded}">{encoded}</a><br>')

    return ''.join(links)

# HTML links for each row's attachments ('' where there are none).
df['attachment_hrefs'] = df.apply(format_attachment_urls, axis='columns')

# display df

# df[['abstract_exists','Abstract Note']]

In [76]:
def get_timestamp():
    """Return the current time in the 'Asia/Manila' zone as an ISO 8601 string.

    The date and time are separated by 'T' and the time is truncated to
    minutes; the UTC offset is included because the datetime is zone-aware.
    """
    manila = pytz.timezone('Asia/Manila')
    now = datetime.datetime.now(manila)
    return now.isoformat(sep='T', timespec='minutes')

# get_timestamp()

In [77]:
# Render the bibliography page from the Jinja2 template and save it.

# Templates are looked up relative to the current working directory.
templateLoader = FileSystemLoader(searchpath=".")
templateEnv = Environment(loader=templateLoader)
template = templateEnv.get_template(TEMPLATE_PATH)

# Header line: title, author credit and generation time.
header = f'{TITLE}<br>by Aubrey Moore | {get_timestamp()}'

# The template receives the whole table plus the header string.
templateVars = {'df': df, 'header': header}
outputText = template.render(templateVars)

# Write as UTF-8 explicitly: the platform default encoding may be unable to
# represent non-ASCII characters in the bibliography (names, titles), and
# would otherwise make the output depend on the machine it was built on.
with open(HTML_PATH, 'w', encoding='utf-8') as f:
    f.write(outputText)
print(f'The bibliography is saved in {HTML_PATH}.')
print('Finished')

The bibliography is saved in index.html.
Finished
