In [46]:
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from urllib.parse import quote

In [47]:
# Read the bibliography CSV export.
# All columns are read so the print below shows the full header of the file.

CSV_FILE = 'test.csv'
MAX_ROWS = 1000  # cap passed to read_csv(nrows=...); rows past this are not loaded

# Columns kept for the bibliography, in the order they should appear.
BIB_COLUMNS = ['Item Type', 'Author', 'Publication Year', 'Title', 'Url',
               'File Attachments', 'Notes', 'Abstract Note', 'Extra']

raw_df = pd.read_csv(CSV_FILE, nrows=MAX_ROWS)

print(raw_df.columns)

# Select/reorder the columns, then sort by author and year.
# sort_values() returns a new frame, so nothing is mutated in place and `df`
# is not a column-subset view that later cells then assign new columns into.
# A KeyError here means the CSV is missing one of BIB_COLUMNS.

df = raw_df[BIB_COLUMNS].sort_values(by=['Author', 'Publication Year'])
df['Abstract Note']

Index(['Key', 'Item Type', 'Publication Year', 'Author', 'Title',
       'Publication Title', 'ISBN', 'ISSN', 'DOI', 'Url', 'Abstract Note',
       'Date', 'Date Added', 'Date Modified', 'Access Date', 'Pages',
       'Num Pages', 'Issue', 'Volume', 'Number Of Volumes',
       'Journal Abbreviation', 'Short Title', 'Series', 'Series Number',
       'Series Text', 'Series Title', 'Publisher', 'Place', 'Language',
       'Rights', 'Type', 'Archive', 'Archive Location', 'Library Catalog',
       'Call Number', 'Extra', 'Notes', 'File Attachments', 'Link Attachments',
       'Manual Tags', 'Automatic Tags', 'Editor', 'Series Editor',
       'Translator', 'Contributor', 'Attorney Agent', 'Book Author',
       'Cast Member', 'Commenter', 'Composer', 'Cosponsor', 'Counsel',
       'Interviewer', 'Producer', 'Recipient', 'Reviewed Author',
       'Scriptwriter', 'Words By', 'Guest', 'Number', 'Edition',
       'Running Time', 'Scale', 'Medium', 'Artwork Size', 'Filing Date',
       'Applicatio

17                                                   NaN
52     Due to the extensive stretches of date plantat...
90                                                   NaN
126    Anggraini E, Sinaga TM, Irsan C, Herlinda S, M...
66                                                   NaN
                             ...                        
82     Examination of populations of the coconut pest...
65                        The Nation's Leading Newspaper
92                                                   NaN
107    One dead coconut rhinoceros beetle was found o...
111    DEPARTMENT OF AGRICULTURE ʻOIHANA MAHIʻAI   JO...
Name: Abstract Note, Length: 132, dtype: object

In [None]:
# create new column which contains a formatted reference

def format_ref(row):
    """Build the HTML reference snippet for one bibliography row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series from ``df.apply(..., axis=1)``)
        Must provide the keys 'Author', 'Publication Year', 'Title' and 'Url'.

    Returns
    -------
    str
        ``'<author> <year><br><b><title></b><br><a href="<url>"><url></a>'``.
        Missing values (NaN/None) fall back to 'Unknown' (author), 0 (year),
        'Untitled' (title) and '' (url).

    Raises
    ------
    ValueError
        If a non-missing 'Publication Year' cannot be converted by ``int()``.
    """
    # Test each field individually; pd.isna(row)['Author'] would null-check the
    # whole row first and only works when `row` is a Series.
    author = 'Unknown' if pd.isna(row['Author']) else row['Author']
    year = 0 if pd.isna(row['Publication Year']) else row['Publication Year']
    # Without this guard a missing title is rendered as the literal text "nan".
    title = 'Untitled' if pd.isna(row['Title']) else row['Title']
    url = '' if pd.isna(row['Url']) else row['Url']

    # int() drops the ".0" that appears when the year column is read as float.
    return f'{author} {int(year)}<br><b>{title}</b><br><a href="{url}">{url}</a>'

# Run format_ref on every row (axis='columns' is the label form of axis=1)
# and store the resulting HTML snippet in the 'ref' column.
df['ref'] = df.apply(format_ref, axis='columns')

In [49]:
# create abstract_exists column

def abstract_exists(row):
    """Return True when the row's 'Abstract Note' value is not missing (NaN/None)."""
    abstract = row['Abstract Note']
    return pd.notna(abstract)
# Vectorized null check: same values as applying abstract_exists to each row,
# and it still yields a (empty) boolean column when the frame has no rows.
df['abstract_exists'] = df['Abstract Note'].notna()

# create notes_exist column

def notes_exist(row):
    """Return True when the row's 'Notes' value is not missing (NaN/None)."""
    notes = row['Notes']
    return pd.notna(notes)

# Vectorized null check: same values as applying notes_exist to each row,
# and it still yields a (empty) boolean column when the frame has no rows.
df['notes_exist'] = df['Notes'].notna()

# create attachments_exist column

def attachments_exist(row):
    """Return True when the row's 'File Attachments' value is not missing (NaN/None)."""
    attachments = row['File Attachments']
    return pd.notna(attachments)

# Vectorized null check: same values as applying attachments_exist to each row,
# and it still yields a (empty) boolean column when the frame has no rows.
df['attachments_exist'] = df['File Attachments'].notna()

# create a new column for formatted attachment urls

def format_attachment_urls(row):
    """Turn a row's ';'-separated 'File Attachments' string into HTML links.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series from ``df.apply(..., axis=1)``)
        Must provide 'attachments_exist' (truthy when attachments are present)
        and, in that case, 'File Attachments' as a string.

    Returns
    -------
    str
        One ``<a href="...">...</a><br>`` per attachment, concatenated in
        order, where both the href and the link text are the percent-encoded
        path. Returns '' when the row has no attachments.
    """
    if not row['attachments_exist']:
        return ''

    links = []
    for raw_path in row['File Attachments'].split(';'):
        path = raw_path.strip()  # remove whitespace from both ends
        if not path:
            # A trailing or doubled ';' leaves an empty segment; skip it
            # instead of emitting an empty <a href=""></a> link.
            continue
        href = quote(path)  # percent-encode, e.g. spaces become %20
        links.append(f'<a href="{href}">{href}</a><br>')

    return ''.join(links)

# Run format_attachment_urls on every row (axis='columns' is the label form
# of axis=1) and store the generated links in the 'attachment_hrefs' column.
df['attachment_hrefs'] = df.apply(format_attachment_urls, axis='columns')

# display df

# df[['abstract_exists','Abstract Note']]

In [50]:
# Render the bibliography DataFrame through the Jinja2 template and save it as HTML.

# The loader is rooted at "/" so the template can be addressed by its absolute path.
templateLoader = FileSystemLoader( searchpath="/" )
templateEnv = Environment( loader=templateLoader )
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
TEMPLATE_FILE = "/home/aubrey/Desktop/crb-pi-biblio/crb-pi-bib.template"
OUTPUT_FILE = 'crb-pi-bib.html'
template = templateEnv.get_template( TEMPLATE_FILE )
templateVars = {'df': df}  # the template receives the frame under the name `df`
outputText = template.render( templateVars )

# Write UTF-8 explicitly: the data contains non-ASCII text, and the default
# encoding of open() depends on the platform locale.
with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
    f.write(outputText)
print(f'The bibliography is saved in {OUTPUT_FILE}.')
print('Finished')

The bibliography is saved in crb-pi-bib.html.
Finished
