In [12]:
import json
import re
from datetime import datetime

In [2]:
# Load the Zotero library exported via "Export item > BetterBibTex JSON".
# The export contains non-ASCII author names and punctuation, so the encoding
# is stated explicitly instead of relying on the platform's locale default.
with open('data/all/BIB_saef_20141114.json', 'r', encoding='utf-8') as f:
    bib = json.load(f)

In [3]:
# Author search experimentation: print the raw "saef:" tag lines stored in the
# `extra` field of every dataset whose date text ends in 2024.
# NOTE(review): endswith('2024') only matches dates written with the year last
# (e.g. dd/mm/2024), not ISO yyyy-mm-dd - confirm that is the intended filter.
for b in bib['items']:
    if b.get('itemType') != 'dataset':
        continue
    # .get() keeps the scan going on records that lack a date or extra field.
    if not (b.get('date') or '').endswith('2024'):
        continue
    # "saef:.*" already covers the "saef: ..." form (the space is part of .*).
    author_list = re.findall(r"saef:.*", b.get('extra') or '')
    print(author_list)

['saef: StevenChown; CassandraBrooks']
['saef: JasmineLee; StevenChown']
['saef: JuanSandino; BarbaraBollard; KrystalRandall; JohanBarthélemy; SharonRobinson; FelipeGonzalez']
['saef:JuanSandino;JohanBarthélemy;KrystalRandall;SharonRobinson;FelipeGonzalez;BarbaraBollard']
['saef: RodolfoAnderson']


In [4]:
# Index item keys by SAEF author: {"CamelCapsName": [item key, ...]}.
# SAEF authors are recorded in each item's `extra` field on lines such as
# "saef: StevenChown; CassandraBrooks" (the space after the colon is optional).
author_outputs = {}
for b in bib['items']:
    extra = b.get('extra')
    if not isinstance(extra, str):
        # No usable `extra` field: shouldn't exist but mistakes happen, so the
        # item is filed under the literal key 'extra' for follow-up.
        author_outputs.setdefault('extra', []).append(b['key'])
        continue

    # Tag matching is case-insensitive, consistent with the prefix removal below.
    for names in re.findall(r"saef:.*", extra, flags=re.IGNORECASE):
        names_tidy = re.sub('saef:', '', names, flags=re.IGNORECASE)

        for name in names_tidy.split(';'):  # one entry per SAEF author
            ns = name.strip()
            if not ns:
                # A trailing or doubled ';' yields an empty segment; skip it
                # rather than creating an author called ''.
                continue
            author_outputs.setdefault(ns, []).append(b['key'])

In [98]:
def printDate(dt):
    """Format a numeric date string for display in a citation.

    "/" separators are treated as "-", and only the first 10 characters are
    considered (the length of "yyyy-mm-dd"), which drops most of a trailing
    time component. The start of the string is then tested against these
    layouts, in order:

        dd-mm-yyyy, yyyy-mm-dd  ->  "01 May 2024"
        mm-yyyy, yyyy-mm        ->  "May 2024"

    Only the text matched by the layout is parsed, so leftovers of a time
    suffix (e.g. "1/5/2024 10:30") no longer make parsing fail.

    Args:
        dt: date string, e.g. "2024-05-01", "01/05/2024" or "2024-05".

    Returns:
        The formatted date; the normalised (at most 10-character) input when
        no layout matches; or "error <normalised input>" when a layout matches
        but the values do not form a real date (e.g. "31-02-2024").
    """
    # (regex tested at the start of the string, strptime format, output format)
    layouts = (
        (r"\d{1,2}-\d{1,2}-\d{4}", "%d-%m-%Y", "%d %B %Y"),  # dd-mm-yyyy
        (r"\d{4}-\d{1,2}-\d{1,2}", "%Y-%m-%d", "%d %B %Y"),  # yyyy-mm-dd
        (r"\d{1,2}-\d{4}", "%m-%Y", "%B %Y"),                # mm-yyyy
        (r"\d{4}-\d{1,2}", "%Y-%m", "%B %Y"),                # yyyy-mm
    )

    dt_1 = dt.replace("/", "-")[:10]
    for pattern, parse_format, display_format in layouts:
        found = re.match(pattern, dt_1)
        if found is None:
            continue
        try:
            # Parse only the matched prefix, not whatever follows it.
            parsed = datetime.strptime(found.group(0), parse_format)
        except ValueError:
            # The first matching layout decides; later layouts are not tried.
            return f"error {dt_1}"
        return parsed.strftime(display_format)
    return dt_1

# for b in bib['items']:
#     print(printDate(b['date']))


In [103]:
# Build a bibliography: {item key: one-line citation string}.
# Every citation starts with "Authors (year) Title"; what follows depends on
# the item type. Item types not listed below get no bibliography entry.
_TITLE_ONLY_TYPES = {'artwork', 'conferencePaper', 'film', 'letter', 'presentation'}
_URL_TYPES = {'blogPost', 'magazineArticle', 'newspaperArticle', 'webpage'}
_DOI_TYPES = {'bookSection', 'dataset'}
_BROADCAST_TYPES = {'radioBroadcast', 'tvBroadcast', 'videoRecording'}
_SUPPORTED_TYPES = (
    _TITLE_ONLY_TYPES | _URL_TYPES | _DOI_TYPES | _BROADCAST_TYPES | {'journalArticle'}
)


def _optional(item, field, prefix=", "):
    """Return prefix + item[field], or "" when the field is absent or empty."""
    value = item.get(field)
    return f"{prefix}{value}" if value else ""


def _format_authorship(item):
    """Return "Last First, Last First, ..." for the item's creators.

    Creators without lastName/firstName fall back to a single `name` field.
    NOTE(review): assumes single-field creators (e.g. organisations) carry
    `name`, as in Zotero's item JSON - confirm against the export.
    Returns "missing" when there are no creators or one cannot be named.
    """
    names = []
    for creator in item.get('creators') or []:
        if 'lastName' in creator and 'firstName' in creator:
            names.append(f"{creator['lastName']} {creator['firstName']}")
        elif 'name' in creator:
            names.append(creator['name'])
        else:
            return "missing"
    return ", ".join(names) or "missing"


def _publication_year(date_text):
    """Return the first 4-digit run in date_text (the year), or ""."""
    found = re.search(r"\d{4}", date_text)
    return found.group(0) if found else ""


def _format_citation(item):
    """Return the citation string for one item, or None for unlisted item types."""
    item_type = item.get('itemType')
    if item_type not in _SUPPORTED_TYPES:
        return None

    date_text = item.get('date') or ""
    citation = f"{_format_authorship(item)} ({_publication_year(date_text)}) {item['title']}"

    if item_type in _URL_TYPES:
        citation += _optional(item, 'url')
    elif item_type in _DOI_TYPES:
        # The full citation is kept: no characters are trimmed from the end.
        citation += _optional(item, 'DOI', ", DOI:")
    elif item_type == 'journalArticle':
        citation += (
            _optional(item, 'publicationTitle')
            + _optional(item, 'volume')
            + _optional(item, 'pages')
            + _optional(item, 'DOI', ", DOI:")
        )
    elif item_type in _BROADCAST_TYPES:
        # Broadcasts also show the full air date, then the URL when present.
        if date_text:
            citation += f", {printDate(date_text)}"
        citation += _optional(item, 'url')
    return citation


bibliography = {}
for b in bib['items']:
    citation = _format_citation(b)
    if citation is not None:
        bibliography[b['key']] = citation


In [104]:
# Display the finished {item key: citation} mapping for a visual check.
bibliography

{'229BRPKA': 'Lappan Rachael, Chown Steven L., French Matthew, Perlaza-Jiménez Laura, Macesic Nenad, Davis Mark, Brown Rebekah, Cheng Allen, Clasen Thomas, Conlan Lindus, Goddard Frederick, Henry Rebekah, Knight Daniel R., Li Fuyi, Luby Stephen, Lyras Dena, Ni Gaofeng, Rice Scott A., Short Francesca, Song Jiangning, Whittaker Andrea, Leder Karin, Lithgow Trevor, Greening Chris (2024) Towards integrated cross-sectoral surveillance of pathogens and antimicrobial resistance: Needs, approaches, and considerations for linking surveillance to action, Environment International, 192, 109046, DOI:10.1016/j.envint.2024.109046',
 '23W466NZ': 'Barthélemy Johan, Randall Krystal, Tu Melody (2024) AI Investigates Antarctica’s Disappearing Moss to Uncover Climate Change Clues, https://developer.nvidia.com/blog/ai-investigates-antarcticas-disappearing-moss-to-uncover-climate-change-clues/',
 '25GDBSJU': "Chown Steven (2024) Living on the edge of ice: The haunting reality of top scientist Steven Chown's

McGeoch Melodie A., Clarke David A., Mungi Ninad Avinash, Ordonez Alejandro


In [28]:
# NOTE(review): `x` is not assigned in any cell visible in this export, and this
# cell's execution count (28) is out of sequence with its neighbours. It looks
# like a leftover scratch check - confirm, and delete it if so, since it would
# raise NameError on Restart & Run All.
'one' in x

True