# Publications markdown generator for academicpages

Takes a set of BibTeX files of publications and converts them for use with [academicpages.github.io](https://academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). 

The core python code is also in `pubsFromBibs.py`. 
Run either from the `markdown_generator` folder after updating the `publist` dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

TODO: Make this work with other databases of citations.
TODO: Merge this with the existing TSV parsing solution.

In [6]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from time import strptime
import string
import html
import os
import re

In [7]:
# TODO: incorporate different collection types rather than a catch-all
# "publications" collection; this requires other changes to the template.
#
# One entry per bibliography source; the outer key is only a label for it.
publist = {
    "journal": {
        # BibTeX file handed to the parser for this source.
        "file": "mine.bib",
        # BibTeX field whose value is used as the venue.
        "venuekey": "journal",
        # Text placed in front of the venue value.
        "venue-pretext": "",
        # Collection name and permalink prefix written to the front matter.
        "collection": {
            "name": "publications",
            "permalink": "/publication/",
        },
    },
}

In [8]:
# Characters that are replaced by an HTML entity; anything else passes through.
html_escape_table = {"&": "&amp;", '"': "&quot;", "'": "&apos;"}


def html_escape(text):
    """Return *text* with ``&``, ``"`` and ``'`` replaced by HTML entities.

    Every other character is copied unchanged. Note that ``<`` and ``>`` are
    not in the table and are therefore left as-is.
    """
    pieces = []
    for char in text:
        if char in html_escape_table:
            pieces.append(html_escape_table[char])
        else:
            pieces.append(char)
    return "".join(pieces)

In [39]:
b

OrderedCaseInsensitiveDict([('title', '{Multiple Components in the Broadband {\\ensuremath{\\gamma}}-Ray Emission of the Short GRB 160709A}'), ('journal', '\\apj'), ('keywords', 'gamma-ray burst: individual: GRB 160709A, Astrophysics - High Energy Astrophysical Phenomena'), ('year', '2019'), ('month', 'May'), ('volume', '876'), ('number', '1'), ('eid', '76'), ('pages', '76'), ('doi', '10.3847/1538-4357/ab0e72'), ('archivePrefix', 'arXiv'), ('eprint', '1910.05420'), ('primaryClass', 'astro-ph.HE'), ('adsurl', 'https://ui.adsabs.harvard.edu/abs/2019ApJ...876...76T'), ('adsnote', 'Provided by the SAO/NASA Astrophysics Data System')])

In [59]:
b["doi"]

'10.3847/1538-4357/ab0e72'

In [62]:

def _strip_outer_braces(text):
    """Remove one enclosing ``{...}`` pair from *text*, if it has one.

    Unlike a blind ``text[1:-1]``, this leaves unbraced values intact.
    """
    if len(text) >= 2 and text[0] == "{" and text[-1] == "}":
        return text[1:-1]
    return text


def _short_author(person):
    """Format a parsed author as ``F. Last``.

    Falls back to the last name alone when the author has no first name,
    instead of raising ``IndexError``.
    """
    last = " ".join(_strip_outer_braces(part) for part in person.last_names)
    if person.first_names and person.first_names[0]:
        return person.first_names[0][0] + ". " + last
    return last


def _format_authors(persons):
    """Return the abbreviated author list for the one-line citation.

    Up to three authors are listed; longer lists are cut after the third
    author and marked with "et al.".
    """
    names = [_short_author(person) for person in persons[:3]]
    if len(persons) > 3:
        return ", ".join(names) + " et al."
    if len(names) == 3:
        return names[0] + ", " + names[1] + ", and " + names[2]
    return " and ".join(names)


# Print a markdown bullet (title line + citation line) for every entry of
# every bibliography listed in `publist`.
for pubsource in publist:
    parser = bibtex.Parser()
    bibdata = parser.parse_file(publist[pubsource]["file"])

    # loop through the individual references in a given bibtex file
    for bib_id in bibdata.entries:
        entry = bibdata.entries[bib_id]
        b = entry.fields

        try:
            title = "* " + _strip_outer_braces(b["title"])
            # Raw string: "\e" and "\g" are not valid escape sequences.
            title = title.replace(r"{\ensuremath{\gamma}}", "Gamma") + "   "

            authors = _format_authors(entry.persons["author"])

            journal_field = b["journal"]
            if "apj" in journal_field:
                journal = "ApJ"
            else:
                # Fall back to the raw field (minus the macro backslash) so
                # the name is never unbound or left over from a prior entry.
                journal = _strip_outer_braces(journal_field).lstrip("\\")

            # Named `citation_line` (not `string`) so the imported `string`
            # module is not shadowed for the rest of the notebook.
            citation_line = (
                "*" + authors + ", " + b["year"] + ", " + journal + ", "
                + b["volume"] + ", " + b["pages"] + "*   "
            )
            # The entry key is used as the ADS identifier in the URL.
            citation_line += f"*[ADS](https://ui.adsabs.harvard.edu/abs/{bib_id}/abstract), "
            doi = b["doi"]
            citation_line += f'<span class="__dimensions_badge_embed__" data-doi="{doi}" data-style="small_rectangle"></span><script async src="https://badge.dimensions.ai/badge.js" charset="utf-8"></script>*'
        except KeyError as e:
            # A field (or the author list) may not exist for a reference.
            print(f"WARNING Missing Expected Field {e} from entry {bib_id}")
            continue

        print(title)
        print(citation_line)

* Impact of Ejecta Temperature and Mass on the Strength of Heavy Element Signatures in Kilonovae
*D. Tak, Z. Uhm, and J. Gillanders, 2024, ApJ, 967, 54([ADS](https://ui.adsabs.harvard.edu/abs/2024ApJ...967...54T/abstract))<span class="__dimensions_badge_embed__" data-doi="10.3847/1538-4357/ad3af4" data-style="small_rectangle"></span><script async src="https://badge.dimensions.ai/badge.js" charset="utf-8"></script>*
* Exploring the Impact of the Ejecta Velocity Profile on the Evolution of Kilonova: Diversity of the Kilonova Lightcurves
*D. Tak, Z. Uhm, and J. Gillanders, 2023, ApJ, 958, 121([ADS](https://ui.adsabs.harvard.edu/abs/2023ApJ...958..121T/abstract))<span class="__dimensions_badge_embed__" data-doi="10.3847/1538-4357/ad06b0" data-style="small_rectangle"></span><script async src="https://badge.dimensions.ai/badge.js" charset="utf-8"></script>*
* Temporal and Spectral Evolution of Gamma-Ray Burst Broad Pulses: Identification of High-latitude Emission in the Prompt Emission
*D. T

In [14]:
b["title"]

'{Impact of Ejecta Temperature and Mass on the Strength of Heavy Element Signatures in Kilonovae}'

In [9]:
# Write one markdown file (YAML front matter + short body) into
# ../_publications/ for every entry of every bibliography in `publist`.
for pubsource in publist:
    parser = bibtex.Parser()
    bibdata = parser.parse_file(publist[pubsource]["file"])

    #loop through the individual references in a given bibtex file
    for bib_id in bibdata.entries:
        #reset default date
        pub_year = "1900"
        pub_month = "01"
        pub_day = "01"

        entry = bibdata.entries[bib_id]
        b = entry.fields

        try:
            pub_year = f'{b["year"]}'

            if "month" in b.keys():
                raw_month = str(b["month"])
                if len(raw_month) < 3:
                    # short value: treated as a month number, left-padded
                    pub_month = ("0" + raw_month)[-2:]
                else:
                    # month name: parse its first three letters
                    try:
                        pub_month = "{:02d}".format(strptime(raw_month[:3], '%b').tm_mon)
                    except ValueError:
                        # keep the default month rather than aborting the run
                        print(f'WARNING Unrecognized month "{raw_month}" in entry {bib_id}; using {pub_month}')
            if "day" in b.keys():
                # zero-pad so the date is always YYYY-MM-DD
                pub_day = str(b["day"]).zfill(2)

            pub_date = pub_year+"-"+pub_month+"-"+pub_day

            #strip out {} as needed (some bibtex entries that maintain formatting)
            plain_title = b["title"].replace("{", "").replace("}","").replace("\\","")
            clean_title = plain_title.replace(" ","-")

            url_slug = re.sub(r"\[.*\]|[^a-zA-Z0-9_-]", "", clean_title)
            url_slug = url_slug.replace("--","-")

            md_filename = (str(pub_date) + "-" + url_slug + ".md").replace("--","-")
            html_filename = (str(pub_date) + "-" + url_slug).replace("--","-")

            #add venue logic depending on citation type
            venue = publist[pubsource]["venue-pretext"]+b[publist[pubsource]["venuekey"]].replace("{", "").replace("}","").replace("\\","")

            #Build Citation from text
            #citation authors - todo - add highlighting for primary author?
            author_parts = []
            for author in entry.persons["author"]:
                # last names may carry BibTeX braces; authors such as
                # collaborations may have no first name at all
                last = " ".join(author.last_names).replace("{", "").replace("}", "")
                first = author.first_names[0] if author.first_names else ""
                full_name = (first + " " + last).strip()
                author_parts.append(" " + full_name + ", ")

            # The citation is assembled from unescaped text and escaped once
            # when written below, so "&" does not end up as "&amp;amp;".
            citation = "".join(author_parts)
            citation = citation + "\"" + plain_title + ".\""
            citation = citation + " " + venue
            citation = citation + ", " + pub_year + "."

            ## YAML variables
            md = "---\ntitle: \""   + html_escape(plain_title) + '"\n'

            md += """collection: """ +  publist[pubsource]["collection"]["name"]

            md += """\npermalink: """ + publist[pubsource]["collection"]["permalink"]  + html_filename

            note = False
            if "note" in b.keys():
                if len(str(b["note"])) > 5:
                    md += "\nexcerpt: '" + html_escape(b["note"]) + "'"
                    note = True

            md += "\ndate: " + str(pub_date)

            md += "\nvenue: '" + html_escape(venue) + "'"

            url = False
            if "url" in b.keys():
                if len(str(b["url"])) > 5:
                    md += "\npaperurl: '" + b["url"] + "'"
                    url = True

            md += "\ncitation: '" + html_escape(citation) + "'"

            md += "\n---"

            ## Markdown description for individual page
            if note:
                md += "\n" + html_escape(b["note"]) + "\n"

            if url:
                md += "\n[Access paper here](" + b["url"] + "){:target=\"_blank\"}\n"
            else:
                md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"} for full citation"

            md_filename = os.path.basename(md_filename)

            with open("../_publications/" + md_filename, 'w', encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
        # field may not exist for a reference
        except KeyError as e:
            # "title" itself may be the missing field, so do not index it blindly
            title_preview = b["title"] if "title" in b.keys() else ""
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', title_preview[:30],"..."*(len(title_preview)>30),"\"")
            continue


SUCESSFULLY PARSED 2024ApJ...967...54T: " {Impact of Ejecta Temperature and Mass on the Strength of He ... "
SUCESSFULLY PARSED 2023ApJ...958..121T: " {Exploring the Impact of the Ejecta Velocity Profile on the  ... "
SUCESSFULLY PARSED 2023ApJ...949..110T: " {Temporal and Spectral Evolution of Gamma-Ray Burst Broad Pu ... "
SUCESSFULLY PARSED 2023ApJ...945..101A: " {Search for Ultraheavy Dark Matter from Observations of Dwar ... "
SUCESSFULLY PARSED 2022ApJ...938L...4T: " {Current and Future {\ensuremath{\gamma}}-Ray Searches for D ... "
SUCESSFULLY PARSED 2020ApJ...890....9A: " {Fermi and Swift Observations of GRB 190114C: Tracing the Ev ... "
SUCESSFULLY PARSED 2019ApJ...886L..33A: " {Bright Gamma-Ray Flares Observed in GRB 131108A}  "
SUCESSFULLY PARSED 2019ApJ...883..134T: " {Closure Relations of Gamma-Ray Bursts in High Energy Emissi ... "
SUCESSFULLY PARSED 2019ApJ...876...76T: " {Multiple Components in the Broadband {\ensuremath{\gamma}}- ... "
