# Publications markdown generator for academicpages

Takes a set of bibtex of publications and converts them for use with [academicpages.github.io](academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). 

The core python code is also in `pubsFromBibs.py`. 
Run either from the `markdown_generator` folder after replacing updating the publist dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

TODO: Make this work with other databases of citations, 
TODO: Merge this with the existing TSV parsing solution

In [1]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from pybtex.database import BibliographyData, Entry
from pybtex.utils import OrderedCaseInsensitiveDict
from time import strptime
import string
import html
import os
import re
import copy

In [2]:
#todo: incorporate different collection types rather than a catch all publications, requires other changes to template
publist = {
    "conferences": {
        "file" : "biblio_c.bib",
        "venuekey": "booktitle",
        "venue-pretext": "",
        "collection" : {"name":"publ_conferences",
                        "permalink":"/publication/"}
        
    },
    "journals":{
        "file": "biblio_j.bib",
        "venuekey" : "journal",
        "venue-pretext" : "",
        "collection" : {"name":"publ_journals",
                        "permalink":"/publication/"}
    },
    "workshops":{
        "file": "biblio_w.bib",
        "venuekey" : ["journal", "booktitle"],
        "venue-pretext" : "",
        "collection" : {"name":"publ_workshops",
                        "permalink":"/publication/"}
    },
    "book":{
        "file": "biblio_b.bib",
        "venuekey" : ["journal", "booktitle"],
        "venue-pretext" : "",
        "collection" : {"name":"publ_preparation",
                        "permalink":"/publication/"}
    }, 
    "abstract":{
        "file": "biblio_a.bib",
        "venuekey" : ["journal", "booktitle"],
        "venue-pretext" : "",
        "collection" : {"name":"publ_preparation",
                        "permalink":"/publication/"}
    }, 
    "thesis":{
        "file": "biblio_t.bib",
        "venuekey" : ["school"],
        "venue-pretext" : "",
        "collection" : {"name":"publ_preparation",
                        "permalink":"/publication/"}
    },
    "preparation":{
        "file": "biblio_u.bib",
        "venuekey" : ["journal", "booktitle"],
        "venue-pretext" : "",
        "collection" : {"name":"publ_preparation",
                        "permalink":"/publication/"}
    }
}

In [3]:
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;"
    }

def html_escape(text):
    """Produce entities within text."""
    return "".join(html_escape_table.get(c,c) for c in text)

def all_escape(text):
    return text.replace('\\textbf', '').replace('\%', '%'). replace('{', '').replace('}', '').replace('\\emph', '').replace('\\quotes', '')

In [4]:
exclude_keys = ['archivePrefix', 
                'acceptance', 
                'code', 
                'slides', 
                'poster', 
                'talk', 
                'supplementary', 
                'abstract', 
                'timestamp', 
                'biburl', 
                'bibsource',
                'rankCORE',
                'rankGGS',
                'rankSJR']

for ii, pubsource in enumerate(publist):
    counter = 100 * ii
    parser = bibtex.Parser()
    bibdata = parser.parse_file(publist[pubsource]["file"])

    #loop through the individual references in a given bibtex file
    bib_ids = [bib_id for bib_id in bibdata.entries]
    for bib_id in reversed(bib_ids):
        
        #print(bibdata.entries[bib_id])
        
        #reset default date
        pub_year = "1900"
        pub_month = "01"
        pub_day = "01"
        
        print(bib_id)
        
        
        b = bibdata.entries[bib_id].fields
        
        try:
            pub_year = f'{b["year"]}'
        except KeyError as e:
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', b["title"][:30],"..."*(len(b['title'])>30),"\"")

        #todo: this hack for month and day needs some cleanup
        if "month" in b.keys(): 
            if(len(b["month"])<3):
                pub_month = "0"+b["month"]
                pub_month = pub_month[-2:]
            elif(b["month"] not in range(12)):
                tmnth = strptime(b["month"][:3],'%b').tm_mon   
                pub_month = "{:02d}".format(tmnth) 
            else:
                pub_month = str(b["month"])
        if "day" in b.keys(): 
            pub_day = str(b["day"])


        pub_date = pub_year+"-"+pub_month+"-"+pub_day

        #strip out {} as needed (some bibtex entries that maintain formatting)
        clean_title = b["title"].replace("{", "").replace("}","").replace("\\","").replace(" ","-")    

        url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", clean_title)
        url_slug = url_slug.replace("--","-")

        counter_str = '%04d' % counter 
        counter += 1
        md_filename = (counter_str + '-' + str(pub_year) + "-" + url_slug + ".md").replace("--","-")
        html_filename = (counter_str + '-' + str(pub_year) + "-" + url_slug).replace("--","-")

        #Build Citation from text
        citation = ""
        authors_str = ""

        #citation authors - todo - add highlighting for primary author?
        els = bibdata.entries[bib_id].persons["author"]
        #print(bibdata.entries[bib_id].persons["author"])
        for i, author in enumerate(els):
            if i == len(els) - 1:
                sep = ''
            elif i == len(els) - 2:
                sep = ', and '
            else:
                sep = ', '
            authors_str = authors_str+" "+' '.join(author.first_names)+" "+' '.join(author.middle_names) + " "+' '.join(author.last_names)+ sep

        #citation title
        citation = authors_str + "\"" + html_escape(b["title"].replace("{", "").replace("}","").replace("\\","")) + ".\""

        #add venue logic depending on citation type
        venuekeys = publist[pubsource]["venuekey"] if isinstance(publist[pubsource]["venuekey"], list) else [publist[pubsource]["venuekey"]]
        for vk in venuekeys:
            ven = b.get(vk)
            if ven is not None:
                break

        if ven is not None:
            venue = publist[pubsource]["venue-pretext"]+ven.replace("{", "").replace("}","").replace("\\","")
        else:
            venue = ""


        html_escaped_venue = html_escape(venue)
        citation = citation + " " + html_escaped_venue
        if len(html_escaped_venue) > 0 and pub_year != '1900':
            citation = citation + ", " + pub_year
        elif len(html_escaped_venue) == 0 and pub_year != '1900':
            citation = citation + pub_year


        ## YAML variables
        md = "---\ntitle: \""   + html_escape(b["title"].replace("{", "").replace("}","").replace("\\","")) + '"\n'

        md += """collection: """ +  publist[pubsource]["collection"]["name"]

        md += """\npermalink: """ + publist[pubsource]["collection"]["permalink"]  + html_filename

        note = False
        if "note" in b.keys():
            if len(str(b["note"])) > 5:
                md += "\nnote: '" + all_escape(html_escape(b["note"])) + "'"
                note = True

        acceptance = False
        if "acceptance" in b.keys():
            if len(str(b["acceptance"])) > 5:
                md += "\nacceptance: '" + all_escape(html_escape(b["acceptance"])) + "'"
                acceptance = True

        if 'rankCORE' in b.keys():
            md += "\nrankCORE: '" + all_escape(html_escape(b["rankCORE"])) + "'"
            
        if 'rankSJR' in b.keys():
            md += "\nrankSJR: '" + all_escape(html_escape(b["rankSJR"])) + "'"
            
        if 'rankGGS' in b.keys():
            md += "\nrankGGS: '" + all_escape(html_escape(b["rankGGS"])) + "'"


        #print(b.keys())
        abstract = False
        if "abstract" in b.keys():
            if len(str(b["abstract"])) > 5:
                abstract = True

        md += "\ndate: " + str(pub_date) 

        md += "\nvenue: '" + html_escape(venue) + "'"

        url = False
        if "url" in b.keys():
            if len(str(b["url"])) > 5:
                md += "\npaperurl: '" + b["url"] + "'"
                url = True

        md += "\npubtype: '" + html_escape(pubsource) + "'"

        md += "\nauthors: '" + html_escape(authors_str) + "'"
        md += "\ncitation: '" + html_escape(citation) + "'"


        #Bibtex entry

        this_entry = copy.deepcopy(bibdata.entries[bib_id])
        this_entry.fields =  OrderedCaseInsensitiveDict((key, this_entry.fields[key]) for key in this_entry.fields.keys() if key not in exclude_keys)

        new_bib_data = BibliographyData({bib_id : this_entry})

        #print(new_bib_data.to_string('bibtex'))

        new_bib_data.to_file('../files/bibtex/' + bib_id + '.bib', 'bibtex')

        bib_str = new_bib_data.to_string('bibtex')

        if not (pubsource == 'preparation' or (note and 'To appear' in b["note"])):
            md += "\nbibtexfile: '" + "/files/bibtex/" + bib_id + ".bib'"


        md += "\n---"


        ## Markdown description for individual page
        if abstract:
            md += "\n" + "Abstract\n <br> " + html_escape(b["abstract"]) + " <br> \n"

        if url:
            md += "\n [[Link](" + b["url"] + "){:target=\"_blank\"}] " 
        for field in ['poster', 'slides', 'code', 'talk', 'supplementary']:
            if field in b.keys():
                if len(str(b[field])) > 5:
                    md += "[[" + field.capitalize() + "](" + b[field] + "){:target=\"_blank\"}] " 

        #else:
        #    md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"} for full citation"
        if not (pubsource == 'preparation' or (note and 'To appear' in b["note"])):
            md += "[[BibTeX](" + "/files/bibtex/" + bib_id  + ".bib" +"){:target=\"_blank\"}] " 
            md += "\n" + "<pre> " + bib_str[:-1] + " </pre>" + "\n"
            #md += "\n" + "<input type=\"hidden\" id=\"bibtex\" name=\"bibtex\" value=\""+ "Vediamo" + "\">"
            #md += "<button class=\"btn btn-primary\" type=\"copyToClipboard('#bibtex')\">Copy BibTeX</button>"

        md_filename = os.path.basename(md_filename)



        with open("../_publications/" + md_filename, 'w') as f:
            f.write(md)
        print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
    # field may not exist for a reference
        
    counter += 100


metelli2017compatible
SUCESSFULLY PARSED metelli2017compatible: " Compatible Reward Inverse Reinforcement Learning  "
metelli2018configurable
SUCESSFULLY PARSED metelli2018configurable: " Configurable Markov Decision Processes  "
metelli2018policy
SUCESSFULLY PARSED metelli2018policy: " Policy Optimization via Importance Sampling  "
papini2019optimistic
SUCESSFULLY PARSED papini2019optimistic: " Optimistic Policy Optimization via Multiple Importance Sampl ... "
metelli2019reinforcement
SUCESSFULLY PARSED metelli2019reinforcement: " Reinforcement Learning in Configurable Continuous Environmen ... "
beraha2019feature
SUCESSFULLY PARSED beraha2019feature: " Feature Selection via Mutual Information: New Theoretical In ... "
metelli2019propagating
SUCESSFULLY PARSED metelli2019propagating: " Propagating Uncertainty in Reinforcement Learning via Wasser ... "
doro2020gradient
SUCESSFULLY PARSED doro2020gradient: " Gradient-Aware Model-Based Policy Search  "
ramponi2020truly
SUCESSFULLY PARSED

PybtexError: unable to open biblio_u.bib. No such file or directory