In [1]:
import bibmanager
from pathlib import Path

# Load the exported BibTeX file as a list of raw lines (newlines kept).
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale.
with open('../publication/export-bibtexabs.bib', encoding='utf-8') as infile:
    lines = infile.readlines()

In [2]:
# Split the raw lines into one text blob per BibTeX entry.
#   entries   : entry name -> full entry text
#   begin     : True while we are inside an entry
#   current   : lines collected for the entry being read
#   entryname : key taken from the '@TYPE{key,' opening line
entries = {}
current = []
entryname = ''
begin = False

for line in lines:
    # Outside an entry, only a line opening with '@' starts a new one.
    if not begin and line.startswith('@'):
        begin = True
        # Key = text after the first '{', minus the last character of the
        # line and any commas.
        entryname = line[line.find('{') + 1:-1].replace(',', '')

    if not begin:
        continue

    current.append(line)

    # The 'adsnote' field is treated as the end of the entry: the closing
    # brace is appended by hand and the collected lines are stored.
    if line.strip().startswith('adsnote'):
        begin = False
        current.append('}\n')
        entries[entryname] = ''.join(current)
        print(f"found entry: '{entryname}'. {len(current)} lines long.")
        current = []


found entry: '2022MNRAS.517.2056G'. 43 lines long.
found entry: '2022ApJ...939L...8S'. 41 lines long.
found entry: '2022arXiv221100038K'. 44 lines long.
found entry: '2022arXiv221009402K'. 53 lines long.
found entry: '2022arXiv221006993K'. 46 lines long.
found entry: '2022arXiv220902546G'. 47 lines long.
found entry: '2022A&A...665A.123M'. 54 lines long.
found entry: '2022arXiv220802782P'. 47 lines long.
found entry: '2022ApJ...935..167A'. 32 lines long.
found entry: '2022ApJ...925..175S'. 44 lines long.
found entry: '2021A&A...652A.136K'. 65 lines long.
found entry: '2021A&A...651A..81B'. 57 lines long.
found entry: '2021ApJ...912...46B'. 33 lines long.
found entry: '2021A&A...649A.163K'. 50 lines long.
found entry: '2021jwst.prop.2122A'. 10 lines long.
found entry: '2021jwst.prop.2114A'. 10 lines long.
found entry: '2021A&A...647A..93P'. 58 lines long.
found entry: '2021A&A...646A.119P'. 60 lines long.
found entry: '2020ApJ...902....6S'. 49 lines long.
found entry: '2020ApJ...900..14

In [3]:
from dataclasses import dataclass
from typing import List, Optional
from datetime import datetime, timedelta
from pylatexenc.latex2text import LatexNodes2Text

@dataclass
class Paper:
    """One publication, built from a bibmanager entry and rendered as a
    ``---``-delimited YAML front-matter block by :meth:`markdown`.

    ``str()`` and ``repr()`` of a paper both return ``shortname``.
    """
    title: str
    authors: List[str]
    bibcode: str
    doi: str
    date: datetime
    # Legend: 0 = Uncategorized; 2 = Journal article;
    #         3 = Preprint / Working Paper; 7 = Thesis
    publication_types: int
    publication: str  # journal
    publication_short: str  # abbreviation

    adsurl: str
    arxivurl: Optional[str] = None

    abstract: str = ''
    summary: Optional[str] = None
    # Quoted forward reference: the class can be created without resolving
    # the bibmanager name at definition time.
    bib: Optional["bibmanager.bib_manager.Bib"] = None

    shortname: str = ''

    @staticmethod
    def get_shortname(bib: "bibmanager.bib_manager.Bib"):
        """Return the 'ushort' author string with braces, backslashes and
        spaces removed, followed by the entry's year."""
        shortname = bib.get_authors('ushort')
        for unwanted in ('{', '}', '\\', ' '):
            shortname = shortname.replace(unwanted, '')
        return f"{shortname}{bib.year}"

    @staticmethod
    def get_authors(bib: "bibmanager.bib_manager.Bib"):
        """Return an ``authors:`` list block, one ``- `` item per
        ';'-separated piece of the 'long' author string, with LaTeX markup
        converted to plain text."""
        latex = 'authors:\n-  ' + '\n- '.join(bib.get_authors('long').split(';'))
        return LatexNodes2Text().latex_to_text(latex)

    def get_links(self):
        """Return the ``links:`` block with the ADS URL, plus a ``url_pdf``
        line when an arXiv URL is set."""
        links = f"links:\n- name: NASA ADS\n  url: {self.adsurl}"
        if self.arxivurl is not None:
            links += f"\nurl_pdf: {self.arxivurl}"
        return links

    @staticmethod
    def find_journal(content: str, as_macro: bool = False):
        """Extract the value of the ``journal =`` field from raw entry text.

        Parameters
        ----------
        content : raw BibTeX text of one entry.
        as_macro : when True, keep backslashes (e.g. ``\\apj``); otherwise
            they are removed.

        Returns
        -------
        The journal string, or None when there is no ``journal =`` field.
        """
        ji = content.find('journal =')
        if ji == -1:
            return None
        jf = content.find('},', ji)
        if jf == -1:
            # Field is not followed by a comma (e.g. it is the last field):
            # stop at the first closing brace, or at the end of the text.
            jf = content.find('}', ji)
            if jf == -1:
                jf = len(content)
        journal = content[ji + 9:jf].strip().strip('{')
        if as_macro:
            return journal
        return journal.replace('\\', '')

    @classmethod
    def from_bib(cls, bib: "bibmanager.bib_manager.Bib"):
        """Build a Paper from a bibmanager ``Bib`` object.

        ``publication`` and ``publication_short`` are both set to the
        journal found in ``bib.content``, falling back to the bibcode.
        ``publication_types`` is always 2.
        """
        abstract = getattr(bib, 'abstract', None) or ''
        journal = cls.find_journal(bib.content)
        if journal is None:
            journal = bib.bibcode

        # Only build an arXiv link when the entry actually has an eprint id;
        # otherwise the URL would read '.../abs/None'.
        eprint = getattr(bib, 'eprint', None)
        arxivurl = f"https://arxiv.org/abs/{eprint}" if eprint else None

        # datetime() rejects months outside 1..12; use January in that case.
        # NOTE(review): bibmanager appears to use an out-of-range month as a
        # "no month" sentinel - confirm against the library.
        month = bib.month
        if not (isinstance(month, int) and 1 <= month <= 12):
            month = 1

        return cls(
            title=bib.title,
            authors=bib.authors,
            bibcode=bib.bibcode,
            doi=bib.doi,
            date=datetime(bib.year, month, 1),
            publication_types=2,
            publication=journal,
            publication_short=journal,
            adsurl=bib.adsurl,
            arxivurl=arxivurl,
            abstract=abstract,
            bib=bib,
            shortname=cls.get_shortname(bib),
        )

    @staticmethod
    def _yaml_escape(text) -> str:
        """Escape backslashes and double quotes so ``text`` can sit inside a
        double-quoted YAML scalar."""
        return str(text).replace('\\', '\\\\').replace('"', '\\"')

    def markdown(self):
        """Get markdown of paper.

        Returns the front-matter block as a string. The abstract falls back
        to the title when empty, and ``publishDate`` is the current time
        plus four hours.
        """
        d = datetime.now() + timedelta(hours=4)
        title = self._yaml_escape(self.title)
        abstract = self._yaml_escape(self.abstract or self.title)
        # A missing DOI becomes an empty string rather than the text "None".
        doi = self._yaml_escape(self.doi or '')

        return (
            "---\n"
            f"title: \"{title}\"\n"
            f"{self.get_authors(self.bib)}\n"
            f"date: \"{self.date.isoformat()}\"\n"
            f"doi: \"{doi}\"\n"
            f"publishDate: \"{d.isoformat()}\"\n"
            f"publication_types: [\"{self.publication_types}\"]\n"
            f"publication: {self.publication}\n"
            f"publication_short: {self.publication_short}\n"
            f"abstract: \"{abstract}\"\n"
            f"tags:\n- Astronomy\n"
            f"{self.get_links()}\n"
            "projects: []\n"
            "---\n")

    def __repr__(self):
        return self.shortname

    def __str__(self):
        return self.shortname
    

In [10]:
# Scratch check: build a Paper from an @MISC entry that has no `journal`
# field. A raw string is used because the entry text contains the BibTeX
# escape `\#`, which is not a valid Python escape sequence in a normal string.
bib = bibmanager.bib_manager.Bib(r'''@MISC{2021jwst.prop.2114A,
       author = {{Ashall}, Chris and {Baron}, Eddie and {Hoeflich}, Peter A. and {Baade}, Dietrich and {Brown}, Peter J. and {Burns}, Chris and {Burrow}, Anthony and {Cikota}, Aleksandar and {Davis}, Scott and {DerKacy}, James M. and {Do}, Aaron Joshua Matsuo and {Fisher}, Alec Jameson and {Galbany}, Lluis and {Hsiao}, Eric and {Karamehmetoglu}, Emir and {Krisciunas}, Kevin and {Kumar}, Sahana and {LU}, JING and {Mera Evans}, Tyco Brahe and {Morrell}, Nidia and {Patat}, Ferdinando and {Phillips}, Mark M. and {Shahbandeh}, Melissa and {Shappee}, Benjamin John and {Stritzinger}, Maximillian and {Suntzeff}, Nicholas B. and {Telesco}, Charles and {Tucker}, Michael and {Wang}, Lifan and {Yang}, Yi and {de Jaeger}, Thomas},
        title = "{MIR Spectroscopy of Type Ia Supernovae: The Key to Unlocking their Explosions and Element Production}",
 howpublished = {JWST Proposal. Cycle 1, ID. \#2114},
         year = 2021,
        month = mar,
        pages = {2114},
       adsurl = {https://ui.adsabs.harvard.edu/abs/2021jwst.prop.2114A},
      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}''')
paper = Paper.from_bib(bib)
# str.find returns -1 when the entry text has no 'journal =' field.
bib.content.find('journal =')

-1

In [4]:
# Turn every raw entry into a Bib and a Paper, both keyed by bibcode.
# Entries that raise ValueError are reported and skipped.
bibs, papers = {}, {}

for entry, payload in entries.items():
    try:
        bib = bibmanager.bib_manager.Bib(payload)
        paper = Paper.from_bib(bib)
    except ValueError as err:
        print(f"{entry} error: {err}")
    else:
        # Store only when both objects were built successfully.
        bibs[bib.bibcode] = bib
        papers[bib.bibcode] = paper
    


2019ApJ...873...92B error: Mismatched braces in entry.


In [5]:
# Write one folder per paper containing index.md (front matter) and cite.bib.
# Bibcodes of papers that could not be written are collected in `failed`.
failed = []
written_dirs = {}  # output folder -> bibcode of the paper last written there
for paper in papers.values():
    try:
        # Folder name comes from str(paper), i.e. its shortname; '+' is
        # replaced by '_' in the path.
        base = Path(f"../../content/publication/paper_{paper}/".replace('+','_')).resolve()

        # Two papers with the same shortname map to the same folder; make the
        # overwrite visible instead of silent.
        if base in written_dirs:
            print(f"warning: {paper.bibcode} overwrites {written_dirs[base]} in {base}")
        written_dirs[base] = paper.bibcode

        base.mkdir(parents=True, exist_ok=True)

        # Explicit UTF-8 so non-ASCII text does not depend on the locale.
        index = base / 'index.md'
        index.write_text(paper.markdown(), encoding='utf-8')

        bibfile = base / "cite.bib"
        bibfile.write_text(paper.bib.content, encoding='utf-8')
        print("wrote:", base, bibfile, index)
    except Exception as e:
        # Best effort: record the failure and carry on with the next paper.
        failed.append(paper.bibcode)
        print(paper.bibcode, e)

wrote: /home/emir/Software/personal_website/content/publication/paper_Gutierrez_2022 /home/emir/Software/personal_website/content/publication/paper_Gutierrez_2022/cite.bib /home/emir/Software/personal_website/content/publication/paper_Gutierrez_2022/index.md
wrote: /home/emir/Software/personal_website/content/publication/paper_Stritzinger_2022 /home/emir/Software/personal_website/content/publication/paper_Stritzinger_2022/cite.bib /home/emir/Software/personal_website/content/publication/paper_Stritzinger_2022/index.md
wrote: /home/emir/Software/personal_website/content/publication/paper_Kwok_2022 /home/emir/Software/personal_website/content/publication/paper_Kwok_2022/cite.bib /home/emir/Software/personal_website/content/publication/paper_Kwok_2022/index.md
wrote: /home/emir/Software/personal_website/content/publication/paper_Karamehmetoglu_2022 /home/emir/Software/personal_website/content/publication/paper_Karamehmetoglu_2022/cite.bib /home/emir/Software/personal_website/content/publi