In [32]:
import requests
import yaml

In [95]:
# Maps a single Unicode character to the LaTeX markup that typesets it.
# Keys are single (NFC-composed) code points because the lookup is done
# character by character.
latexAccents = {
  "à": "\\`a" , # Grave accent
  "è": "\\`e" ,
  "ì": "\\`{\\i}" ,
  "ò": "\\`o" ,
  "ù": "\\`u" ,
  "ỳ": "\\`y" ,
  "À": "\\`A" ,
  "È": "\\`E" ,
  "Ì": "\\`I" ,         # capital I has no dot to drop; \I is not a LaTeX command
  "Ò": "\\`O" ,
  "Ù": "\\`U" ,
  "Ỳ": "\\`Y" ,
  "á": "\\'a" , # Acute accent
  "ć": "\\'c" ,
  "é": "\\'e" ,
  "í": "\\'{\\i}" ,
  "ó": "\\'o" ,
  "ú": "\\'u" ,
  "ý": "\\'y" ,
  "Á": "\\'A" ,
  "É": "\\'E" ,
  "Í": "\\'I" ,
  "Ó": "\\'O" ,
  "Ú": "\\'U" ,
  "Ý": "\\'Y" ,
  "â": "\\^a" , # Circumflex
  "ê": "\\^e" ,
  "î": "\\^{\\i}" ,
  "ô": "\\^o" ,
  "û": "\\^u" ,
  "ŷ": "\\^y" ,
  "Â": "\\^A" ,
  "Ê": "\\^E" ,
  "Î": "\\^I" ,
  "Ô": "\\^O" ,
  "Û": "\\^U" ,
  "Ŷ": "\\^Y" ,
  "ä": "\\\"a" ,        # Umlaut or dieresis
  "ë": "\\\"e" ,
  "ï": "\\\"{\\i}" ,
  "ö": "\\\"o" ,
  "ü": "\\\"u" ,
  "ÿ": "\\\"y" ,
  "Ä": "\\\"A" ,
  "Ë": "\\\"E" ,
  "Ï": "\\\"I" ,
  "Ö": "\\\"O" ,
  "Ü": "\\\"U" ,
  "Ÿ": "\\\"Y" ,
  "ã": "\\~{a}" ,       # Tilde
  "ñ": "\\~{n}" ,
  "ă": "\\u{a}" ,       # Breve
  "ĕ": "\\u{e}" ,
  "ŏ": "\\u{o}" ,
  "š": "\\v{s}" ,       # Caron
  "č": "\\v{c}" ,
  "ž": "\\v{z}" ,
  "ç": "\\c{c}" ,       # Cedilla
  "Ç": "\\c{C}" ,
  "œ": "{\\oe}" ,       # Ligatures
  "Œ": "{\\OE}" ,
  "æ": "{\\ae}" ,
  "Æ": "{\\AE}" ,
  "å": "{\\aa}" ,
  "Å": "{\\AA}" ,
  "–": "--" ,   # Dashes
  "—": "---" ,
  "−": "--" ,
  "ø": "{\\o}" ,        # Misc latin-1 letters
  "Ø": "{\\O}" ,
  "ß": "{\\ss}" ,
  "¡": "{!`}" ,
  "¿": "{?`}" ,
  "\\": "\\textbackslash{}" ,   # Characters that should be quoted (a bare \\ is a line break in LaTeX)
  "~": "\\textasciitilde{}" ,   # a bare \~ is the tilde-accent command, not a literal tilde
  "&": "\\&" ,
  "$": "\\$" ,
  "{": "\\{" ,
  "}": "\\}" ,
  "%": "\\%" ,
  "#": "\\#" ,
  "_": "\\_" ,
  "≥": "$\\ge$" ,       # Math operators
  "≤": "$\\le$" ,
  "≠": "$\\neq$" ,
  "©": "{\\copyright}" , # Misc (braced so that following letters are not absorbed into the command name)
  "ı": "{\\i}" ,
  "α": "$\\alpha$" ,
  "β": "$\\beta$" ,
  "γ": "$\\gamma$" ,
  "δ": "$\\delta$" ,
  "ε": "$\\epsilon$" ,
  "η": "$\\eta$" ,
  "θ": "$\\theta$" ,
  "λ": "$\\lambda$" ,
  "µ": "$\\mu$" ,
  "ν": "$\\nu$" ,
  "π": "$\\pi$" ,
  "σ": "$\\sigma$" ,
  "τ": "$\\tau$" ,
  "φ": "$\\phi$" ,
  "χ": "$\\chi$" ,
  "ψ": "$\\psi$" ,
  "ω": "$\\omega$" ,
  "°": "$^\\circ$" ,    # \deg would typeset the operator name "deg"
  "‘": "`" ,    # Quotes
  "’": "'" ,
  "′": "$^\\prime$" ,
  "“": "``" ,
  "”": "''" ,
  "‚": "," ,
  "„": ",," ,
  "\xa0": " " ,     # Unprintable characters
}

def replaceLatexAccents(str):
    """Return ``str`` with every character listed in ``latexAccents`` replaced.

    The text is NFC-normalised first, then scanned one character at a time:
    a character that is a key of ``latexAccents`` is swapped for the mapped
    value, any other character is copied through unchanged.
    """
    import unicodedata
    composed = unicodedata.normalize('NFC', str)
    return "".join(latexAccents.get(ch, ch) for ch in composed)


In [61]:
# DOI looked up by the cells below. Alternatives tried earlier:
#   10.1021/acsami.8b15684
#   10.1021/acsami.8b04600
doi = "10.1016/j.jpcs.2020.109840"

In [3]:
# Root of the Crossref REST API. Endpoint paths are appended directly, so the
# trailing slash is required. HTTPS keeps the queries out of clear text.
bare_url = "https://api.crossref.org/"


def get_bib(doi, timeout=30):
    """
    Fetch the BibTeX entry for a DOI from the Crossref API.

    Parameters
    ----------
        doi: str
            DOI to look up, e.g. ``10.1016/j.jpcs.2020.109840``.
        timeout: float
            Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.
    Returns
    -------
        found: bool
            True only when the server answered with HTTP 200.
        bib: str
            Response body decoded as UTF-8. When ``found`` is False this is
            whatever the server sent back, not a BibTeX entry.
    """
    url = "{}works/{}/transform/application/x-bibtex".format(bare_url, doi)
    r = requests.get(url, timeout=timeout)
    found = r.status_code == 200
    bib = r.content.decode("utf-8")

    return found, bib

In [51]:
# Fetch the BibTeX text for `doi`; `found` is False when the reply was not HTTP 200.
found, bib = get_bib(doi)

In [52]:
# Show the text returned by get_bib.
print(bib)

@article{Altintas_2018,
	doi = {10.1021/acsami.8b04600},
	url = {https://doi.org/10.1021%2Facsami.8b04600},
	year = 2018,
	month = {may},
	publisher = {American Chemical Society ({ACS})},
	volume = {10},
	number = {20},
	pages = {17257--17268},
	author = {Cigdem Altintas and Gokay Avci and Hilal Daglar and Ayda Nemati Vesali Azar and Sadiye Velioglu and Ilknur Erucar and Seda Keskin},
	title = {Database for {CO}2 Separation Performances of {MOFs} Based on Computational Materials Screening},
	journal = {{ACS} Applied Materials {\&} Interfaces}
}


In [64]:
def get_json(doi, timeout=30):
    """
    Fetch the Crossref metadata record for a DOI.

    Parameters
    ----------
        doi: str
            DOI to look up.
        timeout: float
            Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.
    Returns
    -------
        found: bool
            True only when the server answered with HTTP 200.
        item: dict
            Response from crossref, decoded from JSON. Empty dict when
            ``found`` is False.
    """

    url = "{}works/{}".format(bare_url, doi)
    r = requests.get(url, timeout=timeout)
    found = r.status_code == 200
    # Error replies (e.g. for an unknown DOI) are not guaranteed to carry a
    # JSON body, so decoding them could raise instead of reporting found=False.
    item = r.json() if found else {}

    return found, item


In [65]:
# Fetch the Crossref JSON record for `doi`; note that this rebinds `found`.
found, js = get_json(doi)

In [53]:
# NOTE(review): get_xml is not defined in the visible part of this notebook;
# presumably it came from a cell that was since removed — confirm, otherwise
# this cell raises NameError on a fresh kernel.
found, xml = get_xml(doi)

In [54]:
# Display the raw bytes of the XML reply (`xml` comes from the get_xml call).
xml.content

b'<?xml version="1.0" encoding="UTF-8"?>\n<crossref_result xmlns="http://www.crossref.org/qrschema/3.0" version="3.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.crossref.org/qrschema/3.0 http://www.crossref.org/schemas/crossref_query_output3.0.xsd">\r\n  <query_result>\r\n    <head>\r\n      <doi_batch_id>none</doi_batch_id>\r\n    </head>\r\n    <body>\r\n      <query status="resolved">\r\n        <doi type="journal_article">10.1021/acsami.8b04600</doi>\r\n        <crm-item name="publisher-name" type="string">American Chemical Society (ACS)</crm-item>\r\n        <crm-item name="prefix-name" type="string">American Chemical Society</crm-item>\r\n        <crm-item name="member-id" type="number">316</crm-item>\r\n        <crm-item name="citation-id" type="number">98250775</crm-item>\r\n        <crm-item name="journal-id" type="number">79960</crm-item>\r\n        <crm-item name="deposit-timestamp" type="number">2018052300091000769</crm-item>\r\n   

In [31]:
# Inspect the record's 'short-container-title' field (used later as the journal abbreviation).
js['message']['short-container-title']

['Journal of Physics and Chemistry of Solids']

In [56]:
# List the fields present in the Crossref record.
js['message'].keys()

dict_keys(['indexed', 'reference-count', 'publisher', 'issue', 'license', 'funder', 'content-domain', 'short-container-title', 'published-print', 'DOI', 'type', 'created', 'page', 'source', 'is-referenced-by-count', 'title', 'prefix', 'volume', 'author', 'member', 'published-online', 'reference', 'container-title', 'original-title', 'language', 'link', 'deposited', 'score', 'subtitle', 'short-title', 'issued', 'references-count', 'journal-issue', 'alternative-id', 'URL', 'relation', 'ISSN', 'issn-type', 'subject'])

In [108]:
# Start from an empty result dict; the cells below fill it in.
res = dict()


In [109]:
# Reference section with an (initially empty) list of author links.
res['References'] = {'Authors': []}

In [110]:
# Keep the first entry of the record's 'title' field.
res['Title'] = js['message']['title'][0]

In [111]:
# Not every record has an abstract; store it only when present.
# Crossref documents the field as a single string, so it is taken as is.
if 'abstract' in js['message'].keys():
    res['Abstract'] = js['message']['abstract']

In [112]:
author_list = js['message']['author']
# Rebuild the list from scratch so that re-running this cell does not append
# the same authors a second time.
res['References']['Authors'] = [
    f"[[{a['given']} {a['family']}]]" for a in author_list
]
res

{'References': {'Authors': ['[[César R. Monzón-González]]',
   '[[María Elena Sánchez-Vergara]]',
   '[[Wilmer E. Vallejo Narváez]]',
   '[[Tomás Rocha-Rinza]]',
   '[[Marcos Hernández]]',
   '[[Elizabeth Gómez]]',
   '[[Omar Jiménez-Sandoval]]',
   '[[Cecilio Álvarez-Toledano]]']},
 'Title': 'Synthesis and characterization of organotin(IV) semiconductors and their applications in optoelectronics'}

In [79]:
journal = js['message']['container-title']
journal_abbrev = js['message']['short-container-title']

# Print the full title alone when the abbreviated list equals the full list,
# otherwise print both links back to back.
print(
    f'[[{journal[0]}]]'
    if journal == journal_abbrev
    else f'[[{journal[0]}]]' f'[[{journal_abbrev[0]}]]'
)

[[Journal of Physics and Chemistry of Solids]]


http://dx.doi.org/10.1016/j.jpcs.2020.109840


In [116]:
def get_bib_as_dict(json):
    """
    Build a nested dict describing a paper from a Crossref JSON response.

    Parameters
    ----------
        json: dict
            Decoded Crossref response; the record is read from
            ``json['message']``.
    Returns
    -------
        res: dict
            ``Title``, ``Abstract`` (only if the record has one),
            ``References`` (an ``Authors`` list plus ``URL`` when available)
            and ``Journal`` (only if the record has a container title).
            Author names and journal titles are wrapped in ``[[...]]``.
    """
    message = json['message']
    res = {}

    res['Title'] = message['title'][0]
    if 'abstract' in message:
        abstract = message['abstract']
        # The abstract is normally one string; indexing it with [0] would
        # keep only its first character.
        res['Abstract'] = abstract if isinstance(abstract, str) else abstract[0]

    authors = []
    for a in message.get('author', []):
        # Contributors without 'given'/'family' (e.g. organisations) may
        # carry a single 'name' instead.
        full_name = ' '.join(
            part for part in (a.get('given'), a.get('family')) if part
        ) or a.get('name', '')
        authors.append(f'[[{full_name}]]')
    res['References'] = {'Authors': authors}

    journal = message.get('container-title') or []
    journal_abbrev = message.get('short-container-title') or []
    if journal:
        res['Journal'] = f'[[{journal[0]}]]'
        # Add the abbreviation only when there is one and it differs from
        # the full title; an empty abbreviation list must not be indexed.
        if journal_abbrev and journal_abbrev != journal:
            res['Journal'] += f'[[{journal_abbrev[0]}]]'

    # Look the URL up in the record that was passed in, never in a
    # notebook-level variable.
    if 'url' in message:
        res['References']['URL'] = message['url']
    elif 'URL' in message:
        res['References']['URL'] = message['URL']

    return res

In [157]:
def get_title_str(json):
    """Return the record's first title as a markdown bullet line ending in a newline."""
    first_title = json['message']['title'][0]
    return '- {}\n'.format(first_title)
def get_authors_str(json):
    """
    Render the title and author list of a Crossref record as a nested
    markdown outline.

    Parameters
    ----------
        json: dict
            Decoded Crossref response; the record is read from
            ``json['message']``.
    Returns
    -------
        outline: str
            A title bullet, then ``- Reference:`` / ``- Authors:`` headings
            (tab-indented), then one ``[[name]]`` bullet per author. Every
            line ends with a newline.
    """
    message = json['message']
    lines = [f"- {message['title'][0]}", '- Reference:', '\t- Authors:']
    for a in message.get('author', []):
        # Contributors without 'given'/'family' (e.g. organisations) may
        # carry a single 'name' instead.
        full_name = ' '.join(
            part for part in (a.get('given'), a.get('family')) if part
        ) or a.get('name', '')
        lines.append(f'\t\t- [[{full_name}]]')
    return '\n'.join(lines) + '\n'
    

In [129]:
# Title bullet for the current record.
t = get_title_str(js)

'- Synthesis and characterization of organotin(IV) semiconductors and their applications in optoelectronics\n'

In [158]:
# Title plus author outline for the current record, as one markdown string.
tsts = get_authors_str(js)

In [159]:
# Display the generated markdown string.
tsts

'- Synthesis and characterization of organotin(IV) semiconductors and their applications in optoelectronics\n- Reference:\n\t- Authors:\n\t\t- [[César R. Monzón-González]]\n\t\t- [[María Elena Sánchez-Vergara]]\n\t\t- [[Wilmer E. Vallejo Narváez]]\n\t\t- [[Tomás Rocha-Rinza]]\n\t\t- [[Marcos Hernández]]\n\t\t- [[Elizabeth Gómez]]\n\t\t- [[Omar Jiménez-Sandoval]]\n\t\t- [[Cecilio Álvarez-Toledano]]\n'

In [160]:
# The outline contains accented author names, so write it as UTF-8 instead of
# relying on the platform's default encoding.
with open('./test4.md', 'w', encoding='utf-8') as fout:
    fout.write(tsts)

In [117]:
# Nested dict form of the current record.
dd = get_bib_as_dict(js)

In [118]:
# Dump as YAML: sort_keys=False keeps the dict's own key order, and
# allow_unicode=True writes accented characters as they are instead of escaping them.
print(yaml.dump(dd,sort_keys=False,allow_unicode=True))

Title: Synthesis and characterization of organotin(IV) semiconductors and their applications
  in optoelectronics
References:
  Authors:
  - '[[César R. Monzón-González]]'
  - '[[María Elena Sánchez-Vergara]]'
  - '[[Wilmer E. Vallejo Narváez]]'
  - '[[Tomás Rocha-Rinza]]'
  - '[[Marcos Hernández]]'
  - '[[Elizabeth Gómez]]'
  - '[[Omar Jiménez-Sandoval]]'
  - '[[Cecilio Álvarez-Toledano]]'
  URL: http://dx.doi.org/10.1016/j.jpcs.2020.109840
Journal: '[[Journal of Physics and Chemistry of Solids]]'



In [119]:
# Read the whole markdown file; the explicit UTF-8 keeps the result
# independent of the platform's default encoding.
with open('./pa.md', 'r', encoding='utf-8') as handler:
    ff = handler.read()

In [120]:
# Display the text read from ./pa.md.
ff

'- TITLE\n- Abstract:\n    - \n- Reference:\n    - Authors:\n        - \n    - Journal:\n    - url: https://doi.org/\n- Notes:\n- Summary:\n- Concepts:\n- Keywords:\n'