In [8]:
import requests
from functools import cache

# Prefixes people commonly paste in front of a bare DOI (compared case-insensitively).
_DOI_PREFIXES = (
    "https://doi.org/",
    "http://doi.org/",
    "https://dx.doi.org/",
    "http://dx.doi.org/",
    "doi:",
)

# Crossref date fields in order of preference. "created" is the date the DOI
# record was registered, not the publication date, so it is only a last resort.
_CROSSREF_DATE_FIELDS = ("issued", "published", "published-print", "published-online", "created")


def _first_or_na(values):
    """Return the first element of a Crossref list field, or "N/A" if it is missing or empty."""
    return values[0] if values else "N/A"


def _author_display_name(author):
    """Build "Given Family" from a Crossref author record, falling back to its "name" field."""
    given = (author.get("given") or "").strip()
    family = (author.get("family") or "").strip()
    full_name = " ".join(part for part in (given, family) if part)
    return full_name or (author.get("name") or "").strip()


def _publication_date(message):
    """Return the best available date as "Y", "Y-M" or "Y-M-D", or "N/A" if none is usable."""
    for field in _CROSSREF_DATE_FIELDS:
        date_parts = (message.get(field) or {}).get("date-parts") or []
        first = date_parts[0] if date_parts else []
        # Unknown dates can come back as [[None]]; drop the None parts
        # instead of rendering them as the text "None".
        parts = [part for part in (first or []) if part is not None]
        if parts:
            return "-".join(map(str, parts))
    return "N/A"


@cache
def get_metadata_from_doi(doi):
    """
    Fetch basic bibliographic metadata for a DOI from the Crossref REST API.

    Results are memoised per DOI string, so repeated lookups of the same DOI
    do not hit the network again. The returned dict is the cached object and
    is shared between callers; treat it as read-only.

    Parameters:
        doi (str): A DOI, either bare ("10.1039/b904314d") or with a
            "https://doi.org/", "http://dx.doi.org/" or "doi:" style prefix.

    Returns:
        dict: A dictionary containing the metadata of the paper.
            - 'Title': Title of the article ("N/A" if absent)
            - 'Authors': List of author names as "Given Family"
            - 'Date': Publication date as "Y-M-D" (may be partial; "N/A" if absent)
            - 'Journal': Journal name ("N/A" if absent)
        If the DOI is missing, unknown to Crossref, or the response cannot be
        parsed, ``{"Error": "Unable to fetch data for the given DOI."}`` is
        returned instead.

    Raises:
        requests.RequestException: On network failures, timeouts, or HTTP
            error statuses other than 404. These are not cached, so a retry
            can succeed.
    """

    base_url = "https://api.crossref.org/works/"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }
    error_message = "Unable to fetch data for the given DOI."

    # Missing spreadsheet cells arrive as NaN (a float), not as a string.
    if not isinstance(doi, str):
        return {"Error": error_message}

    doi = doi.strip()
    lowered = doi.lower()
    for prefix in _DOI_PREFIXES:
        if lowered.startswith(prefix):
            doi = doi[len(prefix):]
            break
    if not doi:
        return {"Error": error_message}

    # A timeout keeps one stalled connection from hanging the whole notebook.
    response = requests.get(base_url + doi, headers=headers, timeout=30)

    # Unknown DOIs are answered with a 404 whose body is not JSON.
    if response.status_code == 404:
        return {"Error": error_message}
    # Other failures (rate limiting, server errors) are transient: raise rather
    # than return, so the failure is not memoised by @cache.
    response.raise_for_status()

    try:
        data = response.json()
    except ValueError:
        return {"Error": error_message}

    message = data.get("message") if isinstance(data, dict) else None
    if not isinstance(message, dict):
        return {"Error": error_message}

    author_names = (_author_display_name(author) for author in message.get("author") or [])

    return {
        "Title": _first_or_na(message.get("title")),
        "Authors": [name for name in author_names if name],
        "Date": _publication_date(message),
        "Journal": _first_or_na(message.get("container-title")),
    }

def _format_citation_author(author):
    """Turn "Given Middle Family" into "Family, GM"; a single-word name is returned as is."""
    # split() without arguments drops empty tokens, so doubled or stray spaces
    # cannot produce an empty "name part" whose first character is missing.
    parts = author.split()
    if not parts:
        return ""
    family = parts[-1]
    initials = "".join(part[0] for part in parts[:-1])
    return f"{family}, {initials}" if initials else family


def generate_jacs_citation(metadata):
    """
    Generate a JACS (Journal of the American Chemical Society) citation from metadata.

    The current output is a partial citation: authors as "Family, Initials"
    separated by "; ", followed by the unabbreviated journal name and the
    date. Volume and page numbers are not included yet (see TODOs).

    Parameters:
        metadata (dict): Dictionary containing metadata of a given DOI.
        - 'Authors': List of authors, each as "Given Family"
        - 'Date': Publication date
        - 'Journal': Journal name
        Missing keys fall back to no authors / "N/A".

    Returns:
        citation (str): JACS citation. When no usable author names are
        present, the author segment is omitted entirely.
    """
    authors = metadata.get('Authors') or []
    date = metadata.get('Date', "N/A")
    journal = metadata.get('Journal', "N/A")

    formatted_names = (_format_citation_author(author) for author in authors)
    formatted_authors = "; ".join(name for name in formatted_names if name)

    # TODO: how do I abbreviate journal names?
    journal_abbreviation = journal

    # TODO: what is volume number in italics, page numbers, first-last.
    if not formatted_authors:
        # Avoid a citation that starts with a bare ".  " when there are no authors.
        return f"{journal_abbreviation} {date}."

    citation = f"{formatted_authors}.  {journal_abbreviation} {date}."

    return citation

# Example usage: look up one DOI, list every metadata field, then show the citation.
doi_input = '10.1039/b904314d'
metadata = get_metadata_from_doi(doi_input)

# One "field: value" line per metadata entry, in the order the dict provides them.
for key in metadata:
    value = metadata[key]
    print(f"{key}: {value}")

citation = generate_jacs_citation(metadata)
print(citation)


Title: Ribosomal synthesis of dehydrobutyrine- and methyllanthionine-containing peptides
Authors: ['Yuki Goto', 'Kazuhiro Iwasaki', 'Kohei Torikai', 'Hiroshi Murakami', 'Hiroaki Suga']
Date: 2009-4-30
Journal: Chemical Communications
Goto, Y; Iwasaki, K; Torikai, K; Murakami, H; Suga, H.  Chemical Communications 2009-4-30.


In [2]:
import pandas as pd

# Load the spreadsheet export. skiprows=1 drops the file's first line, so the
# column names are taken from its second line.
csv_path = 'SURP-database-reformatted - Flexizymes ready for database.csv'
data = pd.read_csv(csv_path, skiprows=1)

# Preview the first rows.
data.head()

Unnamed: 0,DOI (required),Synthetase mutant common name,Accession ID,Organism of origin,Mutations,Crystal structure PDB code,"Flexizyme name (aFx, dFx, eFx)",Monomer Names (required),Monomer structure (SMILES string) (Required),Monomer leaving group,...,N-terminal incorporation,Internal incorporation,Readout,Ribosomal incorporation notes,(if LC-MS) \nsequence of peptide,(if protein expression) \nprotein yield,Reaction Time,Yield of reaction (%),Km,Kcat
0,10.1002/cbic.201100104,,,,,,eFx,4-aminobutyric acid,O=C(OCC#N)[C@@H](NC(CCCN)=O)CC1=CC=CC=C1,CME,...,Yes,,LC-MS,,,,,,,
1,10.1002/cbic.201100104,,,,,,eFx,(R/S)-4-amino-3-hydroxy butyric acid,O=C(OCC#N)[C@@H](NC(CC(O)CN)=O)CC1=CC=CC=C1,CME,...,Yes,,LC-MS,,,,,,,
2,10.1002/cbic.201100104,,,,,,eFx,"(3-aminocyclohexane)carboxylic acid,",O=C(OCC#N)[C@@H](NC([C@H]1C[C@@H](N)CCC1)=O)CC...,CME,...,Yes,,LC-MS,,,,,,,
3,10.1002/cbic.201100104,,,,,,eFx,"(3S,4S)-4-amino-3-hydroxy-6-methylheptanoic acid",O=C(OCC#N)[C@@H](NC(C[C@H](O)[C@H](CC(C)C)N)=O...,CME,...,Yes,,LC-MS,,,,,,,
4,10.1002/cbic.201100104,,,,,,eFx,"(3S,4R)-4-amino-3-hydroxy-5-methylhexanoic acid",O=C(OCC#N)[C@@H](NC(C[C@H](O)[C@@H](C(C)C)N)=O...,CME,...,Yes,,LC-MS,,,,,,,


In [3]:
# Attach Crossref metadata to every row, placing the new columns right after the DOI column.
doi_column = 'DOI (required)'
metadata_columns = ['Title', 'Authors', 'Date', 'Journal']

# One metadata record per row, in row order. Rows that share a DOI are cheap:
# get_metadata_from_doi memoises its result per DOI.
citation_rows = []
failed_dois = set()
for doi in data[doi_column]:
    # Empty spreadsheet cells are NaN (a float); never send those to the lookup.
    metadata = get_metadata_from_doi(doi) if isinstance(doi, str) else {"Error": "Missing DOI."}
    if "Error" in metadata:
        failed_dois.add(str(doi))
    # .get with placeholders: a failed lookup returns only an "Error" key, so
    # direct indexing would raise KeyError and abort the whole loop.
    citation_rows.append({
        'Title': metadata.get('Title', "N/A"),
        'Authors': ", ".join(metadata.get('Authors', [])),
        'Date': metadata.get('Date', "N/A"),
        'Journal': metadata.get('Journal', "N/A"),
    })

# Make failures visible instead of leaving silent placeholders in the table.
if failed_dois:
    print(f"Warning: no metadata found for {len(failed_dois)} DOI(s): {sorted(failed_dois)}")

# drop() returns a new frame, so `data` itself is left untouched; it also
# removes stale metadata columns if this cell is re-run on enriched data.
data_with_citations = data.drop(columns=metadata_columns, errors='ignore')

# Insert the metadata columns directly after the DOI column, in order.
insert_at = data_with_citations.columns.get_loc(doi_column) + 1
for offset, column in enumerate(metadata_columns):
    data_with_citations.insert(
        insert_at + offset,
        column,
        [citation_row[column] for citation_row in citation_rows],
    )

data_with_citations.head()


Unnamed: 0,DOI (required),Title,Authors,Date,Journal,Synthetase mutant common name,Accession ID,Organism of origin,Mutations,Crystal structure PDB code,...,N-terminal incorporation,Internal incorporation,Readout,Ribosomal incorporation notes,(if LC-MS) \nsequence of peptide,(if protein expression) \nprotein yield,Reaction Time,Yield of reaction (%),Km,Kcat
0,10.1002/cbic.201100104,Ribosomal Synthesis of Backbone-Macrocyclic Pe...,"Yukinori Ohshiro, Eiji Nakajima, Yuki Goto, Sh...",2011-4-20,ChemBioChem,,,,,,...,Yes,,LC-MS,,,,,,,
1,10.1002/cbic.201100104,Ribosomal Synthesis of Backbone-Macrocyclic Pe...,"Yukinori Ohshiro, Eiji Nakajima, Yuki Goto, Sh...",2011-4-20,ChemBioChem,,,,,,...,Yes,,LC-MS,,,,,,,
2,10.1002/cbic.201100104,Ribosomal Synthesis of Backbone-Macrocyclic Pe...,"Yukinori Ohshiro, Eiji Nakajima, Yuki Goto, Sh...",2011-4-20,ChemBioChem,,,,,,...,Yes,,LC-MS,,,,,,,
3,10.1002/cbic.201100104,Ribosomal Synthesis of Backbone-Macrocyclic Pe...,"Yukinori Ohshiro, Eiji Nakajima, Yuki Goto, Sh...",2011-4-20,ChemBioChem,,,,,,...,Yes,,LC-MS,,,,,,,
4,10.1002/cbic.201100104,Ribosomal Synthesis of Backbone-Macrocyclic Pe...,"Yukinori Ohshiro, Eiji Nakajima, Yuki Goto, Sh...",2011-4-20,ChemBioChem,,,,,,...,Yes,,LC-MS,,,,,,,


In [4]:
# Write the enriched table to disk; index=False leaves the DataFrame index out of the file.
file_name = "data_with_citations.csv"
data_with_citations.to_csv(path_or_buf=file_name, index=False)

# Confirm where the file went.
print(f"File saved as {file_name}")


File saved as data_with_citations.csv
