In [60]:
import requests
import json
from pdf2doi import pdf2doi
import os

def fetch_doi_metadata(doi, timeout=10):
    """Fetch CSL-JSON metadata for a DOI from doi.org.

    Args:
        doi: The DOI string, appended verbatim to ``https://doi.org/``.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON body of the response, or ``None`` if the request
        failed, the server answered with an error status, or the body was
        not valid JSON. Failures are reported with ``print``.
    """
    headers = {
        # Request the CSL-JSON representation of the record.
        "Accept": "application/vnd.citationstyles.csl+json"
    }
    url = f"https://doi.org/{doi}"

    try:
        # Without an explicit timeout, requests can wait forever on a stalled
        # connection and hang the notebook cell.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raises an error for bad responses (4XX, 5XX)
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error fetching metadata for DOI {doi}: {e}")
        return None

def save_metadata_to_json(metadata, filename):
    """Write ``metadata`` to ``filename`` as JSON unless the path already exists.

    Args:
        metadata: Any object ``json.dumps`` can serialise.
        filename: Destination path. An existing path is never overwritten.

    Raises:
        TypeError: If ``metadata`` is not JSON-serialisable. No file is
            created in that case.
    """
    already_exists = f"File {filename} already exists. No action taken."

    # Check if the file already exists
    if os.path.exists(filename):
        print(already_exists)
        return

    # Serialise before creating the file: if this raises, no empty or
    # truncated file is left behind to block later saves.
    payload = json.dumps(metadata, indent=4, ensure_ascii=False)

    try:
        # 'x' fails if the path appeared since the check above, so an
        # existing file is never truncated.
        with open(filename, 'x', encoding='utf-8') as f:
            f.write(payload)
    except FileExistsError:
        print(already_exists)
        return

    print(f"Metadata saved to {filename}")

# Look for an identifier in the PDF, then save either the metadata fetched for
# it or pdf2doi's validation info to a JSON file named after the PDF.
file_path = '2010.13061.pdf'
output_path = file_path + '_metadata.json'
data = pdf2doi(file_path)
if data['identifier'] is not None:
    doi = data['identifier']
    metadata = fetch_doi_metadata(doi)
    # fetch_doi_metadata returns None on failure. Saving that would write
    # "null" and make every later run report that the file already exists,
    # so a failed fetch leaves no file and can simply be retried.
    if metadata is not None:
        save_metadata_to_json(metadata, output_path)
else:
    save_metadata_to_json(data['validation_info'], output_path)

[pdf2doi]: Trying to retrieve a DOI/identifier for the file: 2010.13061.pdf
[pdf2doi]: Method #1: Looking for a valid identifier in the document infos...
[pdf2doi]: Standardised DOI: 10.48550/arXiv.2010.13061 -> 10.48550/arxiv.2010.13061
[pdf2doi]: Validating the possible DOI 10.48550/arxiv.2010.13061 via a query to dx.doi.org...
[pdf2doi]: The DOI 10.48550/arxiv.2010.13061 is validated by dx.doi.org.
[pdf2doi]: Standardised DOI: 10.48550/arXiv.2010.13061 -> 10.48550/arxiv.2010.13061
[pdf2doi]: A valid DOI was found in the document info labelled '/pdf2doi_identifier'.


Metadata saved to 2010.13061.pdf_metadata.json


In [57]:
# Name of the metadata JSON file, derived from the PDF file name.
file_path = "2010.13061.pdf"
metadata = f"{file_path}_metadata.json"

In [58]:
# Display the metadata file name built in the previous cell.
metadata

'2010.13061.pdf_metadata.json'