In [36]:
import sys
import urllib.request
import bibtexparser
from urllib.error import HTTPError

BASE_URL = 'http://dx.doi.org/'

def fetch_bibtex(doi):
    """Fetch and parse BibTeX entry for a DOI."""
    url = BASE_URL + doi
    req = urllib.request.Request(url)
    req.add_header('Accept', 'application/x-bibtex')
    
    try:
        with urllib.request.urlopen(req) as f:
            bibtex = f.read().decode()
            # Parse the BibTeX entry
            bibtex = bibtexparser.loads(bibtex)
            return bibtexparser.dumps(bibtex)
    except HTTPError as e:
        if e.code == 404:
            print(f"DOI {doi} not found.")
        else:
            print(f"Service unavailable for DOI {doi}.")
        return None

def read_dois_from_file(file_path):
    """Reads DOIs from a text file."""
    with open(file_path, "r", encoding="utf-8") as file:
        return [line.strip() for line in file if line.strip()]

def save_bibtex_to_file(bibtex_entries, output_file):
    """Saves the BibTeX entries to a file."""
    with open(output_file, "w", encoding="utf-8") as file:
        for entry in bibtex_entries:
            file.write(entry + "\n\n")
    print(f"📄 BibTeX file saved to {output_file}")

# File paths
doi_file = "dois.txt"  # Input file containing DOI links (one per line)
bib_file = "references.bib"  # Output BibTeX file

# Read DOIs from the file
dois = read_dois_from_file(doi_file)

# Fetch BibTeX for each DOI and collect entries
bibtex_entries = []
for doi in dois:
    print(f"Fetching BibTeX for DOI: {doi}")
    bibtex = fetch_bibtex(doi)
    if bibtex:
        bibtex_entries.append(bibtex)

# Save the collected BibTeX entries to the file
if bibtex_entries:
    save_bibtex_to_file(bibtex_entries, bib_file)
else:
    print("⚠️ No BibTeX entries found.")


Fetching BibTeX for DOI: https://doi.org/10.1175/JCLI-D-24-0398.1
📄 BibTeX file saved to references.bib


In [7]:
from pyzotero import zotero
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase

# Replace these with your Zotero credentials
library_id = 11990419  # Your Zotero Library ID
library_type = "user"  # "user" for personal library, "group" for a group library
api_key = "CU19YAkq7wITguFXAN122hEN"  # Your Zotero API Key

# Initialize Zotero API
zot = zotero.Zotero(library_id, library_type, api_key)

# Retrieve top 10 items from Zotero (adjust limit if needed)
items = zot.top(limit=1000)

# Initialize BibTeX database
bib_database = BibDatabase()
bib_database.entries = []

for item in items:
    data = item["data"]

    # Extract metadata fields, handling missing values
    title = data.get("title", "Unknown Title")
    authors = " and ".join(
        [f"{author['lastName']}, {author.get('firstName', '')}" for author in data.get("creators", [])]
    )
    journal = data.get("publicationTitle", "Unknown Journal")
    volume = data.get("volume", "")
    number = data.get("issue", "")
    pages = data.get("pages", "")
    year = data.get("date", "").split("-")[0]  # Extract year from full date
    doi = data.get("DOI", "")
    url = data.get("url", "")

    # Generate a BibTeX entry
    bib_entry = {
        "ENTRYTYPE": "article",
        "ID": f"{data.get('key', 'UnknownKey')}",
        "author": authors,
        "title": title,
        "journal": journal,
        "volume": volume,
        "number": number,
        "pages": pages,
        "year": year,
        "doi": doi,
        "url": url,
    }

    # Append entry to database
    bib_database.entries.append(bib_entry)

# Save to BibTeX file
bib_file = "references.bib"
writer = BibTexWriter()
with open(bib_file, "w", encoding="utf-8") as bibfile:
    bibfile.write(writer.write(bib_database))

print(f"📄 BibTeX file saved to {bib_file}")


KeyError: 'lastName'

In [23]:
from pyzotero import zotero
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase
import requests

# Replace these with your Zotero credentials
library_id = 11990419  # Your Zotero Library ID
library_type = "user"  # "user" for personal library, "group" for a group library
api_key = "CU19YAkq7wITguFXAN122hEN"  # Your Zotero API Key

# Initialize Zotero API
zot = zotero.Zotero(library_id, library_type, api_key)

# Retrieve all items from Zotero (pagination handling)
items = []
start = 0  # Start index for pagination
limit = 100  # Zotero API limit per request

while True:
    batch = zot.top(limit=limit, start=start)
    if not batch:
        break  # Stop if no more items are returned
    items.extend(batch)  # Append results
    start += limit  # Move to the next batch

print(f"✅ Total items retrieved from Zotero: {len(items)}")

# Function to fetch BibTeX from DOI
def fetch_bibtex_from_doi(doi):
    try:
        url = f"https://doi.org/{doi}"
        headers = {"Accept": "application/x-bibtex"}
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            return response.text
        else:
            return None
    except Exception as e:
        print(f"Error fetching BibTeX for DOI {doi}: {e}")
        return None


# Initialize BibTeX database
bib_database = BibDatabase()
bib_database.entries = []

# Track missing references
missing_references = []


for item in items:
    data = item["data"]
    title = data.get("title", "")
    authors = " and ".join(
        [f"{author.get('lastName', author.get('name', 'Unknown'))}, {author.get('firstName', '')}"
         for author in data.get("creators", [])]
    )
    journal = data.get("publicationTitle", "")
    volume = data.get("volume", "")
    number = data.get("issue", "")
    pages = data.get("pages", "")
    year = data.get("date", "").split("-")[0] if data.get("date") else ""
    doi = data.get("DOI", "").strip()
    url = data.get("url", "")

    # Check if key metadata fields are missing
    if not title or not authors or not journal or not year:
        if doi:
            print(f"🔍 Fetching missing metadata for DOI: {doi}")
            bibtex_data = fetch_bibtex_from_doi(doi)

            if bibtex_data:
                bibtex_parsed = bibtexparser.loads(bibtex_data)
                if bibtex_parsed.entries:
                    bib_database.entries.append(bibtex_parsed.entries[0])
                    continue  # Move to next paper after successful DOI lookup
                else:
                    print(f"⚠️ No valid BibTeX entry found for DOI: {doi}")
        print(f"❌ Skipping entry due to missing data: {title or 'Unknown Title'}")
        missing_references.append(title or "Unknown Title")
        continue  # Skip to the next item

    # Generate a BibTeX entry for valid papers
    bib_entry = {
        "ENTRYTYPE": "article",
        "ID": f"{data.get('key', 'UnknownKey')}",
        "author": authors,
        "title": title,
        "journal": journal,
        "volume": volume,
        "number": number,
        "pages": pages,
        "year": year,
        "doi": doi,
        "url": url,
    }

    # Append entry to database
    bib_database.entries.append(bib_entry)

# Save to BibTeX file
bib_file = "references.bib"
writer = BibTexWriter()
with open(bib_file, "w", encoding="utf-8") as bibfile:
    bibfile.write(writer.write(bib_database))

# Save missing references to a log file
if missing_references:
    with open("missing_references.txt", "w", encoding="utf-8") as missing_file:
        missing_file.write("\n".join(missing_references))
    print(f"⚠️ Some papers were skipped due to missing data. Check 'missing_references.txt'.")

print(f"📄 BibTeX file saved to {bib_file}")

# Count the number of entries in the BibTeX database
num_bib_entries = len(bib_database.entries)

# Count the number of missing references
num_missing_references = len(missing_references)

# Print the summary
print(f"✅ Total BibTeX entries saved: {num_bib_entries}")
print(f"⚠️ Total missing references: {num_missing_references}")


✅ Total items retrieved from Zotero: 212
❌ Skipping entry due to missing data: FOOD SECURITY AND CLIMATE CHANGE ADAPTATION FRAMEWORK: ISSUES AND CHALLENGES
❌ Skipping entry due to missing data: Trend analysis of wheat area, production and productivity in Nepal: An overview
❌ Skipping entry due to missing data: The Journal of AGRICULTURE AND ENVIRONMENT
❌ Skipping entry due to missing data: Ministry of Agricultural Development
❌ Skipping entry due to missing data: The Origin of Agriculture
❌ Skipping entry due to missing data: Performance Assessment of Irrigation: A Case from Nepal-Asia
❌ Skipping entry due to missing data: The State of Nepal’s Biodiversity for Food and Agriculture
❌ Skipping entry due to missing data: PLANT NUTRIENT MANAGEMENT FOR IMPROVING CROP PRODUCTIVITY IN NEPAL (A Discussion Paper) (DRAFT)
❌ Skipping entry due to missing data: The Role of Women in Agriculture
❌ Skipping entry due to missing data: Primary Production
❌ Skipping entry due to missing data: TRIBHUVAN 