In [3]:
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import homogenize_latex_encoding
import bibtexparser

# Simulating a small sample from user's provided bibtex for feasibility
sample_bibtex = """
@article{krizhevsky2012imagenet,
  title={Imagenet classification with deep convolutional neural networks},
  author={Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  journal={Advances in neural information processing systems},
  volume={25},
  year={2012}
}

@book{Sutton1998,
  author    = {Richard S. Sutton and Andrew G. Barto},
  title     = {Reinforcement Learning: An Introduction},
  year      = {1998},
  publisher = {MIT Press},
  edition   = {1st},
  address   = {Cambridge, MA}
}
"""

# Parse the BibTeX string
parser = BibTexParser(common_strings=True)
parser.customization = homogenize_latex_encoding
bib_database = bibtexparser.loads(sample_bibtex, parser=parser)

# Generate LaTeX \bibitem entries
bibitems = []
for entry in bib_database.entries:
    key = entry.get("ID", "")
    authors = entry.get("author", "").replace("\n", " ")
    title = entry.get("title", "")
    year = entry.get("year", "")
    journal = entry.get("journal", entry.get("booktitle", ""))
    volume = entry.get("volume", "")
    pages = entry.get("pages", "")
    publisher = entry.get("publisher", "")
    bibitem = f"\\bibitem{{{key}}} {authors}. {title}. \\textit{{{journal}}}"
    if volume:
        bibitem += f", {volume}"
    if pages:
        bibitem += f", {pages}"
    if year:
        bibitem += f", {year}"
    if publisher:
        bibitem += f". {publisher}"
    bibitem += "."
    bibitems.append(bibitem)

bibitems_text = "\\begin{thebibliography}{99}\n" + "\n\n".join(bibitems) + "\n\\end{thebibliography}"
bibitems_text


'\\begin{thebibliography}{99}\n\\bibitem{krizhevsky2012imagenet} Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E. {I}magenet classification with deep convolutional neural networks. \\textit{Advances in neural information processing systems}, 25, 2012.\n\n\\bibitem{Sutton1998} Richard S. Sutton and Andrew G. Barto. {R}einforcement {L}earning: {A}n {I}ntroduction. \\textit{}, 1998. MIT Press.\n\\end{thebibliography}'

In [4]:
print(bibitems_text)

\begin{thebibliography}{99}
\bibitem{krizhevsky2012imagenet} Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E. {I}magenet classification with deep convolutional neural networks. \textit{Advances in neural information processing systems}, 25, 2012.

\bibitem{Sutton1998} Richard S. Sutton and Andrew G. Barto. {R}einforcement {L}earning: {A}n {I}ntroduction. \textit{}, 1998. MIT Press.
\end{thebibliography}


In [5]:
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import homogenize_latex_encoding
import bibtexparser
import os

def format_entry(entry):
    key = entry.get("ID", "")
    entry_type = entry.get("ENTRYTYPE", "").lower()
    author = entry.get("author", "").replace("\n", " ")
    title = entry.get("title", "")
    year = entry.get("year", "")
    journal = entry.get("journal", "")
    booktitle = entry.get("booktitle", "")
    volume = entry.get("volume", "")
    pages = entry.get("pages", "")
    publisher = entry.get("publisher", "")
    organization = entry.get("organization", "")
    url = entry.get("url", "")
    note = entry.get("note", "")

    bibitem = f"\\bibitem{{{key}}} {author}. {title}."

    if entry_type == "article":
        if journal:
            bibitem += f" \\textit{{{journal}}}"
        if volume:
            bibitem += f", {volume}"
        if pages:
            bibitem += f", {pages}"
    elif entry_type == "inproceedings":
        if booktitle:
            bibitem += f" In \\textit{{{booktitle}}}"
        if pages:
            bibitem += f", pp. {pages}"
        if organization:
            bibitem += f". {organization}"
    elif entry_type == "misc":
        if note:
            bibitem += f" {note}"
    
    if year:
        bibitem += f", {year}"
    if publisher:
        bibitem += f". {publisher}"
    if url:
        bibitem += f". \\url{{{url}}}"

    return bibitem.strip(" .") + "."

# === MAIN EXECUTION ===

bib_path = "bibliography.bib"
assert os.path.exists(bib_path), f"Can't find {bib_path}"

with open(bib_path, "r", encoding="utf-8") as bibfile:
    bibtex_str = bibfile.read()

parser = BibTexParser(common_strings=True)
parser.customization = homogenize_latex_encoding
bib_database = bibtexparser.loads(bibtex_str, parser=parser)

bibitems = [format_entry(entry) for entry in bib_database.entries]
bibitems_text = "\\begin{thebibliography}{99}\n" + "\n\n".join(bibitems) + "\n\\end{thebibliography}"

with open("generated_bibitems.tex", "w", encoding="utf-8") as f:
    f.write(bibitems_text)

print("✅ All entries written to generated_bibitems.tex")


✅ All entries written to generated_bibitems.tex


In [6]:
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import homogenize_latex_encoding
import re

with open("main.tex") as f:
    tex = f.read()

cited_keys = set(re.findall(r'\\cite\w*\{([^}]+)\}', tex))
cited_keys = {k.strip() for key in cited_keys for k in key.split(',')}

with open("bibliography.bib") as f:
    parser = BibTexParser(common_strings=True)
    parser.customization = homogenize_latex_encoding
    db = bibtexparser.load(f, parser=parser)

filtered_entries = [entry for entry in db.entries if entry['ID'] in cited_keys]
db.entries = filtered_entries

with open("cleaned.bib", "w") as f:
    f.write(bibtexparser.dumps(db))


FileNotFoundError: [Errno 2] No such file or directory: 'main.tex'

In [8]:
import re
from collections import Counter

# Simulate input from user as a long LaTeX bibliography string
with open("generated_bibitems.tex", "r", encoding="utf-8") as f:
    tex_content = f.read()

# Extract all citation keys from \bibitem{key}
keys = re.findall(r"\\bibitem\{(.*?)\}", tex_content)

# Count duplicates
key_counts = Counter(keys)
duplicates = {k: v for k, v in key_counts.items() if v > 1}
duplicates

{}