From dd0261976e45a8fe9070880912d18ac9abdfd535 Mon Sep 17 00:00:00 2001 From: Albert Engstfeld Date: Thu, 30 Apr 2026 23:42:21 +0200 Subject: [PATCH 1/4] Improve loading speed of the database description --- unitpackage/database/echemdb.py | 30 +++++++++++++-------------- unitpackage/database/echemdb_entry.py | 3 ++- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/unitpackage/database/echemdb.py b/unitpackage/database/echemdb.py index e5f2a21d..5144553e 100644 --- a/unitpackage/database/echemdb.py +++ b/unitpackage/database/echemdb.py @@ -149,24 +149,22 @@ def bibliography(self): '' """ - from pybtex.database import BibliographyData - - bib_data = BibliographyData( - { - entry.bibliography.key: entry.bibliography - for entry in self - if entry.bibliography - } - ) + from pybtex.database import BibliographyData, parse_string - if isinstance(bib_data, str): - return bib_data + # Parse each unique bibdata string only once; many entries share a publication. + seen_citations = {} + for entry in self: + citation = entry._default_metadata.get("source", {}).get("bibdata", "") + if citation and citation not in seen_citations: + seen_citations[citation] = parse_string(citation, "bibtex") - # Remove duplicates from the bibliography - bib_data_ = BibliographyData() + if not seen_citations: + return "" - for key, entry in bib_data.entries.items(): - if key not in bib_data_.entries: - bib_data_.add_entry(key, entry) + bib_data_ = BibliographyData() + for parsed in seen_citations.values(): + for key, bib_entry in parsed.entries.items(): + if key not in bib_data_.entries: + bib_data_.add_entry(key, bib_entry) return bib_data_ diff --git a/unitpackage/database/echemdb_entry.py b/unitpackage/database/echemdb_entry.py index 425ccf44..e9513b34 100644 --- a/unitpackage/database/echemdb_entry.py +++ b/unitpackage/database/echemdb_entry.py @@ -65,6 +65,7 @@ # along with unitpackage. If not, see . 
# ******************************************************************** import logging +from functools import cached_property from unitpackage.entry import Entry @@ -174,7 +175,7 @@ def from_mpt(cls, csvname, encoding=None): return entry - @property + @cached_property def bibliography(self): r""" Return a pybtex bibliography object associated with this entry. From 45af9a433f7c5d61a36135a51bf113bf396b0bfa Mon Sep 17 00:00:00 2001 From: Albert Engstfeld Date: Thu, 30 Apr 2026 23:44:14 +0200 Subject: [PATCH 2/4] Improve description speed by caching the bibliography data --- doc/news/describe-perfromance.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/news/describe-perfromance.rst diff --git a/doc/news/describe-perfromance.rst b/doc/news/describe-perfromance.rst new file mode 100644 index 00000000..163ecddf --- /dev/null +++ b/doc/news/describe-perfromance.rst @@ -0,0 +1,3 @@ +**Performance:** + +* Improved speed to return the echemdb description by caching the bibliography data. From db51aa6529e833a1dcca22cc9e4145d448e469ee Mon Sep 17 00:00:00 2001 From: Albert Engstfeld Date: Thu, 30 Apr 2026 23:47:26 +0200 Subject: [PATCH 3/4] fix lint --- unitpackage/database/echemdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unitpackage/database/echemdb.py b/unitpackage/database/echemdb.py index 5144553e..1f27f270 100644 --- a/unitpackage/database/echemdb.py +++ b/unitpackage/database/echemdb.py @@ -154,7 +154,10 @@ def bibliography(self): # Parse each unique bibdata string only once; many entries share a publication. 
seen_citations = {} for entry in self: - citation = entry._default_metadata.get("source", {}).get("bibdata", "") + try: + citation = entry.source.bibdata + except AttributeError: + citation = "" if citation and citation not in seen_citations: seen_citations[citation] = parse_string(citation, "bibtex") From a4b3b2f10b4d6934cf9980b1c463c893ab35a416 Mon Sep 17 00:00:00 2001 From: Albert Engstfeld Date: Sat, 2 May 2026 17:34:44 +0200 Subject: [PATCH 4/4] Remove duplicate checking for duplicates --- unitpackage/database/echemdb.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unitpackage/database/echemdb.py b/unitpackage/database/echemdb.py index 1f27f270..35b3dc79 100644 --- a/unitpackage/database/echemdb.py +++ b/unitpackage/database/echemdb.py @@ -167,7 +167,6 @@ def bibliography(self): bib_data_ = BibliographyData() for parsed in seen_citations.values(): for key, bib_entry in parsed.entries.items(): - if key not in bib_data_.entries: - bib_data_.add_entry(key, bib_entry) + bib_data_.add_entry(key, bib_entry) return bib_data_