From 759a129d4b9a63ec9f8af46efb156a6d11f0b102 Mon Sep 17 00:00:00 2001
From: Emily Bourne
Date: Fri, 12 Sep 2025 16:24:47 +0200
Subject: [PATCH 01/11] Add a script to check for new publications that should
 be indexed on the website

---
 .../workflows/check-for-new-publications.yml |  54 ++++
 scripts/update_publications.py               | 231 ++++++++++++++++++
 scripts/venue_abbreviations.yml              |  36 +++
 3 files changed, 321 insertions(+)
 create mode 100644 .github/workflows/check-for-new-publications.yml
 create mode 100644 scripts/update_publications.py
 create mode 100644 scripts/venue_abbreviations.yml

diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml
new file mode 100644
index 0000000..d08141a
--- /dev/null
+++ b/.github/workflows/check-for-new-publications.yml
@@ -0,0 +1,54 @@
+name: Update Publications
+
+on:
+  schedule:
+    # Runs at 03:00 on the first day of each month
+    - cron: '0 3 1 * *'
+  workflow_dispatch: # allows manual trigger
+
+jobs:
+  update-publications:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests pyyaml unidecode
+      - name: Get last run date
+        run: |
+          last_run_iso=$(gh run list --workflow "Update Publications" --status success --limit 1 --json createdAt --jq '.[0].createdAt' 2>/dev/null || echo "")
+          if [ -z "$last_run_iso" ]; then
+            last_run_iso="2019-01-01T00:00:00Z" # fallback default
+          fi
+          last_run=$(date -u -d "$last_run_iso" +"%Y-%m-%d")
+          echo "LAST_RUN=$last_run" >> $GITHUB_ENV
+      - name: Create branch
+        run: |
+          branch_name="update-publications-$(date +'%Y%m%d')"
+          git checkout -b $branch_name
+          echo "branch_name=${branch_name}" >> $GITHUB_ENV
+      - name: Run publication update script
+        run: python scripts/update_publications.py
+      - name: Commit changes
+        run: |
+          git add content/publication
+          git commit -m "Automated update of publications" || echo "No changes to commit"
+      - name: Push branch
+        run: git push origin HEAD
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Create Pull Request
+        run: |
+          branch_name=$(git rev-parse --abbrev-ref HEAD)
+          gh pr create \
+            --title "Update publications" \
+            --body "Automated update of publications since ${LAST_RUN}." \
+            --base main \
+            --head $branch_name
+
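
Note on the contract between this workflow and the script it calls: the "Get last run date" step exports LAST_RUN through $GITHUB_ENV, and scripts/update_publications.py (added below) reads it back from the environment. Two things are easy to miss here: the gh CLI only authenticates inside Actions when a GH_TOKEN is available to the step, and the `2>/dev/null || echo ""` means any failure of `gh run list` is silently absorbed by the 2019-01-01 fallback. For local testing, a minimal harness along these lines reproduces the same environment (hypothetical, not part of the patch; assumes it is run from the repository root):

    # Hypothetical smoke-test harness: export LAST_RUN the way the workflow
    # does, then run the update script as a subprocess.
    import os
    import subprocess

    env = dict(os.environ, LAST_RUN="2019-01-01")
    subprocess.run(["python", "scripts/update_publications.py"], env=env, check=True)
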
diff --git a/scripts/update_publications.py b/scripts/update_publications.py
new file mode 100644
index 0000000..2c9952c
--- /dev/null
+++ b/scripts/update_publications.py
@@ -0,0 +1,231 @@
+import requests
+import os
+import re
+import datetime
+import yaml
+from pathlib import Path
+import unidecode
+
+# === Config ===
+PROJECT_NAME = "gysela" # change to your project keyword
+AUTHOR_DIR = Path(__file__).parent.parent / "content" / "authors"
+PUBLICATION_DIR = Path(__file__).parent.parent / "content" / "publication"
+last_run=os.environ['LAST_RUN']
+#LAST_RUN_FILE = Path("last_run.txt")
+VENUE_ABBREVIATIONS_FILE = Path("venue_abbreviations.yml")
+
+existing_slugs = {p.stem for p in PUBLICATION_DIR.iterdir() if p.is_dir()}
+
+# === Helpers ===
+def load_abbrev_map():
+    if VENUE_ABBREVIATIONS_FILE.exists():
+        with open(VENUE_ABBREVIATIONS_FILE) as f:
+            return yaml.safe_load(f)
+    return {}
+
+def load_key_authors():
+    key_authors = []
+    for md_file in Path(AUTHOR_DIR).rglob("*.md"):
+        with open(md_file, encoding="utf-8") as f:
+            content = f.read()
+            if content.startswith("---"):
+                front_matter = content.split("---", 2)[1]
+                data = yaml.safe_load(front_matter)
+                if "name" in data and "organizations" in data:
+                    orgs = [o["name"] for o in data.get("organizations", []) if "name" in o]
+                    key_authors.append({
+                        "name": " ".join(data["name"].split(" ")[1:]),
+                        "organizations": orgs
+                    })
+    return key_authors
+
+def get_first_author_surname(authorships):
+    if authorships:
+        first_author = authorships[0]["author"]["display_name"]
+        surname = first_author.split()[-1]
+        return unidecode.unidecode(surname).lower()
+    return "unknown"
+
+def get_all_authors(authorships):
+    return " and ".join(a["author"]["display_name"] for a in authorships) if authorships else "Unknown"
+
+def author_matches(work_authorships, key_authors):
+    for a in work_authorships:
+        author_name = a["raw_author_name"]
+        institutions = [i["raw_affiliation_string"] for i in a["affiliations"]]
+        for ka in key_authors:
+            if (ka["name"].lower() in author_name.lower()) and \
+               any(org.lower() in instit.lower() for org in ka["organizations"] for instit in institutions):
+                return True
+    return False
+
+def make_slug(meta, abbrev_map):
+    surname = get_first_author_surname(meta["authorships"])
+    if meta["venue_full"] in abbrev_map:
+        venue = abbrev_map[meta["venue_full"]]['slug']
+    else:
+        venue = meta["venue_full"]
+    year = str(meta["year"])
+    slug_base = f"{surname}-{venue}-{year}"
+    slug = slug_base
+    i = 2
+    while slug in existing_slugs:
+        slug = f'{slug_base}_{i}'
+        i += 1
+    existing_slugs.add(slug)
+    return slug
+
+def extract_metadata(work, abbrev_map):
+    """Extract shared metadata for front_matter and bibtex."""
+    title = work.get("title", "")
+    authorships = work.get("authorships", [])
+    authors_list = [a["author"]["display_name"] for a in authorships]
+    authors_bibtex = get_all_authors(authorships)
+    surname = get_first_author_surname(authorships)
+    venue_host = work.get("host_venue", {}).get("display_name")
+    venue_primary = work.get("primary_location", {})
+    if venue_primary:
+        venue_primary = venue_primary.get("source", {})
+    if venue_primary:
+        venue_primary = venue_primary.get("display_name")
+    venue_full = venue_primary or venue_host or ""
+    year = work.get("publication_year", "")
+    doi = work.get("doi")
+    url = f"https://doi.org/{doi}" if doi else None
+    pub_date = work.get("publication_date", "1900-01-01")
+    biblio = work.get("biblio", {})
+    volume = biblio.get("volume")
+    issue = biblio.get("issue")
biblio.get("issue") + first_page = biblio.get("first_page") + last_page = biblio.get("last_page") + pages = f"{first_page}--{last_page}" if first_page and last_page else None + abstract = work.get("abstract_inverted_index") and " ".join(work["abstract_inverted_index"].keys()) or "" + return { + "title": title, + "authors_list": authors_list, + "authors_bibtex": authors_bibtex, + "authorships": authorships, + "venue_full": venue_full, + "year": year, + "doi": doi, + "url": url, + "pub_date": pub_date, + "volume": volume, + "issue": issue, + "pages": pages, + "surname": surname, + "abstract": abstract + } + +def to_bibtex(meta, slug, abbrev_map): + if meta["venue_full"] in abbrev_map: + venue = abbrev_map[meta["venue_full"]]['bibtex'] + else: + venue = meta["venue_full"] + fields = { + "title": meta["title"], + "author": meta["authors_bibtex"], + "journal": venue, + "year": meta["year"], + "volume": meta["volume"], + "number": meta["issue"], + "pages": meta["pages"], + "doi": meta["doi"], + "url": meta["url"] + } + lines = [f"@article{{{slug},"] + lines.extend(f" {k} = {{{v}}}," for k, v in fields.items() if v) + lines[-1] = lines[-1].rstrip(",") # drop trailing comma + lines.append("}") + return "\n".join(lines) + +def write_index_md(folder, meta): + front_matter = { + "title": meta["title"], + "subtitle": "", + "summary": "", + "authors": meta["authors_list"], + "tags": [], + "categories": [], + "date": meta["pub_date"], + "lastmod": datetime.datetime.now().isoformat(), + "featured": False, + "draft": False, + "image": {"caption": "", "focal_point": "", "preview_only": False}, + "projects": [], + "publishDate": datetime.datetime.now().isoformat(), + "publication_types": ["1"], + "abstract": meta["abstract"], + "publication": meta["venue_full"], + "doi": meta["doi"] or "" + } + index_md = "---\n" + yaml.dump(front_matter, sort_keys=False) + "---\n" + (folder / "_index.md").write_text(index_md, encoding="utf-8") + +# === Main === +def main(): + #last_run = load_last_run() + abbrev_map = load_abbrev_map() + key_authors = load_key_authors() + + found_doi = set() + + for PROJECT_NAME in ('gysela', 'gyselax', 'gyselalib'): + url = "https://api.openalex.org/works" + params = { + "search": PROJECT_NAME, + "filter": f"from_publication_date:{last_run}", + "per-page": 50 + } + response = requests.get(url, params=params) + response.raise_for_status() + data = response.json() + results = data.get("results", []) + print(f"Found {len(results)} results for {PROJECT_NAME} since {last_run}") + + for work in results: + if work.get("type") == "preprint": + continue + meta = extract_metadata(work, abbrev_map) + if "arxiv" in meta["venue_full"].lower(): + print("Discarding preprint : ", meta["title"]) + continue + gysela_in_title = PROJECT_NAME in meta["title"].lower() + gysela_in_abstract = PROJECT_NAME in meta["abstract"].lower() + written_by_key_author = author_matches(meta["authorships"], key_authors) + if not (gysela_in_title or gysela_in_abstract) and \ + not written_by_key_author: + print("Discarding citation : ", meta["title"], meta["authors_list"]) + continue + + if meta["doi"] in found_doi: + continue + found_doi.add(meta["doi"]) + + print("Saving :") + print(" ", meta["title"]) + print(" ", meta["authors_list"]) + if gysela_in_title or gysela_in_abstract: + print("Mentioning Gysela prominently") + if written_by_key_author: + print("Written by permanent contributor") + print() + + slug = make_slug(meta, abbrev_map) + folder = PUBLICATION_DIR / slug + folder.mkdir(parents=True, exist_ok=True) + + # 
From a36401f9f5f2ac6869feae157e8c32d6ea1e0d00 Mon Sep 17 00:00:00 2001
From: Emily Bourne
Date: Fri, 12 Sep 2025 16:25:18 +0200
Subject: [PATCH 02/11] Change trigger to test

---
 .github/workflows/check-for-new-publications.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml
index d08141a..b4ba888 100644
--- a/.github/workflows/check-for-new-publications.yml
+++ b/.github/workflows/check-for-new-publications.yml
@@ -5,6 +5,7 @@ on:
     # Runs at 03:00 on the first day of each month
     - cron: '0 3 1 * *'
   workflow_dispatch: # allows manual trigger
+  push:
 
 jobs:
   update-publications:

From 6bdbf02ed5f9f524764064469f07f6238518f75c Mon Sep 17 00:00:00 2001
From: Emily Bourne
Date: Fri, 12 Sep 2025 16:26:29 +0200
Subject: [PATCH 03/11] Missing token

---
 .github/workflows/check-for-new-publications.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml
index b4ba888..c55ff0f 100644
--- a/.github/workflows/check-for-new-publications.yml
+++ b/.github/workflows/check-for-new-publications.yml
@@ -43,7 +43,7 @@ jobs:
       - name: Push branch
         run: git push origin HEAD
         env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GH_TOKEN: ${{ github.token }}
       - name: Create Pull Request
         run: |
           branch_name=$(git rev-parse --abbrev-ref HEAD)
@@ -52,4 +52,5 @@ jobs:
           gh pr create \
             --title "Update publications" \
             --body "Automated update of publications since ${LAST_RUN}." \
             --base main \
             --head $branch_name
-
+        env:
+          GH_TOKEN: ${{ github.token }}

From 2997923765bf43e989a83c2cc62b206a436d43a9 Mon Sep 17 00:00:00 2001
From: Emily Bourne
Date: Fri, 12 Sep 2025 16:29:16 +0200
Subject: [PATCH 04/11] Increase page limit

---
 scripts/update_publications.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/update_publications.py b/scripts/update_publications.py
index 2c9952c..722cc83 100644
--- a/scripts/update_publications.py
+++ b/scripts/update_publications.py
@@ -175,7 +175,7 @@ def main():
         params = {
             "search": PROJECT_NAME,
             "filter": f"from_publication_date:{last_run}",
-            "per-page": 50
+            "per-page": 100
         }
         response = requests.get(url, params=params)
         response.raise_for_status()
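
Note on the page limit bump: OpenAlex caps `per-page` at 200, and this request still only reads the first page of results, so anything beyond the per-page window in one month is dropped. If that ever becomes a concern, OpenAlex's cursor pagination covers it; a sketch (hypothetical, not part of the patch; `fetch_all_works` is an invented name):

    import requests

    def fetch_all_works(params):
        """Yield every matching work by following OpenAlex cursor pagination."""
        url = "https://api.openalex.org/works"
        params = dict(params, cursor="*")
        while True:
            response = requests.get(url, params=params)
            response.raise_for_status()
            data = response.json()
            yield from data.get("results", [])
            cursor = data.get("meta", {}).get("next_cursor")
            if not cursor:
                break
            params["cursor"] = cursor
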
From 4994785ec0068a01e0d4f46c2a9d5ef685ce5127 Mon Sep 17 00:00:00 2001
From: Emily Bourne
Date: Fri, 12 Sep 2025 16:37:04 +0200
Subject: [PATCH 05/11] Exit if no publications found

---
 .../workflows/check-for-new-publications.yml | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml
index c55ff0f..02bcc84 100644
--- a/.github/workflows/check-for-new-publications.yml
+++ b/.github/workflows/check-for-new-publications.yml
@@ -13,6 +13,7 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
+
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -25,26 +26,41 @@ jobs:
         run: |
           last_run_iso=$(gh run list --workflow "Update Publications" --status success --limit 1 --json createdAt --jq '.[0].createdAt' 2>/dev/null || echo "")
           if [ -z "$last_run_iso" ]; then
+            echo "No last run found"
             last_run_iso="2019-01-01T00:00:00Z" # fallback default
           fi
           last_run=$(date -u -d "$last_run_iso" +"%Y-%m-%d")
+          echo "LAST_RUN=$last_run"
           echo "LAST_RUN=$last_run" >> $GITHUB_ENV
       - name: Create branch
         run: |
           branch_name="update-publications-$(date +'%Y%m%d')"
-          git checkout -b $branch_name
           echo "branch_name=${branch_name}" >> $GITHUB_ENV
       - name: Run publication update script
         run: python scripts/update_publications.py
+      - name: Check for changes
+        id: check_changes
+        run: |
+          if git diff --quiet content/publication; then
+            echo "No new publications found."
+            echo "has_new=false" >> $GITHUB_ENV
+          else
+            echo "has_new=true" >> $GITHUB_ENV
+          fi
       - name: Commit changes
+        if: env.has_new
         run: |
+          git checkout main
+          git checkout -b $branch_name
           git add content/publication
           git commit -m "Automated update of publications" || echo "No changes to commit"
       - name: Push branch
+        if: env.has_new
         run: git push origin HEAD
         env:
           GH_TOKEN: ${{ github.token }}
       - name: Create Pull Request
+        if: env.has_new
         run: |
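
One caveat with the new check: `git diff --quiet` only reports modifications to tracked files, and the script mostly *creates* new publication folders, which stay untracked until `git add`. The test can therefore report "no changes" even when publications were written. `git status --porcelain`, which also lists untracked paths, is a safer probe; here in Python for local verification (hypothetical, not part of the patch; `has_new_publications` is an invented name):

    import subprocess

    def has_new_publications():
        """True if content/publication has modified *or* untracked entries."""
        out = subprocess.run(
            ["git", "status", "--porcelain", "content/publication"],
            capture_output=True, text=True, check=True,
        ).stdout
        return bool(out.strip())
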
+ echo "has_new=false" >> $GITHUB_ENV + else + echo "has_new=true" >> $GITHUB_ENV + fi - name: Commit changes + if: env.has_new run: | + git checkout main + git checkout -b $branch_name git add content/publication git commit -m "Automated update of publications" || echo "No changes to commit" - name: Push branch + if: env.has_new run: git push origin HEAD env: GH_TOKEN: ${{ github.token }} - name: Create Pull Request + if: env.has_new run: | branch_name=$(git rev-parse --abbrev-ref HEAD) gh pr create \ From eb394b9f3aae9181206957b264a35afdf4097436 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 12 Sep 2025 16:43:55 +0200 Subject: [PATCH 06/11] Ensure main is available --- .github/workflows/check-for-new-publications.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml index 02bcc84..15f915d 100644 --- a/.github/workflows/check-for-new-publications.yml +++ b/.github/workflows/check-for-new-publications.yml @@ -13,6 +13,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 + with: + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v4 From ad3a34a9742e930767f3f11c8960f8da16e22a48 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 12 Sep 2025 16:46:58 +0200 Subject: [PATCH 07/11] correct condition --- .github/workflows/check-for-new-publications.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml index 15f915d..d8d4166 100644 --- a/.github/workflows/check-for-new-publications.yml +++ b/.github/workflows/check-for-new-publications.yml @@ -45,24 +45,24 @@ jobs: run: | if git diff --quiet content/publication; then echo "No new publications found." 
- echo "has_new=false" >> $GITHUB_ENV + echo "has_new=false" >> $GITHUB_OUTPUT else - echo "has_new=true" >> $GITHUB_ENV + echo "has_new=true" >> $GITHUB_OUTPUT fi - name: Commit changes - if: env.has_new + if: steps.check_new.outputs.has_new == 'true' run: | git checkout main git checkout -b $branch_name git add content/publication git commit -m "Automated update of publications" || echo "No changes to commit" - name: Push branch - if: env.has_new + if: steps.check_new.outputs.has_new == 'true' run: git push origin HEAD env: GH_TOKEN: ${{ github.token }} - name: Create Pull Request - if: env.has_new + if: steps.check_new.outputs.has_new == 'true' run: | branch_name=$(git rev-parse --abbrev-ref HEAD) gh pr create \ From 587b95eefccabd4dd7425735cbf1aaef0bf8694e Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 12 Sep 2025 16:48:27 +0200 Subject: [PATCH 08/11] Remove test trigger --- .github/workflows/check-for-new-publications.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml index d8d4166..f372a4d 100644 --- a/.github/workflows/check-for-new-publications.yml +++ b/.github/workflows/check-for-new-publications.yml @@ -5,7 +5,6 @@ on: # Runs at 03:00 on the first day of each month - cron: '0 3 1 * *' workflow_dispatch: # allows manual trigger - push: jobs: update-publications: From cdfbb1f01a1ff4b8300d33afca4c8ee559f9c09d Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 12 Sep 2025 16:55:31 +0200 Subject: [PATCH 09/11] Clean up script --- scripts/update_publications.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/update_publications.py b/scripts/update_publications.py index 722cc83..7535230 100644 --- a/scripts/update_publications.py +++ b/scripts/update_publications.py @@ -10,9 +10,8 @@ PROJECT_NAME = "gysela" # change to your project keyword AUTHOR_DIR = Path(__file__).parent.parent / "content" / "authors" PUBLICATION_DIR = Path(__file__).parent.parent / "content" / "publication" -last_run=os.environ['LAST_RUN'] -#LAST_RUN_FILE = Path("last_run.txt") VENUE_ABBREVIATIONS_FILE = Path("venue_abbreviations.yml") +LAST_RUN=os.environ['LAST_RUN'] existing_slugs = {p.stem for p in PUBLICATION_DIR.iterdir() if p.is_dir()} @@ -164,7 +163,6 @@ def write_index_md(folder, meta): # === Main === def main(): - #last_run = load_last_run() abbrev_map = load_abbrev_map() key_authors = load_key_authors() @@ -174,22 +172,27 @@ def main(): url = "https://api.openalex.org/works" params = { "search": PROJECT_NAME, - "filter": f"from_publication_date:{last_run}", + "filter": f"from_publication_date:{LAST_RUN}", "per-page": 100 } response = requests.get(url, params=params) response.raise_for_status() data = response.json() results = data.get("results", []) - print(f"Found {len(results)} results for {PROJECT_NAME} since {last_run}") + print(f"Found {len(results)} results for {PROJECT_NAME} since {LAST_RUN}") for work in results: + # Discard preprints if work.get("type") == "preprint": continue + meta = extract_metadata(work, abbrev_map) + + # Discard preprints if "arxiv" in meta["venue_full"].lower(): - print("Discarding preprint : ", meta["title"]) continue + + # Check relevance gysela_in_title = PROJECT_NAME in meta["title"].lower() gysela_in_abstract = PROJECT_NAME in meta["abstract"].lower() written_by_key_author = author_matches(meta["authorships"], key_authors) @@ -198,6 +201,7 @@ def main(): print("Discarding citation : ", meta["title"], 
meta["authors_list"]) continue + # Discard if already found if meta["doi"] in found_doi: continue found_doi.add(meta["doi"]) @@ -222,10 +226,6 @@ def main(): bibtex = to_bibtex(meta, slug, abbrev_map) (folder / "cite.bib").write_text(bibtex, encoding="utf-8") - #today = datetime.date.today().isoformat() - #save_last_run(today) - #print(f"Updated last run date to {today}") - if __name__ == "__main__": main() From 5a632ad265ddb76e68e2df071e7d3d7c964ad139 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 12 Sep 2025 16:59:02 +0200 Subject: [PATCH 10/11] Improve key --- scripts/update_publications.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/update_publications.py b/scripts/update_publications.py index 7535230..29adfd4 100644 --- a/scripts/update_publications.py +++ b/scripts/update_publications.py @@ -172,7 +172,7 @@ def main(): url = "https://api.openalex.org/works" params = { "search": PROJECT_NAME, - "filter": f"from_publication_date:{LAST_RUN}", + "filter": f"from_indexed_date:{LAST_RUN}", "per-page": 100 } response = requests.get(url, params=params) From 0ce7b981538c60e5d7ab49f7400c97a9dee03faf Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 12 Sep 2025 17:05:19 +0200 Subject: [PATCH 11/11] Add a 2 month buffer for indexing and check against existing DOIs --- .../workflows/check-for-new-publications.yml | 9 +++--- scripts/update_publications.py | 29 +++++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/.github/workflows/check-for-new-publications.yml b/.github/workflows/check-for-new-publications.yml index f372a4d..e4029ba 100644 --- a/.github/workflows/check-for-new-publications.yml +++ b/.github/workflows/check-for-new-publications.yml @@ -30,9 +30,10 @@ jobs: echo "No last run found" last_run_iso="2019-01-01T00:00:00Z" # fallback default fi - last_run=$(date -u -d "$last_run_iso" +"%Y-%m-%d") - echo "LAST_RUN=$last_run" - echo "LAST_RUN=$last_run" >> $GITHUB_ENV + # Remove 2 months to allow lots of time for indexing + CHECK_FROM=$(date -u -d "$last_run_iso -2 months" +"%Y-%m-%d") + echo "CHECK_FROM=$CHECK_FROM" + echo "CHECK_FROM=$CHECK_FROM" >> $GITHUB_ENV - name: Create branch run: | branch_name="update-publications-$(date +'%Y%m%d')" @@ -66,7 +67,7 @@ jobs: branch_name=$(git rev-parse --abbrev-ref HEAD) gh pr create \ --title "Update publications" \ - --body "Automated update of publications since ${LAST_RUN}." \ + --body "Automated update of publications since ${CHECK_FROM}." 
diff --git a/scripts/update_publications.py b/scripts/update_publications.py
index 29adfd4..94771e6 100644
--- a/scripts/update_publications.py
+++ b/scripts/update_publications.py
@@ -11,7 +11,7 @@
 AUTHOR_DIR = Path(__file__).parent.parent / "content" / "authors"
 PUBLICATION_DIR = Path(__file__).parent.parent / "content" / "publication"
 VENUE_ABBREVIATIONS_FILE = Path("venue_abbreviations.yml")
-LAST_RUN=os.environ['LAST_RUN']
+CHECK_FROM=os.environ['CHECK_FROM']
 
 existing_slugs = {p.stem for p in PUBLICATION_DIR.iterdir() if p.is_dir()}
 
@@ -24,7 +24,7 @@ def load_abbrev_map():
 
 def load_key_authors():
     key_authors = []
-    for md_file in Path(AUTHOR_DIR).rglob("*.md"):
+    for md_file in AUTHOR_DIR.rglob("*.md"):
         with open(md_file, encoding="utf-8") as f:
             content = f.read()
             if content.startswith("---"):
@@ -38,6 +38,18 @@ def load_key_authors():
                 })
     return key_authors
 
+def load_known_dois():
+    dois = set()
+    for md_file in PUBLICATION_DIR.rglob("*.md"):
+        with open(md_file, encoding="utf-8") as f:
+            content = f.read()
+            if content.startswith("---"):
+                front_matter = content.split("---", 2)[1]
+                data = yaml.safe_load(front_matter)
+                if "doi" in data:
+                    dois.add(data["doi"])
+    return dois
+
 def get_first_author_surname(authorships):
     if authorships:
         first_author = authorships[0]["author"]["display_name"]
@@ -159,27 +171,26 @@ def write_index_md(folder, meta):
         "doi": meta["doi"] or ""
     }
     index_md = "---\n" + yaml.dump(front_matter, sort_keys=False) + "---\n"
-    (folder / "_index.md").write_text(index_md, encoding="utf-8")
+    (folder / "index.md").write_text(index_md, encoding="utf-8")
 
 # === Main ===
 def main():
     abbrev_map = load_abbrev_map()
     key_authors = load_key_authors()
-
-    found_doi = set()
+    dois = load_known_dois()
 
     for PROJECT_NAME in ('gysela', 'gyselax', 'gyselalib'):
         url = "https://api.openalex.org/works"
         params = {
             "search": PROJECT_NAME,
-            "filter": f"from_indexed_date:{LAST_RUN}",
+            "filter": f"from_publication_date:{CHECK_FROM}",
             "per-page": 100
         }
         response = requests.get(url, params=params)
         response.raise_for_status()
         data = response.json()
         results = data.get("results", [])
-        print(f"Found {len(results)} results for {PROJECT_NAME} since {LAST_RUN}")
+        print(f"Found {len(results)} results for {PROJECT_NAME} since {CHECK_FROM}")
 
         for work in results:
             # Discard preprints
@@ -198,9 +211,9 @@ def main():
                 continue
 
             # Discard if already found
-            if meta["doi"] in found_doi:
+            if meta["doi"] in dois:
                 continue
-            found_doi.add(meta["doi"])
+            dois.add(meta["doi"])
 
             print("Saving :")
             print(" ", meta["title"])
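
To close, a worked example of the cite.bib output: given a made-up metadata record (hypothetical data throughout) and the venue table from patch 01, to_bibtex from scripts/update_publications.py produces a standard @article entry, skipping empty fields such as the missing issue number:

    meta = {
        "title": "Example GYSELA paper",
        "authors_bibtex": "Alice Martin and Bob Dupont",
        "venue_full": "Journal of Computational Physics",
        "year": 2025,
        "volume": "512",
        "issue": None,
        "pages": "1--20",
        "doi": "10.1000/example",
        "url": "https://doi.org/10.1000/example",
    }
    abbrev_map = {"Journal of Computational Physics": {"bibtex": "J. Comput. Phys."}}
    print(to_bibtex(meta, "martin-jcp-2025", abbrev_map))
    # @article{martin-jcp-2025,
    #  title = {Example GYSELA paper},
    #  author = {Alice Martin and Bob Dupont},
    #  journal = {J. Comput. Phys.},
    #  year = {2025},
    #  volume = {512},
    #  pages = {1--20},
    #  doi = {10.1000/example},
    #  url = {https://doi.org/10.1000/example}
    # }

The slug "martin-jcp-2025" mirrors what make_slug would build from the first author's surname, the venue's slug abbreviation, and the year.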