74 changes: 74 additions & 0 deletions .github/workflows/check-for-new-publications.yml
@@ -0,0 +1,74 @@
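# Monthly workflow: query OpenAlex for publications mentioning the project
# and open a pull request adding any new ones under content/publication.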
name: Update Publications

on:
  schedule:
    # Runs at 03:00 UTC on the first day of each month
    - cron: '0 3 1 * *'
  workflow_dispatch: # allows manual trigger

jobs:
  update-publications:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests pyyaml unidecode
      - name: Get last run date
        env:
          GH_TOKEN: ${{ github.token }} # the gh CLI needs a token even for read-only queries
        run: |
          last_run_iso=$(gh run list --workflow "Update Publications" --status success --limit 1 --json createdAt --jq '.[0].createdAt' 2>/dev/null || echo "")
          if [ -z "$last_run_iso" ]; then
            echo "No previous successful run found"
            last_run_iso="2019-01-01T00:00:00Z" # fallback default
          fi
          # Go back 2 months from the last run to allow plenty of time for indexing
          CHECK_FROM=$(date -u -d "$last_run_iso -2 months" +"%Y-%m-%d")
          echo "CHECK_FROM=$CHECK_FROM"
          echo "CHECK_FROM=$CHECK_FROM" >> $GITHUB_ENV
      - name: Create branch name
        run: |
          branch_name="update-publications-$(date +'%Y%m%d')"
          echo "branch_name=${branch_name}" >> $GITHUB_ENV
      - name: Run publication update script
        run: python scripts/update_publications.py
      - name: Check for changes
        id: check_changes
        run: |
          # New publications arrive as untracked folders, so use `git status`;
          # `git diff` only reports modifications to already-tracked files.
          if [ -z "$(git status --porcelain content/publication)" ]; then
            echo "No new publications found."
            echo "has_new=false" >> $GITHUB_OUTPUT
          else
            echo "has_new=true" >> $GITHUB_OUTPUT
          fi
      - name: Commit changes
        if: steps.check_changes.outputs.has_new == 'true'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git checkout main
          git checkout -b $branch_name
          git add content/publication
          git commit -m "Automated update of publications" || echo "No changes to commit"
      - name: Push branch
        if: steps.check_changes.outputs.has_new == 'true'
        run: git push origin HEAD
        env:
          GH_TOKEN: ${{ github.token }}
      - name: Create Pull Request
        if: steps.check_changes.outputs.has_new == 'true'
        run: |
          branch_name=$(git rev-parse --abbrev-ref HEAD)
          gh pr create \
            --title "Update publications" \
            --body "Automated update of publications since ${CHECK_FROM}." \
            --base main \
            --head $branch_name
        env:
          GH_TOKEN: ${{ github.token }}
242 changes: 242 additions & 0 deletions scripts/update_publications.py
@@ -0,0 +1,242 @@
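"""Query OpenAlex for recent publications mentioning the project and write a
Hugo publication bundle (index.md + cite.bib) for each new relevant match."""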
import datetime
import os
import re
from pathlib import Path

import requests
import unidecode
import yaml

# === Config ===
PROJECT_KEYWORDS = ("gysela", "gyselax", "gyselalib")  # change to your project keywords
AUTHOR_DIR = Path(__file__).parent.parent / "content" / "authors"
PUBLICATION_DIR = Path(__file__).parent.parent / "content" / "publication"
# Resolve relative to this script so the workflow can run it from the repo root
VENUE_ABBREVIATIONS_FILE = Path(__file__).parent / "venue_abbreviations.yml"
CHECK_FROM = os.environ["CHECK_FROM"]

existing_slugs = {p.name for p in PUBLICATION_DIR.iterdir() if p.is_dir()}

# === Helpers ===
def load_abbrev_map():
    """Load the map from full venue names to slug and BibTeX abbreviations."""
    if VENUE_ABBREVIATIONS_FILE.exists():
        with open(VENUE_ABBREVIATIONS_FILE) as f:
            return yaml.safe_load(f)
    return {}

def load_key_authors():
    """Collect the surname and organisations of each author listed on the site."""
    key_authors = []
    for md_file in AUTHOR_DIR.rglob("*.md"):
        with open(md_file, encoding="utf-8") as f:
            content = f.read()
        if content.startswith("---"):
            front_matter = content.split("---", 2)[1]
            data = yaml.safe_load(front_matter)
            if "name" in data and "organizations" in data:
                orgs = [o["name"] for o in data.get("organizations", []) if "name" in o]
                key_authors.append({
                    # Drop the first name; keep the (possibly multi-word) surname
                    "name": " ".join(data["name"].split(" ")[1:]),
                    "organizations": orgs
                })
    return key_authors

def load_known_dois():
    """Collect the DOIs of publications already present on the site."""
    dois = set()
    for md_file in PUBLICATION_DIR.rglob("*.md"):
        with open(md_file, encoding="utf-8") as f:
            content = f.read()
        if content.startswith("---"):
            front_matter = content.split("---", 2)[1]
            data = yaml.safe_load(front_matter)
            if "doi" in data:
                dois.add(data["doi"])
    return dois

def get_first_author_surname(authorships):
    if authorships:
        first_author = authorships[0]["author"]["display_name"]
        surname = first_author.split()[-1]
        return unidecode.unidecode(surname).lower()
    return "unknown"

def get_all_authors(authorships):
    return " and ".join(a["author"]["display_name"] for a in authorships) if authorships else "Unknown"

def author_matches(work_authorships, key_authors):
    """Return True if any author matches a key author by surname and affiliation."""
    for a in work_authorships:
        author_name = a["raw_author_name"]
        institutions = [i["raw_affiliation_string"] for i in a["affiliations"]]
        for ka in key_authors:
            if (ka["name"].lower() in author_name.lower()) and \
                    any(org.lower() in instit.lower()
                        for org in ka["organizations"] for instit in institutions):
                return True
    return False

def make_slug(meta, abbrev_map):
    surname = get_first_author_surname(meta["authorships"])
    if meta["venue_full"] in abbrev_map:
        venue = abbrev_map[meta["venue_full"]]["slug"]
    else:
        # No known abbreviation: slugify the full venue name
        venue = re.sub(r"[^a-z0-9]+", "-", meta["venue_full"].lower()).strip("-")
    year = str(meta["year"])
    slug_base = f"{surname}-{venue}-{year}"
    slug = slug_base
    i = 2
    while slug in existing_slugs:
        slug = f"{slug_base}_{i}"
        i += 1
    existing_slugs.add(slug)
    return slug

def reconstruct_abstract(inverted_index):
    """Rebuild the abstract from OpenAlex's inverted index (word -> positions)."""
    if not inverted_index:
        return ""
    positions = [(pos, word) for word, posns in inverted_index.items() for pos in posns]
    return " ".join(word for _, word in sorted(positions))

def extract_metadata(work, abbrev_map):
    """Extract shared metadata for front_matter and bibtex."""
    title = work.get("title") or ""
    authorships = work.get("authorships", [])
    authors_list = [a["author"]["display_name"] for a in authorships]
    authors_bibtex = get_all_authors(authorships)
    surname = get_first_author_surname(authorships)
    venue_host = (work.get("host_venue") or {}).get("display_name")
    venue_primary = work.get("primary_location", {})
    if venue_primary:
        venue_primary = venue_primary.get("source", {})
    if venue_primary:
        venue_primary = venue_primary.get("display_name")
    venue_full = venue_primary or venue_host or ""
    year = work.get("publication_year", "")
    doi = work.get("doi")
    # OpenAlex returns the DOI as a full URL; keep the bare DOI in the metadata
    if doi:
        doi = doi.replace("https://doi.org/", "")
    url = f"https://doi.org/{doi}" if doi else None
    pub_date = work.get("publication_date", "1900-01-01")
    biblio = work.get("biblio", {})
    volume = biblio.get("volume")
    issue = biblio.get("issue")
    first_page = biblio.get("first_page")
    last_page = biblio.get("last_page")
    pages = f"{first_page}--{last_page}" if first_page and last_page else None
    abstract = reconstruct_abstract(work.get("abstract_inverted_index"))
    return {
        "title": title,
        "authors_list": authors_list,
        "authors_bibtex": authors_bibtex,
        "authorships": authorships,
        "venue_full": venue_full,
        "year": year,
        "doi": doi,
        "url": url,
        "pub_date": pub_date,
        "volume": volume,
        "issue": issue,
        "pages": pages,
        "surname": surname,
        "abstract": abstract
    }

def to_bibtex(meta, slug, abbrev_map):
    if meta["venue_full"] in abbrev_map:
        venue = abbrev_map[meta["venue_full"]]["bibtex"]
    else:
        venue = meta["venue_full"]
    fields = {
        "title": meta["title"],
        "author": meta["authors_bibtex"],
        "journal": venue,
        "year": meta["year"],
        "volume": meta["volume"],
        "number": meta["issue"],
        "pages": meta["pages"],
        "doi": meta["doi"],
        "url": meta["url"]
    }
    lines = [f"@article{{{slug},"]
    lines.extend(f"  {k} = {{{v}}}," for k, v in fields.items() if v)
    lines[-1] = lines[-1].rstrip(",")  # drop the trailing comma on the last field
    lines.append("}")
    return "\n".join(lines)

def write_index_md(folder, meta):
    front_matter = {
        "title": meta["title"],
        "subtitle": "",
        "summary": "",
        "authors": meta["authors_list"],
        "tags": [],
        "categories": [],
        "date": meta["pub_date"],
        "lastmod": datetime.datetime.now().isoformat(),
        "featured": False,
        "draft": False,
        "image": {"caption": "", "focal_point": "", "preview_only": False},
        "projects": [],
        "publishDate": datetime.datetime.now().isoformat(),
        "publication_types": ["1"],
        "abstract": meta["abstract"],
        "publication": meta["venue_full"],
        "doi": meta["doi"] or ""
    }
    index_md = "---\n" + yaml.dump(front_matter, sort_keys=False) + "---\n"
    (folder / "index.md").write_text(index_md, encoding="utf-8")

# === Main ===
def main():
    abbrev_map = load_abbrev_map()
    key_authors = load_key_authors()
    dois = load_known_dois()

    for project_name in PROJECT_KEYWORDS:
        url = "https://api.openalex.org/works"
        params = {
            "search": project_name,
            "filter": f"from_publication_date:{CHECK_FROM}",
            "per-page": 100
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        data = response.json()
        results = data.get("results", [])
        print(f"Found {len(results)} results for {project_name} since {CHECK_FROM}")

        for work in results:
            # Discard preprints
            if work.get("type") == "preprint":
                continue

            meta = extract_metadata(work, abbrev_map)

            # Discard preprints that are typed differently but hosted on arXiv
            if "arxiv" in meta["venue_full"].lower():
                continue

            # Keep a work only if it mentions the project prominently
            # or was written by a key author
            gysela_in_title = project_name in meta["title"].lower()
            gysela_in_abstract = project_name in meta["abstract"].lower()
            written_by_key_author = author_matches(meta["authorships"], key_authors)
            if not (gysela_in_title or gysela_in_abstract) and \
                    not written_by_key_author:
                print("Discarding citation:", meta["title"], meta["authors_list"])
                continue

            # Discard if already known
            if meta["doi"] in dois:
                continue
            dois.add(meta["doi"])

            print("Saving:")
            print("  ", meta["title"])
            print("  ", meta["authors_list"])
            if gysela_in_title or gysela_in_abstract:
                print("Mentions Gysela prominently")
            if written_by_key_author:
                print("Written by a permanent contributor")
            print()

            slug = make_slug(meta, abbrev_map)
            folder = PUBLICATION_DIR / slug
            folder.mkdir(parents=True, exist_ok=True)

            # Write index.md
            write_index_md(folder, meta)

            # Write cite.bib
            bibtex = to_bibtex(meta, slug, abbrev_map)
            (folder / "cite.bib").write_text(bibtex, encoding="utf-8")

if __name__ == "__main__":
    main()

36 changes: 36 additions & 0 deletions scripts/venue_abbreviations.yml
@@ -0,0 +1,36 @@
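# Maps full venue names, as returned by OpenAlex, to the slug used in
# publication folder names and the abbreviated journal name used in BibTeX.
# Keys must match the venue's display_name exactly.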
Journal of Computational Physics:
  slug: jcp
  bibtex: "J. Comput. Phys."
Journal of Plasma Physics:
  slug: jpp
  bibtex: "J. Plasma Phys."
Computer Physics Communications:
  slug: cpc
  bibtex: "Comput. Phys. Commun."
Concurrency and Computation Practice and Experience:
  slug: ccpe
  bibtex: "Concurrency and Computation Practice and Experience"
Plasma Physics and Controlled Fusion:
  slug: ppcf
  bibtex: "Plasma Phys. Controlled Fusion"
SMAI Journal of Computational Mathematics:
  slug: smai
  bibtex: "SMAI Journal of Computational Mathematics"
Communications Physics:
  slug: cp
  bibtex: "Commun. Phys."
The International Journal of High Performance Computing Applications:
  slug: ijhpca
  bibtex: "Int. J. High Perform. Comput. Appl."
Physics of Plasmas:
  slug: po-p
  bibtex: "Phys. Plasmas"
Nuclear Fusion:
  slug: nf
  bibtex: "Nucl. Fusion"
Physical review. E:
  slug: pre
  bibtex: "Phys. Rev. E"
Physical Review Letters:
  slug: prl
  bibtex: "Phys. Rev. Lett."