In [28]:
import yaml
from collections import defaultdict

def parse_authors(author_str):
    """Split a citation-style author string into "Lastname, Initials" entries.

    The expected input looks like ``"Caro, A., Condom, T., and Rabatel, A."``:
    comma-separated tokens that alternate between a last name and its
    initials, with the last author introduced by "and".

    Parameters
    ----------
    author_str : str
        Raw author string of one publication entry.

    Returns
    -------
    list of str
        One ``"Lastname, Initials"`` string per author, in citation order.
        An empty list is returned for empty / whitespace-only / ``None``
        input, so callers can skip such entries with ``if not authors``.

    Notes
    -----
    * Both ", and" (Oxford comma) and a bare " and " (two-author form
      ``"A, B. and C, D."``) are accepted; single-author strings work too.
    * Ellipsis placeholders ("…" or "...") standing in for omitted authors
      and empty tokens produced by doubled commas are dropped.
    * If an odd number of tokens remains, the last token is returned on its
      own instead of raising ``IndexError``.
    """
    if not author_str or not author_str.strip():
        return []

    # Turn the final "and" into an ordinary comma separator. ", and" is
    # handled first so the Oxford-comma form does not yield a doubled comma.
    normalized = author_str.replace(', and', ', ').replace(' and ', ', ')

    # Tokens that carry no author information.
    placeholders = {'', '…', '...'}
    parts = [token.strip() for token in normalized.split(',')]
    parts = [token for token in parts if token not in placeholders]

    # Re-join consecutive (last name, initials) tokens; slicing keeps a
    # trailing unpaired token instead of indexing past the end.
    return [', '.join(parts[i:i + 2]) for i in range(0, len(parts), 2)]

# Read the publication list (YAML, organised as categories holding entries)
with open('publications.yml', encoding='utf-8') as f:
    data = yaml.safe_load(f)

# author -> list of titles: once for any author position, once for first authors only
all_authors, first_authors = defaultdict(list), defaultdict(list)

# Walk every entry of every category.
# `author_str` is deliberately a plain module-level name: a later cell reads
# whatever value the last iteration left in it.
for category in data:
    for entry in category['entries']:
        title, author_str = entry['title'], entry['authors']

        authors = parse_authors(author_str)
        if authors:
            # Every listed author gets credited with the title ...
            for author in authors:
                all_authors[author].append(title)

            # ... and the leading one is additionally recorded as first author
            first_authors[authors[0]].append(title)

# Summary numbers
print(f"Total unique authors: {len(all_authors)}")
print(f"Total unique first authors: {len(first_authors)}")

# Spot-check a single author
example = "Maussion, F."
print(f"\nPublications by {example}:")
for pub in all_authors[example]:
    print(f"- {pub}")

Total unique authors: 348
Total unique first authors: 60

Publications by Maussion, F.:
- Recent observations and glacier modeling point towards near complete glacier loss in western Austria (Ötztal and Stubai mountain range) if 1.5 °C is not met
- Decadal re-forecasts of glacier climatic mass balance
- Irreversible glacier change and trough water for centuries after overshooting 1.5°C
- Inter-model differences in 21st Century Glacier Runoff for the World’s Major River Basins
- Glacier preservation doubled by limiting warming to 1.5°C
- Reversal of the impact chain for actionable climate information
- Unravelling the sources of uncertainty in glacier runoff projections in the Patagonian Andes (40–56° S)
- Coupling a large-scale glacier and hydrological model (OGGM v1.5.3 and CWatM V1.08) – Towards an improved representation of mountain water resources in global assessments
- Twenty-first century global glacier evolution under CMIP6 scenarios and the role of glacier-specific observation

In [29]:
# Inspect the distinct first-author names collected in `first_authors`
first_authors.keys()

dict_keys(['Caro, A.', 'Hartl, L.', 'Kanzow, T.', 'van der Laan, L.', 'Luo, Y.', 'Mackay, J. D.', 'Malles, J.-H.', 'Schuster, L.', 'Shafeeque, M.', 'Vergnano, A.', 'Wimberly, F.', 'Zekollari, H.', 'Shutkin, T. Y.', 'Haq, F.', 'Švinka, L.', 'Pfleiderer, P.', 'Yaka, T.', 'Aguayo, R.', 'Chen, X.', 'Diaconu, C. -A.', 'Hanus, S.', 'Hu, S.', 'Kang, L.', 'Li, T.', 'Möller, M.', 'Reinthaler, J.', 'Tober, B.S.', 'Wang L, Yang S', 'Xiao, L.', 'Yang, W.', 'Zhou, B.', 'Afzal, M. M.', 'Bolibar, J.', 'Hock, R.', 'Li, F.', 'Malles, J.', 'O’Kane, T. J.', 'Pesci, M. H.', 'Recinos, B.', 'Ross, A. C.', 'Rounce, D. R.', 'Tang, S.', 'Yang, L.', 'Zhao, H.', 'Bouchayer, C.', 'Chen, W.', 'Furian, W.', 'Nidheesh, G.', 'Yang, M.', 'Dixit, A.', 'Edwards, T. L.', 'Eis, J.', 'Pronk, J. B.', 'Khadka, M.', 'Marzeion, B.', 'Parkes, D.', 'Pelto, B. M.', 'Farinotti, D.', 'Maussion, F.', 'Goosse, H.'])

In [30]:
# Scratch check of the separator handling on a single author string.
# NOTE(review): `author_str` is not defined in this cell; presumably it is the
# value left over from the last iteration of the entry loop -- confirm, since
# the result depends on kernel state.
# Replacing ' and ' (without the preceding comma) leaves the original comma in
# place, which is where the doubled ',,' in the displayed result comes from.
author_str = author_str.replace(' and ', ', ')
author_str

'Farinotti, D., Brinkerhoff, D. J., Clarke, G. K. C., Fürst, J. J., Frey, H., Gantayat, P., Gillet-Chaulet, F., Girard, C., Huss, M., Leclercq, P. W., Linsbauer, A., Machguth, H., Martin, C., Maussion, F., Morlighem, M., Mosbeux, C., Pandit, A., Portmann, A., Rabatel, A., …,, Andreassen, L. M.'

In [31]:
import pandas as pd

In [35]:
# Build both lookups in a single sweep over the publication list:
#   author_pubs       -> titles on which the author appears in any position
#   first_author_pubs -> titles on which the author is listed first
author_pubs = defaultdict(list)
first_author_pubs = defaultdict(list)

for category in data:
    for entry in category['entries']:
        title = entry['title']
        authors = parse_authors(entry['authors'])

        for author in authors:
            author_pubs[author].append(title)

        # Entries without any parsed author contribute no first author
        if authors:
            first_author = authors[0]
            first_author_pubs[first_author].append(title)

In [36]:

# Publications per author (any author position), largest count first
pub_counts = (
    pd.Series(
        {name: len(pubs) for name, pubs in author_pubs.items()},
        name="n_publications",
    )
    .rename_axis("author")
    .sort_values(ascending=False)
)

print(pub_counts.head())

author
Maussion, F.       29
Marzeion, B.       18
Schuster, L.        9
Huss, M.            7
Champollion, N.     6
Name: n_publications, dtype: int64


In [37]:
# First-author publications per author, largest count first
first_author_counts = pd.Series(
    {name: len(pubs) for name, pubs in first_author_pubs.items()},
    name="n_publications",
).sort_values(ascending=False)

# Label the index so the printed table is self-explanatory
first_author_counts.index.name = "first_author"

print(first_author_counts)

first_author
Yang, W.            3
Recinos, B.         3
Caro, A.            2
van der Laan, L.    2
Farinotti, D.       2
Hartl, L.           2
Schuster, L.        2
Shafeeque, M.       2
Hanus, S.           2
Zekollari, H.       2
Eis, J.             2
Rounce, D. R.       2
Tang, S.            1
Zhao, H.            1
Yang, L.            1
Pesci, M. H.        1
Ross, A. C.         1
O’Kane, T. J.       1
Bouchayer, C.       1
Yang, M.            1
Chen, W.            1
Furian, W.          1
Nidheesh, G.        1
Li, F.              1
Dixit, A.           1
Edwards, T. L.      1
Pronk, J. B.        1
Khadka, M.          1
Marzeion, B.        1
Parkes, D.          1
Pelto, B. M.        1
Maussion, F.        1
Malles, J.          1
Zhou, B.            1
Hock, R.            1
Bolibar, J.         1
Kanzow, T.          1
Luo, Y.             1
Mackay, J. D.       1
Malles, J.-H.       1
Vergnano, A.        1
Wimberly, F.        1
Shutkin, T. Y.      1
Haq, F.             1
Švinka, L.         

In [5]:
# Inspect the distinct author names collected in `all_authors`.
# NOTE(review): this cell's execution count (5) is lower than that of the
# cells before it, and the saved output lists last names and initials as
# separate keys -- presumably produced by an earlier version of the parser.
# Re-run after "Restart Kernel -> Run All" to refresh it.
all_authors.keys()

dict_keys(['Caro', 'A.', 'Condom', 'T.', 'Rabatel', 'Aguayo', 'R.', 'and Champollion', 'N.', 'Hartl', 'L.', 'Schmitt', 'P.', 'Schuster', 'Helfricht', 'K.', 'Abermann', 'J.', 'and Maussion', 'F.', 'Kanzow', 'et al.s', 'van der Laan', 'Vlug', 'Scaife', 'A. A.', 'Maussion', 'and Förster', 'Luo', 'Y.', 'Afzal', 'M.M.', 'and Wang', 'X.', 'Mackay', 'J. D.', 'Barrand', 'N. E.', 'Hannah', 'D. M.', 'Potter', 'E.', 'Montoya', 'and Buytaert', 'W.', 'Malles', 'J.-H.', 'Marzeion', 'B.', 'and Myers', 'P. G.', 'Rounce', 'D.', 'Ultee', 'Lacroix', 'Frölicher', 'Schleussner', 'C.F.', 'Shafeeque', 'M.', 'Möller', 'and Marzeion', 'Vergnano', 'Franco', 'and Godio', 'Wimberly', 'Huss', 'D. R.', 'Coats', 'S.', 'and Holmgren', 'Zekollari', 'H.', 'Hock', 'Compagno', 'Fujita', 'James', 'Kraaijenbrink', 'Lipscomb', 'Minallah', 'Oberrauch', 'van Tricht', 'Champollion', 'Edwards', 'Farinotti', 'Immerzeel', 'Leguy', 'G.', 'Sakai', 'Shutkin', 'T. Y.', 'Mark', 'B. G.', 'Stansell', 'N. D.', 'Cruz Encarnación', 'Breche