# Scholar.py Citation Counter
This notebook will use [scholar.py](https://github.com/ckreibich/scholar.py) to fetch citation counts for each DOI in your groups, aggregate by year (2018-2025), and output a summary CSV.

In [12]:
# Install scholar.py and dependencies
!git clone https://github.com/ckreibich/scholar.py.git
!pip install requests lxml

fatal: destination path 'scholar.py' already exists and is not an empty directory.


In [13]:
import pandas as pd
import ast
df = pd.read_csv('dois_by_topic.csv')
# Convert string representation of list to actual list
df['doi_list'] = df['doi_list'].apply(ast.literal_eval)
groups = df['doi_list'].tolist()
print(f"Loaded {len(groups)} groups of DOIs.")

Loaded 20 groups of DOIs.


In [14]:
import subprocess
import os
def get_citations_for_doi(doi):
    # Call scholar.py script using subprocess
    script_path = os.path.join('scholar.py', 'scholar.py') if os.path.isdir('scholar.py') else 'scholar.py'
    try:
        result = subprocess.run(["python3", script_path, "-c", doi], capture_output=True, text=True, timeout=30)
        output = result.stdout
        # Parse citation count from output
        for line in output.splitlines():
            if "Citations" in line:
                try:
                    return int(line.split(':')[-1].strip())
                except ValueError:
                    continue
    except Exception as e:
        print(f"Error fetching citations for DOI {doi}: {e}")
    return 0

In [16]:
# Test citation counting for group 0 only
test_group_idx = 0
test_doi_list = groups[test_group_idx]
test_total_citations = 0
for doi in test_doi_list:
    try:
        count = get_citations_for_doi(doi)
        print(f"DOI: {doi}, Citations: {count}")
        test_total_citations += count
    except Exception as e:
        print(f"Error for DOI {doi}: {e}")
print(f"Total citations for group {test_group_idx}: {test_total_citations}")

DOI: 10.1111/apa.17349, Citations: 0
DOI: 10.1038/s41598-024-68467-8, Citations: 0
DOI: 10.3390/reprodmed1020008, Citations: 0
DOI: 10.1016/j.dcn.2022.101150, Citations: 0
DOI: 10.1016/j.dcn.2023.101261, Citations: 0
DOI: 10.1016/j.dcn.2022.101150, Citations: 0
DOI: 10.1016/j.dcn.2023.101261, Citations: 0
DOI: 10.1016/j.jadohealth.2024.06.0, Citations: 0
DOI: 10.1016/j.ypmed.2023.107452, Citations: 0
DOI: 10.1016/j.dcn.2023.101227, Citations: 0
DOI: 10.1016/j.jadohealth.2024.06.0, Citations: 0
DOI: 10.1016/j.ypmed.2023.107452, Citations: 0
DOI: 10.1016/j.dcn.2023.101227, Citations: 0
DOI: 10.1038/s41380-023-02316-4, Citations: 0
DOI: 10.1007/s00787-024-02620-6, Citations: 0
DOI: 10.1038/s41380-023-02316-4, Citations: 0
DOI: 10.1007/s00787-024-02620-6, Citations: 0
DOI: 10.1016/j.addbeh.2024.108211, Citations: 0
DOI: 10.1007/s10578-019-00892-7, Citations: 0
DOI: 10.1186/s13293-024-00604-4, Citations: 0
DOI: 10.1016/j.addbeh.2024.108211, Citations: 0
DOI: 10.1007/s10578-019-00892-7, Cita

In [15]:
results = []
for group_idx, doi_list in enumerate(groups):
    total_citations = 0
    for doi in doi_list:
        try:
            count = get_citations_for_doi(doi)
            total_citations += count
        except Exception as e:
            print(f"Error for DOI {doi}: {e}")
    results.append({'group': group_idx, 'total_citations': total_citations})
result_df = pd.DataFrame(results)
result_df.set_index('group', inplace=True)
result_df.to_csv('citations_by_group.csv')
print("Saved citation counts to citations_by_group.csv")

Saved citation counts to citations_by_group.csv
