In [38]:
# install springer nature api if not installed
!pip install springernature-api-client



In [39]:
apiKey = 'your_API_key_granted_in_springernature_website'

In [40]:
import springernature_api_client.openaccess as openaccess
openaccess_client = openaccess.OpenAccessAPI(api_key = apiKey)

In [None]:
# build your query
query = "(keyword:NLP AND keyword:Transformer) OR (keyword:'Natural Language Processing' AND keyword:Transformer) AND type:{(Journal)} AND onlinedatefrom:2024-01-01 AND onlinedateto:2025-06-20"

result = openaccess_client.search(q=query, p=25, s=1, fetch_all=False, is_premium=False)

In [42]:
# Show results
print(f"Total number of results is {len(result['records'])} \n---")
for record in result['records']:
    print(f"Title: {record.get('title')}")
    print("---")

Total number of results is 25 
---
Title: The usage of a transformer based and artificial intelligence driven multidimensional feedback system in english writing instruction
---
Title: Unleashing the transformers: NLP models detect AI writing in education
---
Title: Transformers to the rescue: alleviating data scarcity in arabic grammatical error correction with pre-trained models
---
Title: Emotion-Aware RoBERTa enhanced with emotion-specific attention and TF-IDF gating for fine-grained emotion recognition
---
Title: Transformer-enhanced hierarchical encoding with multi-decoder for diversified MCQ distractor generation
---
Title: Cross language transformation of free text into structured lobectomy surgical records from a multi center study
---
Title: An LLM-based hybrid approach for enhanced automated essay scoring
---
Title: Industrial applications of large language models
---
Title: MISTIC: a novel approach for metastasis classification in Italian electronic health records using tra

In [26]:
# Get ISSN or fall back to eISSN if ISSN is empty
for record in result['records']:
    print(f"Title: {record.get('title')}")
    issn = record.get('issn', '') or record.get('eIssn', '')
    record['key_issn'] = issn if issn else 'Not available'
    print(f"key_issn: {record['key_issn']}")
    print("---")

Title: The usage of a transformer based and artificial intelligence driven multidimensional feedback system in english writing instruction
key_issn: 2045-2322
---
Title: Unleashing the transformers: NLP models detect AI writing in education
key_issn: 2197-9987
---
Title: Transformers to the rescue: alleviating data scarcity in arabic grammatical error correction with pre-trained models
key_issn: 0941-0643
---
Title: Emotion-Aware RoBERTa enhanced with emotion-specific attention and TF-IDF gating for fine-grained emotion recognition
key_issn: 2045-2322
---
Title: Transformer-enhanced hierarchical encoding with multi-decoder for diversified MCQ distractor generation
key_issn: 1573-7462
---
Title: Cross language transformation of free text into structured lobectomy surgical records from a multi center study
key_issn: 2045-2322
---
Title: An LLM-based hybrid approach for enhanced automated essay scoring
key_issn: 2045-2322
---
Title: Industrial applications of large language models
key_iss

In [43]:
import json

# Save to file
with open('bulkArticles.json', 'w') as f:
    json.dump(result, f)

In [44]:
import json

# Read the bulk file of articles downloaded from the API
with open('bulkArticles.json', 'r') as f:
    data = json.load(f)

In [35]:
import requests
from bs4 import BeautifulSoup
import time
import re

def get_scimago_metrics(issn):
    """Get journal metrics from SCImago by following their search→profile workflow"""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept-Language': 'en-US,en;q=0.9'
    }
    
    try:
        # Step 1: Perform ISSN search
        search_url = f"https://www.scimagojr.com/journalsearch.php?q={issn}"
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
        
        # Check for CAPTCHA
        if "captcha" in response.text.lower():
            return {'error': 'CAPTCHA triggered - Try manual search first'}
        
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Step 2: Find the journal link in search results
        results_div = soup.find('div', class_='search_results')
        if not results_div:
            return {'error': 'No search results found'}
            
        journal_link = results_div.find('a', href=True)
        if not journal_link:
            return {'error': 'Journal link not found in results'}
        
        # Step 3: Follow to journal profile page
        journal_url = "https://www.scimagojr.com/" + journal_link['href']
        time.sleep(3)  # Be polite
        
        journal_response = requests.get(journal_url, headers=headers, timeout=10)
        journal_soup = BeautifulSoup(journal_response.text, 'html.parser')
        
        # Step 4: Extract metrics - UPDATED H-INDEX SELECTOR
        h_index_elements = journal_soup.find_all('p', class_='hindexnumber')
        h_index = h_index_elements[1].text.strip() if len(h_index_elements) > 1 else 'N/A'
        sjr = h_index_elements[0].text.strip() if len(h_index_elements) > 1 else 'N/A'
        
        return {
            'journal': journal_link.get_text(strip=True),
            'h_index': h_index,
            'sjr': sjr
        }
        
    except Exception as e:
        return {'error': f'SCImago processing failed: {str(e)}'}

def get_journal_metrics(issn):
    """Main function to get metrics for a journal ISSN"""
    # Clean ISSN format
    issn = re.sub(r'[^0-9X]', '', issn.upper())
    if len(issn) != 8:
        print(f"error: key_issn {issn} Invalid ISSN format (must be 8 characters)")
        return {'error': 'Invalid ISSN format (must be 8 characters)'}
    
    # Try SCImago first
    scimago_result = get_scimago_metrics(issn)
    if not scimago_result.get('error'):
        return {'source': 'SCImago', 'issn': issn, **scimago_result}

    print(f"error: Journal not found in SCImago, key_issn: {issn}")
    return {'error': 'Journal not found in SCImago', 'issn': issn}

# Augmenting the papers with their journals h-index
if __name__ == "__main__":
    
    # some issns for testing, use if needed
    """
    test_issns = [
        "2045-2322",  # Scientific Reports
        "2197-9987",  # Journal of Computers in Education
        "1234-5678"   # Invalid ISSN
    ]
    """
    
    for record in data['records']:
        print(f"Title: {record.get('title')}")

        # uncomment if key_issn was not addressed earlier, for us key_issn is issn that fallsback to eIssn when issn is not available
        #issn = record.get('issn', '') or record.get('eIssn', '')
        #record['key_issn'] = issn if issn else 'Not available'
        
        print(f"\nFetching metrics for ISSN/eISSN: {record['key_issn']}:")
        start_time = time.time()
        metrics = get_journal_metrics(record['key_issn'])
        h_index = metrics.get('h_index', 9999) if metrics else 9999
        print(f"h_index {h_index}")
        record['h_index'] = h_index
        print(f"Results took ({time.time()-start_time:.2f}s)")
        print("---")
        
        #print(metrics)
        time.sleep(5)  # Rate limiting

Title: The usage of a transformer based and artificial intelligence driven multidimensional feedback system in english writing instruction

Fetching metrics for ISSN/eISSN: 2045-2322:
h_index 347
Results took (3.46s)
---
Title: Unleashing the transformers: NLP models detect AI writing in education

Fetching metrics for ISSN/eISSN: 2197-9987:
h_index 31
Results took (3.39s)
---
Title: Transformers to the rescue: alleviating data scarcity in arabic grammatical error correction with pre-trained models

Fetching metrics for ISSN/eISSN: 0941-0643:
h_index 146
Results took (3.44s)
---
Title: Emotion-Aware RoBERTa enhanced with emotion-specific attention and TF-IDF gating for fine-grained emotion recognition

Fetching metrics for ISSN/eISSN: 2045-2322:
h_index 347
Results took (3.44s)
---
Title: Transformer-enhanced hierarchical encoding with multi-decoder for diversified MCQ distractor generation

Fetching metrics for ISSN/eISSN: 1573-7462:
h_index 138
Results took (4.45s)
---
Title: Cross l

In [36]:
import json

# Save to file
with open('bulkArticlesAugmented.json', 'w') as f:
    json.dump(result, f)