In [1]:
import requests
# Contact address sent to Crossref so requests are routed to the "polite" pool.
your_email = "xi183728@ucf.edu"
if "your_email" in your_email:
    print(
        "Nope, can't continue until you replace 'your_email@ucf.edu' with your email address in the code cell"
    )
else:
    print("Good to go!")

api_url = "https://api.crossref.org/members?rows=0"
# Crossref identifies polite-pool callers through a `mailto` query parameter
# (or a mailto: entry in the User-Agent header); a custom HTTP header named
# "mailto" is not one of the documented mechanisms, so send it as a parameter.
# requests appends `params` to the query string already present in api_url.
# The timeout keeps the cell from hanging forever if the server never answers.
api_response = requests.get(api_url, params={"mailto": your_email}, timeout=30)
print("The Crossref server responded with status code: ", api_response.status_code)

Good to go!
The Crossref server responded with status code:  200


In [28]:
import requests

def find_journal_issn(journal_name):
    """Look up a journal title in Crossref and print the best match.

    Queries the Crossref ``/journals`` endpoint for up to five candidates and
    prints the title, ISSN list and publisher of the first one. When the
    request fails or nothing matches, a one-line explanation is printed
    instead of silently doing nothing.

    Args:
        journal_name: Free-text journal title to search for.

    Returns:
        None. All results are reported on stdout.
    """
    # Hand the query to requests as `params` so it is URL-encoded; splicing the
    # raw title into the URL breaks on names containing '&', '#' or '+'.
    response = requests.get(
        "https://api.crossref.org/journals",
        params={"query": journal_name, "rows": 5},
        timeout=30,
    )

    if response.status_code != 200:
        print(
            f"Crossref lookup for '{journal_name}' failed with status code: "
            f"{response.status_code}"
        )
        return

    items = response.json().get("message", {}).get("items", [])
    if not items:
        print(f"No Crossref journal found for '{journal_name}'")
        return

    # Crossref returns candidates ordered by relevance; report the top hit.
    journal = items[0]
    print(f"journal_name: {journal.get('title', '')}")
    print(f"ISSN: {journal.get('ISSN', [])}")
    print(f"publisher: {journal.get('publisher', '')}")

# Titles of the three journals whose ISSNs are needed for the collection step.
journals = [
    "Journal of Applied Psychology",
    "Academy of Management Journal",
    "Personnel Psychology",
]

# Print the best Crossref match (title, ISSNs, publisher) for each title in turn.
for journal in journals:
    find_journal_issn(journal)

journal_name: Journal of Applied Psychology
ISSN: ['0021-9010', '1939-1854']
publisher: American Psychological Association
journal_name: Academy of Management Journal
ISSN: ['0001-4273', '1948-0989']
publisher: Academy of Management
journal_name: Personnel Psychology
ISSN: ['0031-5826', '1744-6570']
publisher: Wiley (Blackwell Publishing)


In [1]:
import requests
import pandas as pd
import time
from datetime import datetime
import json
from typing import List, Dict
import math

class JournalCollector:
    """Collect article metadata for journals from the Crossref ``/works`` endpoint.

    Records accumulate in ``self.all_articles`` (a list of flat dicts, one per
    article) across calls to ``collect_journal_articles`` and are written out
    in one go by ``save_to_csv``.
    """

    # Number of records requested per page when walking a journal's results.
    PAGE_SIZE = 100
    # Earliest publication date included when the caller does not pass one.
    DEFAULT_FROM_PUB_DATE = "2015-01-01"
    # Seconds to wait for a response before treating the request as failed.
    REQUEST_TIMEOUT = 30

    def __init__(self, email: str = "your@email.com"):
        """Set up the endpoint, identifying headers and the empty result list.

        Args:
            email: Contact address embedded in the ``User-Agent`` header as
                ``mailto:<email>``.
        """
        self.base_url = "https://api.crossref.org/works"
        self.headers = {
            'User-Agent': f'JournalCollector/1.0 (mailto:{email})'
        }
        self.all_articles = []

    def fetch_articles(self, issn: str, rows: int = 100, offset: int = 0,
                       from_pub_date: str = DEFAULT_FROM_PUB_DATE) -> dict:
        """Fetch one page of works for a journal.

        Args:
            issn: ISSN used in the ``issn:`` filter.
            rows: Number of records to request.
            offset: Zero-based index of the first record to return.
            from_pub_date: Lower bound for the ``from-pub-date`` filter
                (``YYYY-MM-DD``).

        Returns:
            The decoded JSON response, or ``None`` when the request fails, the
            server answers with an error status, or the body is not JSON. The
            failure is reported on stdout.
        """
        params = {
            'filter': f'issn:{issn},from-pub-date:{from_pub_date}',
            'rows': rows,
            'offset': offset,
            'sort': 'published',
            'order': 'desc'
        }

        try:
            time.sleep(1)  # Rate limiting: at most one request per second
            response = requests.get(
                self.base_url,
                params=params,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that cannot be decoded as JSON.
            print(f"Error fetching data: {str(e)}")
            return None

    @staticmethod
    def _first_or_none(values):
        """Return the first element of a list field, or None if it is missing or empty."""
        return values[0] if values else None

    @staticmethod
    def _publication_year(article: Dict):
        """Return the year from ``published.date-parts``, or None if it is absent or empty."""
        date_parts = (article.get('published') or {}).get('date-parts') or []
        first_date = date_parts[0] if date_parts else None
        return first_date[0] if first_date else None

    def extract_article_info(self, article: Dict) -> Dict:
        """Flatten one Crossref work record into the fields kept for analysis.

        List-valued fields that are present but empty (``title``,
        ``container-title``, ``date-parts``) yield ``None`` for that field
        rather than discarding the whole article.

        Args:
            article: One element of ``message.items`` from a works response.

        Returns:
            A dict with keys ``doi``, ``title``, ``references_count``,
            ``authors_count``, ``citations_count``, ``url``, ``journal``,
            ``abstract`` and ``publication_year``; or ``None`` when the record
            is not shaped like a mapping at all.
        """
        try:
            return {
                'doi': article.get('DOI'),
                'title': self._first_or_none(article.get('title')),
                'references_count': article.get('references-count'),
                # `or []` also covers an explicit null author list.
                'authors_count': len(article.get('author') or []),
                'citations_count': article.get('is-referenced-by-count'),
                'url': article.get('URL'),
                'journal': self._first_or_none(article.get('container-title')),
                'abstract': article.get('abstract', ''),
                'publication_year': self._publication_year(article)
            }
        except (AttributeError, TypeError, IndexError, KeyError) as e:
            print(f"Error extracting article info: {str(e)}")
            return None

    def collect_journal_articles(self, issn: str, journal_name: str,
                                 from_pub_date: str = DEFAULT_FROM_PUB_DATE):
        """Collect every article of a journal published on or after a date.

        Pages through the result set ``PAGE_SIZE`` records at a time and
        appends each extracted record to ``self.all_articles``. Pages that
        cannot be fetched are skipped, and a warning listing them is printed at
        the end so an incomplete collection is visible.

        Args:
            issn: ISSN of the journal to collect.
            journal_name: Human-readable name, used only in progress messages.
            from_pub_date: Earliest publication date to include (``YYYY-MM-DD``).
        """
        print(f"\nCollecting articles for {journal_name} (ISSN: {issn})...")

        # A one-row request is enough to learn how many records match.
        initial_data = self.fetch_articles(issn, rows=1, from_pub_date=from_pub_date)
        if not initial_data:
            return

        total_results = initial_data.get('message', {}).get('total-results', 0)
        total_pages = math.ceil(total_results / self.PAGE_SIZE)

        print(f"Found {total_results} articles. Fetching {total_pages} pages...")

        failed_pages = []
        for page in range(total_pages):
            offset = page * self.PAGE_SIZE
            data = self.fetch_articles(
                issn, rows=self.PAGE_SIZE, offset=offset, from_pub_date=from_pub_date
            )

            if not data or 'message' not in data:
                failed_pages.append(page + 1)
                continue

            for article in data['message'].get('items', []):
                article_info = self.extract_article_info(article)
                if article_info:
                    self.all_articles.append(article_info)

            print(f"Processed page {page + 1}/{total_pages}")

        if failed_pages:
            print(
                f"Warning: {len(failed_pages)} page(s) could not be fetched for "
                f"{journal_name} (pages {failed_pages}); results are incomplete"
            )

    def save_to_csv(self, filename: str = "ten_years_api.csv"):
        """Write all collected records to a CSV file.

        Args:
            filename: Destination path. Nothing is written when no articles
                have been collected.
        """
        if self.all_articles:
            df = pd.DataFrame(self.all_articles)
            df.to_csv(filename, index=False)
            print(f"\nSaved {len(self.all_articles)} articles to {filename}")
        else:
            print("No articles to save")

def main(email: str = ""):
    """Collect articles for the three target journals and save them to one CSV.

    Args:
        email: Contact address to send to Crossref in the User-Agent header.
            When empty, the notebook-level ``your_email`` variable is used if
            it has been defined; otherwise ``JournalCollector`` falls back to
            its own default address.
    """
    journals = [
        {'name': 'Journal of Applied Psychology', 'issn': '1939-1854'},
        {'name': 'Academy of Management Journal', 'issn': '1948-0989'},
        {'name': 'Personnel Psychology', 'issn': '1744-6570'}
    ]

    # Identify ourselves with a real contact address whenever one is available,
    # instead of always sending the collector's placeholder default.
    contact_email = email or globals().get("your_email", "")
    collector = JournalCollector(contact_email) if contact_email else JournalCollector()

    # Collect articles from each journal
    for journal in journals:
        collector.collect_journal_articles(journal['issn'], journal['name'])

    # Save all articles to CSV
    collector.save_to_csv("ten_years_api.csv")

# Run the collection only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


Collecting articles for Journal of Applied Psychology (ISSN: 1939-1854)...
Found 1441 articles. Fetching 15 pages...
Processed page 1/15
Processed page 2/15
Processed page 3/15
Processed page 4/15
Processed page 5/15
Processed page 6/15
Processed page 7/15
Processed page 8/15
Processed page 9/15
Processed page 10/15
Processed page 11/15
Processed page 12/15
Processed page 13/15
Processed page 14/15
Processed page 15/15

Collecting articles for Academy of Management Journal (ISSN: 1948-0989)...
Found 819 articles. Fetching 9 pages...
Processed page 1/9
Processed page 2/9
Processed page 3/9
Processed page 4/9
Processed page 5/9
Processed page 6/9
Processed page 7/9
Processed page 8/9
Processed page 9/9

Collecting articles for Personnel Psychology (ISSN: 1744-6570)...
Found 612 articles. Fetching 7 pages...
Processed page 1/7
Processed page 2/7
Processed page 3/7
Processed page 4/7
Processed page 5/7
Processed page 6/7
Processed page 7/7

Saved 2872 articles to ten_years_api.csv


In [4]:

import pandas as pd
import scipy.stats as stats
from pathlib import Path
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

def analyze_publications(csv_path: str = 'ten_years_api.csv',
                         report_path: str = 'publication_stats.txt'):
    """Run the citation statistics on the collected articles and write a report.

    Computes (1) the Pearson correlation between publication year and citation
    count, (2) a one-way ANOVA of citation counts across journals, (3) the
    per-journal mean and standard deviation of citations, and (4) Tukey's HSD
    post-hoc comparison. Rows missing a value needed by a given analysis are
    left out of that analysis only, so a single missing year or journal name
    does not turn the statistics into NaN.

    Args:
        csv_path: CSV with at least the columns ``publication_year``,
            ``citations_count`` and ``journal``.
        report_path: Text file the formatted report is written to.

    Returns:
        dict with keys ``correlation``, ``correlation_p``, ``anova_f``,
        ``anova_p``, ``journal_means`` (DataFrame indexed by journal with
        ``mean`` and ``std`` columns) and ``tukey``.
    """
    # Read the data
    df = pd.read_csv(csv_path)

    # 1. Correlation between publication year and citation count.
    # pearsonr yields NaN as soon as one input value is missing, so restrict
    # the sample to rows where both values are present.
    year_rows = df.dropna(subset=['publication_year', 'citations_count'])
    correlation, p_value = stats.pearsonr(
        year_rows['publication_year'],
        year_rows['citations_count']
    )

    # 2. ANOVA for journal differences.
    # Rows without a journal name cannot be assigned to a group, and a missing
    # citation count inside a group makes the F-statistic NaN.
    journal_rows = df.dropna(subset=['journal', 'citations_count'])
    journal_groups = [
        citations
        for _, citations in journal_rows.groupby('journal')['citations_count']
    ]

    f_stat, anova_p = stats.f_oneway(*journal_groups)

    # Conduct Tukey's HSD test for post-hoc analysis
    tukey = pairwise_tukeyhsd(
        journal_rows['citations_count'],
        journal_rows['journal']
    )

    # Calculate mean citations by journal
    journal_means = journal_rows.groupby('journal')['citations_count'].agg(['mean', 'std'])

    # Create the report
    report = [
        "Publication Statistics Analysis",
        "=" * 30 + "\n",
        "1. Correlation Analysis",
        "-" * 20,
        f"Correlation between publication year and citation count: {correlation:.4f}",
        f"P-value: {p_value:.4f}\n",

        "2. ANOVA Analysis",
        "-" * 20,
        f"F-statistic: {f_stat:.4f}",
        f"P-value: {anova_p:.4f}\n",

        "3. Mean Citations by Journal",
        "-" * 20
    ]

    for journal in journal_means.index:
        report.append(
            f"{journal}:"
            f" Mean = {journal_means.loc[journal, 'mean']:.2f},"
            f" SD = {journal_means.loc[journal, 'std']:.2f}"
        )

    report.extend([
        "\n4. Tukey's HSD Post-hoc Analysis",
        "-" * 20,
        str(tukey)
    ])

    # Save results; explicit UTF-8 so non-ASCII journal names are written
    # identically on every platform.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(report))

    print(f"Analysis complete. Results saved to {report_path}")

    # Return results for potential further use
    return {
        'correlation': correlation,
        'correlation_p': p_value,
        'anova_f': f_stat,
        'anova_p': anova_p,
        'journal_means': journal_means,
        'tukey': tukey
    }

# Run the analysis only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    analyze_publications()

Analysis complete. Results saved to publication_stats.txt
