# Get the DOIs of papers listed in Harzing's Publish or Perish (PoP) CSV files using Crossref

In [2]:
import csv
import requests

def query_crossref(title, authors, year, journal, article_url, publisher):
    """Look up a publication on Crossref and return the DOI of the top match.

    Args:
        title: Publication title.
        authors: Author string, passed to Crossref as-is.
        year: Publication year.
        journal: Journal (container) title.
        article_url: Article URL; sent as the free-form ``query`` when given.
        publisher: Publisher name; sent as a publisher filter when given.

    Returns:
        The DOI of the first result, or ``''`` when there is nothing to
        search on, the request fails, the response is not HTTP 200, the
        body is not valid JSON, or Crossref returns no items.
    """
    def _clean(value):
        # CSV cells may be None (short rows) or whitespace-only.
        return str(value).strip() if value is not None else ''

    # NOTE(review): per the Crossref REST API documentation, title and year
    # searches go through `query.bibliographic`, and the publisher field
    # query is named `query.publisher-name`; `query.title` and
    # `query.publisher` are not accepted field queries there. Confirm
    # against the current API docs.
    bibliographic = ' '.join(
        part for part in (_clean(title), _clean(year)) if part
    )
    candidates = {
        'query.bibliographic': bibliographic,
        'query.author': _clean(authors),
        'query.container-title': _clean(journal),
        'query': _clean(article_url),
        'query.publisher-name': _clean(publisher),
    }
    # Empty values would be sent as empty parameters, so drop them.
    query_params = {key: value for key, value in candidates.items() if value}
    if not query_params:
        # Without any search term the "first result" is an arbitrary work.
        return ''
    query_params['select'] = 'DOI'
    query_params['rows'] = '1'  # Only the first (most relevant) result

    try:
        # A timeout keeps one stalled request from hanging a whole batch.
        response = requests.get(
            'https://api.crossref.org/works',
            params=query_params,
            timeout=30,
        )
    except requests.RequestException:
        # Same best-effort contract as a non-200 status: no DOI found.
        return ''
    if response.status_code != 200:
        return ''

    try:
        payload = response.json()
    except ValueError:
        return ''
    if not isinstance(payload, dict):
        return ''
    items = (payload.get('message') or {}).get('items') or []
    if not items:
        return ''
    return items[0].get('DOI', '')

# Function to update the CSV with DOIs
def update_csv_with_dois(input_csv, output_csv):
    """Copy a publications CSV to ``output_csv``, filling in missing DOIs.

    Every row whose ``DOI`` cell is empty (or whose file has no ``DOI``
    column at all) is looked up with ``query_crossref`` using the row's
    ``Title``, ``Authors``, ``Year``, ``Source``, ``ArticleURL`` and
    ``Publisher`` cells. Rows that already carry a DOI are left untouched.

    Args:
        input_csv: Path of the CSV file to read (UTF-8).
        output_csv: Path of the CSV file to write (UTF-8); it has the same
            columns as the input, plus a trailing ``DOI`` column if the
            input had none.
    """
    with open(input_csv, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        publications = list(reader)
        # Take the header from the reader rather than from the first row,
        # so a file without any data rows no longer raises IndexError.
        fieldnames = list(reader.fieldnames or [])

    if 'DOI' not in fieldnames:
        fieldnames.append('DOI')

    for publication in publications:
        if publication.get('DOI'):
            continue
        # .get(...) or '' tolerates missing columns and short rows, for
        # which DictReader fills the cell with None.
        publication['DOI'] = query_crossref(
            publication.get('Title') or '',
            publication.get('Authors') or '',
            publication.get('Year') or '',
            publication.get('Source') or '',
            publication.get('ArticleURL') or '',
            publication.get('Publisher') or '',
        )

    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(publications)

# Example usage: enrich a single PoP export; the result is written under a
# relative name, i.e. into the current working directory.
single_pop_export = r'C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites_.csv'
update_csv_with_dois(single_pop_export, 'updated_with_dois.csv')


# Get the DOIs of papers from multiple Harzing's Publish or Perish (PoP) CSV files using Crossref

In [1]:
import csv
import requests
import os

def query_crossref(title, authors, year, journal, article_url, publisher):
    """Look up a publication on Crossref and return the DOI of the top match.

    Args:
        title: Publication title.
        authors: Author string, passed to Crossref as-is.
        year: Publication year.
        journal: Journal (container) title.
        article_url: Article URL; sent as the free-form ``query`` when given.
        publisher: Publisher name; sent as a publisher filter when given.

    Returns:
        The DOI of the first result, or ``''`` when there is nothing to
        search on, the request fails, the response is not HTTP 200, the
        body is not valid JSON, or Crossref returns no items.
    """
    def _clean(value):
        # CSV cells may be None (short rows) or whitespace-only.
        return str(value).strip() if value is not None else ''

    # NOTE(review): per the Crossref REST API documentation, title and year
    # searches go through `query.bibliographic`, and the publisher field
    # query is named `query.publisher-name`; `query.title` and
    # `query.publisher` are not accepted field queries there. Confirm
    # against the current API docs.
    bibliographic = ' '.join(
        part for part in (_clean(title), _clean(year)) if part
    )
    candidates = {
        'query.bibliographic': bibliographic,
        'query.author': _clean(authors),
        'query.container-title': _clean(journal),
        'query': _clean(article_url),
        'query.publisher-name': _clean(publisher),
    }
    # Empty values would be sent as empty parameters, so drop them.
    query_params = {key: value for key, value in candidates.items() if value}
    if not query_params:
        # Without any search term the "first result" is an arbitrary work.
        return ''
    query_params['select'] = 'DOI'
    query_params['rows'] = '1'  # Only the first (most relevant) result

    try:
        # A timeout keeps one stalled request from hanging a whole batch.
        response = requests.get(
            'https://api.crossref.org/works',
            params=query_params,
            timeout=30,
        )
    except requests.RequestException:
        # Same best-effort contract as a non-200 status: no DOI found.
        return ''
    if response.status_code != 200:
        return ''

    try:
        payload = response.json()
    except ValueError:
        return ''
    if not isinstance(payload, dict):
        return ''
    items = (payload.get('message') or {}).get('items') or []
    if not items:
        return ''
    return items[0].get('DOI', '')

# Function to update the CSV with DOIs
def update_csv_with_dois(input_csv, output_csv):
    """Copy a publications CSV to ``output_csv``, filling in missing DOIs.

    Every row whose ``DOI`` cell is empty (or whose file has no ``DOI``
    column at all) is looked up with ``query_crossref`` using the row's
    ``Title``, ``Authors``, ``Year``, ``Source``, ``ArticleURL`` and
    ``Publisher`` cells. Rows that already carry a DOI are left untouched.

    Args:
        input_csv: Path of the CSV file to read (UTF-8).
        output_csv: Path of the CSV file to write (UTF-8); it has the same
            columns as the input, plus a trailing ``DOI`` column if the
            input had none.
    """
    with open(input_csv, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        publications = list(reader)
        # Take the header from the reader rather than from the first row,
        # so a file without any data rows no longer raises IndexError.
        fieldnames = list(reader.fieldnames or [])

    if 'DOI' not in fieldnames:
        fieldnames.append('DOI')

    for publication in publications:
        if publication.get('DOI'):
            continue
        # .get(...) or '' tolerates missing columns and short rows, for
        # which DictReader fills the cell with None.
        publication['DOI'] = query_crossref(
            publication.get('Title') or '',
            publication.get('Authors') or '',
            publication.get('Year') or '',
            publication.get('Source') or '',
            publication.get('ArticleURL') or '',
            publication.get('Publisher') or '',
        )

    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(publications)

# Function to process multiple CSV files
def process_multiple_csv_files(file_pattern, file_count):
    """Run ``update_csv_with_dois`` over a numbered series of CSV files.

    For each number ``n`` from 1 to ``file_count`` the input file is
    ``<file_pattern><n>.csv`` and the output file is
    ``<file_pattern><n>_updated.csv``. Missing input files are reported
    on stdout and skipped.

    Args:
        file_pattern: Path prefix shared by all files, without number or
            extension.
        file_count: Highest file number to process (inclusive).
    """
    for number in range(1, file_count + 1):
        source_path = f'{file_pattern}{number}.csv'
        if not os.path.exists(source_path):
            print(f'File {source_path} does not exist.')
            continue
        target_path = f'{file_pattern}{number}_updated.csv'
        update_csv_with_dois(source_path, target_path)
        print(f'Processed {source_path} and saved results to {target_path}')

# Example usage: process PopCites1.csv through PopCites21.csv; each result is
# saved beside its input with an `_updated` suffix.
pop_export_prefix = r'C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites'
process_multiple_csv_files(pop_export_prefix, 21)


Processed C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites1.csv and saved results to C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites1_updated.csv
Processed C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites2.csv and saved results to C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites2_updated.csv
Processed C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites3.csv and saved results to C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites3_updated.csv
Processed C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites4.csv and saved results to C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites4_updated.csv
Processed C:

## Keep only the DOIs, stacked into a single CSV file

In [4]:
import csv
import os

# Function to extract DOIs from a CSV file and return them as a list
def extract_dois_from_csv(file_path):
    """Return the non-empty values of the ``DOI`` column, in file order.

    Args:
        file_path: Path of a UTF-8 CSV file that has a ``DOI`` column.

    Returns:
        A list with one entry per row whose ``DOI`` cell is non-empty.
    """
    found = []
    with open(file_path, mode='r', newline='', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            value = record['DOI']
            if value:
                found.append(value)
    return found

# Function to stack DOIs from multiple CSV files into a single CSV file
def stack_dois_to_single_csv(file_pattern, file_count, output_file):
    """Collect the DOIs of a numbered series of CSV files into one CSV.

    Reads ``<file_pattern><n>_updated.csv`` for ``n`` from 1 to
    ``file_count`` (files that do not exist are skipped) and writes every
    DOI found, in order, to ``output_file`` under a single ``DOI`` header.

    Args:
        file_pattern: Path prefix shared by all input files.
        file_count: Highest file number to read (inclusive).
        output_file: Path of the one-column CSV file to write (UTF-8).
    """
    candidate_paths = (
        f'{file_pattern}{number}_updated.csv'
        for number in range(1, file_count + 1)
    )
    collected = [
        doi
        for path in candidate_paths
        if os.path.exists(path)
        for doi in extract_dois_from_csv(path)
    ]

    with open(output_file, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(['DOI'])  # Header row
        for doi in collected:
            out.writerow([doi])

# Example usage: gather the DOIs of PopCites1_updated.csv through
# PopCites21_updated.csv into one file in the current working directory.
updated_export_prefix = r'C:\Users\GODLEWSKI\OneDrive - unistra.fr\EDITORIAL & REFEREEING\Revue FINANCE\ranking\PoP\PopCites'
stack_dois_to_single_csv(updated_export_prefix, 21, 'all_dois_for_vos_viewer.csv')