In [None]:
import pandas as pd
import requests
import time

# Fetch the 'update-nature' values that Crossref Labs reports for a DOI
def fetch_update_nature(doi):
    """Return the ';'-joined 'update-nature' values for ``doi``.

    Queries the Crossref Labs works endpoint and reads the
    'cr-labs-updates' list found under the 'message' key of the JSON reply.

    Args:
        doi: DOI string; it is interpolated into the request URL as given.

    Returns:
        str: the 'update-nature' values joined with ';'. An empty string is
        returned when ``doi`` is not a non-blank string, when the request
        fails or times out, when the HTTP status is not 200, when the body
        is not valid JSON, or when no update carries an 'update-nature'.
    """
    # A missing DOI (e.g. NaN from an empty CSV cell) is not a string;
    # do not spend a request on ".../works/nan".
    if not isinstance(doi, str) or not doi.strip():
        return ""

    url = f"https://api.labs.crossref.org/works/{doi}?mailto=christian.hauschke@tib.eu"
    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Treat network failures like HTTP errors: no data for this DOI.
        return ""

    if response.status_code != 200:
        return ""

    try:
        json_data = response.json()
    except ValueError:
        # Body was not JSON (requests' JSON decode error subclasses ValueError).
        return ""

    # `or` fallbacks also cover keys that are present but null.
    updates = (json_data.get('message') or {}).get('cr-labs-updates') or []
    natures = [update.get('update-nature') for update in updates]
    # Entries without an 'update-nature' yield None, which str.join rejects.
    return ";".join(nature for nature in natures if nature)

# Read the input CSV in batches, optionally resuming after a number of data rows
csv_filename = "OpenAlex_retractions.csv"
start_line = 0  # Number of data rows (header excluded) to skip before starting
batch_size = 100

# Stream the file once in fixed-size chunks instead of re-reading it from the
# top for every batch. Line 0 is the header; skipping lines 1..start_line drops
# exactly `start_line` data rows, so batches neither overlap nor leave gaps
# (range(1, start_line) would re-read the last row of the first batch).
batch_reader = pd.read_csv(
    csv_filename,
    skiprows=range(1, start_line + 1),
    chunksize=batch_size,
)

# Add the update_nature column to each batch and export it to its own CSV file;
# the number in the file name is the count of data rows covered so far.
for openalexexport_df in batch_reader:
    openalexexport_df['update_nature'] = openalexexport_df['doi'].apply(fetch_update_nature)
    export_filename = f"retraction_metadata_{start_line + len(openalexexport_df)}.csv"
    openalexexport_df.to_csv(export_filename, index=False)
    print(f"Exported to {export_filename}")
    start_line += len(openalexexport_df)
    time.sleep(2)  # Wait for 2 seconds before processing the next batch



Exported to retraction_metadata_100.csv
Exported to retraction_metadata_200.csv
Exported to retraction_metadata_300.csv
Exported to retraction_metadata_400.csv
Exported to retraction_metadata_500.csv
Exported to retraction_metadata_600.csv
Exported to retraction_metadata_700.csv
Exported to retraction_metadata_800.csv
Exported to retraction_metadata_900.csv
Exported to retraction_metadata_1000.csv
Exported to retraction_metadata_1100.csv
Exported to retraction_metadata_1200.csv
Exported to retraction_metadata_1300.csv
Exported to retraction_metadata_1400.csv
Exported to retraction_metadata_1500.csv
Exported to retraction_metadata_1600.csv
Exported to retraction_metadata_1700.csv
Exported to retraction_metadata_1800.csv
Exported to retraction_metadata_1900.csv
Exported to retraction_metadata_2000.csv
Exported to retraction_metadata_2100.csv
Exported to retraction_metadata_2200.csv
Exported to retraction_metadata_2300.csv
Exported to retraction_metadata_2400.csv
Exported to retraction_me

In [None]:
import os

def list_batch_exports(directory, prefix="retraction_metadata_", suffix=".csv"):
    """Return the per-batch export file names in ``directory``, in numeric order.

    Only names of the form ``<prefix><digits><suffix>`` are returned, so other
    CSV files in the same directory (the input file, a previously written
    merged file) are not picked up. Sorting uses the integer in the name, so
    ``..._200.csv`` comes before ``..._1000.csv``.
    """
    numbered = []
    for name in os.listdir(directory):
        if name.startswith(prefix) and name.endswith(suffix):
            counter = name[len(prefix):len(name) - len(suffix)]
            if counter.isdigit():
                numbered.append((int(counter), name))
    return [name for _, name in sorted(numbered)]


# Get the current working directory
directory = os.getcwd()

# List only the batch export files; merging every *.csv would also pull in the
# input file and, on a re-run, the previous merged output.
csv_files = list_batch_exports(directory)

# Read every batch, then concatenate once (concat inside a loop is quadratic)
batch_frames = [pd.read_csv(os.path.join(directory, csv_file)) for csv_file in csv_files]
# pd.concat rejects an empty list; fall back to an empty frame when nothing was exported
merged_df = pd.concat(batch_frames, ignore_index=True) if batch_frames else pd.DataFrame()

# Export the merged DataFrame to a single CSV file
merged_filename = "merged_retraction_metadata.csv"
merged_df.to_csv(merged_filename, index=False)

print(f"Merged data exported to {merged_filename}")
