In [None]:
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
# PRT820: THE INFLUENCE OF POST-PUBLICATION CORRESPONDENCE ON RESEARCH PAPERS                #
# STUDENT: ANNE TA - S359453                                                                 #
# Code Objective: Using Zero-Shot Classification to categorize PPC topics                    #
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

In [1]:
#-------------------------------------------------------------
# DEFINE BASIC FUNCTIONS FOR READING DATA FROM FILES
#-------------------------------------------------------------
import pandas as pd
#=======================================
# Look up configured variables by name
#=======================================
def get_var(var_name):
    """Return the value configured for ``var_name`` in ``variable/variable.txt``.

    The file is read on every call and is expected to hold one
    ``name,value`` pair per line. Whitespace around the name and around the
    value is stripped. Only the FIRST comma separates the name from the
    value, so a value that itself contains commas (for example a file path)
    is kept intact instead of being dropped. Lines without any comma are
    ignored. When a name occurs more than once, the last occurrence wins.

    Args:
        var_name: Name of the variable to look up.

    Returns:
        The configured value, as a string.

    Raises:
        FileNotFoundError: If ``variable/variable.txt`` does not exist
            relative to the current working directory.
        KeyError: If ``var_name`` is not defined in the file.
    """
    variable_filename = "variable/variable.txt"

    # Maps variable name -> value for every well-formed line in the file
    variables = {}

    with open(variable_filename, 'r') as file:
        for line in file:
            # partition() splits on the first comma only; `separator` is
            # empty when the line has no comma at all (blank/malformed line).
            name, separator, value = line.strip().partition(',')
            if separator:
                variables[name.strip()] = value.strip()

    return variables[var_name]

#================================================================
# Define a function to read data from a CSV file into a DataFrame
#================================================================
def read_csv(filename, ec='ISO-8859-1'):
    """Load a CSV file into a pandas DataFrame, reporting failures on stdout.

    Args:
        filename: Path of the CSV file to load.
        ec: Text encoding passed to ``pd.read_csv`` (default ISO-8859-1).

    Returns:
        The loaded DataFrame, or ``None`` when the file could not be read.
        In that case a message has been printed instead of an exception
        being raised, so callers must check for ``None`` themselves.
    """
    frame = None
    try:
        frame = pd.read_csv(filename, encoding=ec)
    except FileNotFoundError:
        # Missing file: tell the user and fall through to return None
        print("File not found. Please check the file path.")
    except Exception as err:
        # Any other read/parse problem is reported the same best-effort way
        print("An error occurred:", err)
    return frame

In [None]:
#-------------------------------------------------------------
# PULL CITATION DATA BY USING SCOPUS API
#-------------------------------------------------------------
import pandas as pd
import pybliometrics
from pybliometrics.scopus import CitationOverview
# Initialise the pybliometrics Scopus module before any request is made
pybliometrics.scopus.init()


#====================================================================
# Process citation data for Original Articles
#====================================================================
# Locate and load the raw data table
raw_data_filepath = get_var('raw_data_filepath')
ppc_article_df = read_csv(raw_data_filepath)

# One citation query is issued per distinct value of the DOI_OA column
doi_list = ppc_article_df['DOI_OA'].unique()

# Query window: smallest Year_OA value up to the fixed end year 2024
min_year_OA = ppc_article_df['Year_OA'].min()
timespan = str(min_year_OA) + "-2024"
print(timespan)


# Accumulates one record per (DOI, year) pair
results = []

for doi in doi_list:
    # NOTE(review): confirm the installed pybliometrics version accepts the
    # `date=` keyword in this form.
    co = CitationOverview([doi], id_type='doi', date=timespan)

    # Exactly one identifier was requested, so only the first entry of the
    # nested co.cc list is read; it is unpacked as (year, count) pairs.
    results.extend(
        {'Year': year, 'CitationCount': citation_count, 'DOI_OA': doi}
        for year, citation_count in co.cc[0]
    )

# Tabulate the collected records
citation_df = pd.DataFrame(results)


# Write the citation table to the configured output path, without the index
article_citation_filepath = get_var('article_citation_filepath')
citation_df.to_csv(article_citation_filepath, index=False)

1973-2024
