In [None]:
# pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import json
import requests
import pytz
import psycopg2
import os
import time
import re
import json
import ast
from datetime import timedelta, datetime
from pandas import json_normalize
from dotenv import load_dotenv
from requests.exceptions import Timeout, RequestException
from psycopg2 import sql
from psycopg2.extras import execute_values

In [4]:
def get_credentials():
    """Load and validate credentials from environment variables."""

    print('get_credentials() method')
    load_dotenv()  # Load .env file

    # Facebook credentials
    scopus_api_key = os.getenv("SCOPUS_API_KEY")
    if not scopus_api_key:
        raise ValueError("SCOPUS_API_KEY is missing in .env!")

    scopus_credentials = {
        "access_token": scopus_api_key,
        "scopus_label": os.getenv("SCOPUS_LABEL")
    }

    # Database credentials
    db_credentials = {
        "hostname": os.getenv("DB_HOST"),
        "port": int(os.getenv("DB_PORT")),  # Convert to integer
        "username": os.getenv("DB_USER"),
        "password": os.getenv("DB_PASSWORD"),
        "database": os.getenv("DB_NAME"),
        "schema": os.getenv("DB_SCHEMA")
    }

    return scopus_credentials, db_credentials

In [5]:
def scopus_api_caller(url, params, headers, max_retries=3, timeout=20):
    print('scopus_api_caller() method')
    all_data = []
    retry_count = 0

    while url and retry_count < max_retries:
        try:
            print(f'Making request to URL: {url}')
            response = requests.get(
                url, params=params, headers=headers, timeout=timeout)
            print(f'Response status code: {response.status_code}')

            if response.status_code != 200:
                print(f'Error response content: {response.text}')

            response.raise_for_status()
            data = response.json()

            if 'search-results' in data and 'entry' in data['search-results']:
                all_data.extend(data['search-results']['entry'])
                print(
                    f"Collected {len(data['search-results']['entry'])} items. Total: {len(all_data)}")
            else:
                print("No data found in response")
                break

            # Check if there are more pages
            if 'link' in data['search-results']:
                next_link = next(
                    (link for link in data['search-results']['link'] if link['@ref'] == 'next'), None)
                if next_link:
                    url = next_link['@href']
                    params = {}  # Clear params as they're included in the next URL
                else:
                    url = None
                    print("No more pages")
            else:
                url = None
                print("No more pages")

            retry_count = 0  # Reset retry count on successful request

        except (Timeout, RequestException) as e:
            retry_count += 1
            print(
                f"Request failed: {e}. Retry attempt {retry_count} of {max_retries}")
            if retry_count == max_retries:
                print("Max retries reached. Exiting.")
                break
            time.sleep(2 ** retry_count)  # Exponential backoff

    print(f'Exiting scopus_api_caller. Total items collected: {len(all_data)}')
    return all_data

In [6]:
def scopus_search(scopus_credentials, query, start=0, count=25, sort='citedby-count', max_results=5000):
    print('scopus_search() method')

    scopus_api_key = scopus_credentials['access_token']
    url = 'https://api.elsevier.com/content/search/scopus'

    headers = {
        'X-ELS-APIKey': scopus_api_key,
        'Accept': 'application/json'
    }

    # list of fields
    fields = [
        'dc:identifier',  # Unique identifier
        'prism:doi',      # DOI for Abstract Retrieval API
        'prism:coverDate',  # Publication date
        'citedby-count',  # Citation count
        'prism:publicationName',  # Journal or conference name
        # Type of publication (e.g., Article, Conference Paper)
        'subtypeDescription',
    ]

    params = {
        'query': query,
        'field': ','.join(fields),
        'count': count,
        'start': start,
        'sort': sort
    }

    all_results = []
    total_results = None

    while len(all_results) < max_results:
        print(f'Full URL: {url}')
        print(f'Headers: {headers}')
        print(f'Params: {params}')

        # Fetch the data
        batch_results = scopus_api_caller(url, params, headers)

        if not batch_results:
            print("No results returned from API. Stopping search.")
            break

        all_results.extend(batch_results)

        # Check total number of results if not already set
        if total_results is None:
            total_results = int(batch_results[0].get(
                'search-results', {}).get('opensearch:totalResults', 0))
            print(f"Total results available: {total_results}")
            if total_results == 0:
                print("No results found for the given query.")
                break

        # Update start for the next page
        params['start'] = len(all_results)

        # Check if we've reached the end of results
        if len(all_results) >= total_results or len(all_results) >= max_results:
            print("All available results have been retrieved or max results reached.")
            break

        if len(batch_results) < count:
            print("Reached the end of available results.")
            break

    print(f'{len(all_results)} of SCOPUS data will be processed.')

    if not all_results:
        print("No results found for the given query.")

    return all_results

In [7]:
def process_list_item(item):
    if isinstance(item, dict):
        return [str(value) for value in item.values() if value]
    elif isinstance(item, str):
        return [item]
    elif isinstance(item, list):
        return [str(subitem) for subitem in item if subitem]
    else:
        return [str(item)] if item else []

In [8]:
def process_scopus_search_results(all_data):
    """Process Scopus search results data
    Return a dataframe with selected columns and database-friendly names
    """
    if not all_data:
        print("No data received from Scopus API")
        return pd.DataFrame()

    # Convert to DataFrame
    df = pd.json_normalize(all_data)

    # Function to clean column names
    def clean_column_name(name):
        # Replace non-alphanumeric characters with underscores
        name = re.sub(r'[^a-zA-Z0-9]', '_', name)
        # Replace multiple underscores with a single underscore
        name = re.sub(r'_+', '_', name)
        # Remove leading or trailing underscores
        name = name.strip('_')
        # Convert to lowercase
        return name.lower()

    # Clean column names
    df.columns = [clean_column_name(col) for col in df.columns]

    # Ensure all fields from scopus_search() are present
    expected_fields = [
        'dc_identifier',
        'prism_doi',
        'prism_coverdate',
        'citedby_count',
        'prism_publicationname',
        'subtypedescription'
    ]

    for field in expected_fields:
        if field not in df.columns:
            df[field] = None
            print(
                f"Warning: '{field}' not found in API response. Added as empty column.")

    # Convert numeric fields
    if 'citedby_count' in df.columns:
        df['citedby_count'] = pd.to_numeric(
            df['citedby_count'], errors='coerce')

    # Convert date fields
    if 'prism_coverdate' in df.columns:
        df['prism_coverdate'] = pd.to_datetime(
            df['prism_coverdate'], errors='coerce')

    # Add a column for publication year
    if 'prism_coverdate' in df.columns:
        df['publication_year'] = df['prism_coverdate'].dt.year
    else:
        print(
            "Warning: 'prism_coverdate' not found in the data. Using 2100 as fallback year.")
        df['publication_year'] = 2100

    # Ensure publication_year is always an integer
    df['publication_year'] = df['publication_year'].fillna(2100).astype(int)

    # Print column names and their types for debugging
    print("Column names and types:")
    print(df.dtypes)

    # Print the first few rows for debugging
    print("First few rows of the processed dataframe:")
    print(df.head())

    return df

In [9]:
def exclude_existing_results(new_results, existing_df):
    if existing_df.empty:
        return new_results

    existing_ids = set(existing_df['dc_identifier'].tolist())
    return [result for result in new_results if result.get('dc:identifier') not in existing_ids]



In [10]:
def scopus_research_procedures(years_to_process):
    try:
        scopus_credentials, db_credentials = get_credentials()  # pylint: disable=unused-variable

        max_results_per_api_call = 5000

        csv_file = 'polyu_research_output.csv'
        existing_df = pd.DataFrame()
        try:
            existing_df = pd.read_csv(csv_file)
            if 'publication_year' not in existing_df.columns:
                print("Adding missing publication_year column to existing data")
                existing_df['publication_year'] = pd.to_datetime(existing_df['prism_coverdate']).dt.year
            print(f"Loaded {len(existing_df)} existing records from {csv_file}")
        except FileNotFoundError:
            print(f"No existing file found at {csv_file}. Starting fresh.")
        except Exception as e:
            print(f"Error reading CSV file: {e}")

        all_new_results = []
        latest_df = existing_df.copy()  # Initialize latest_df with existing data

        for year in years_to_process:
            query = f"AFFIL(\"The Hong Kong Polytechnic University\") AND PUBYEAR = {year}"
            print(f"\nExecuting Scopus search with query: {query}")

            year_results = []
            start = 0
            while True:
                try:
                    polyu_results = scopus_search(
                        scopus_credentials, query, start=start, max_results=max_results_per_api_call)

                    if not polyu_results:
                        print(f"No more results found for {year}.")
                        break

                    year_results.extend(polyu_results)
                    start += len(polyu_results)

                    print(f"Retrieved {len(polyu_results)} results for {year}. Total for {year}: {len(year_results)}")

                    if len(polyu_results) < max_results_per_api_call:
                        print(f"Reached the end of available results for {year}.")
                        break

                except Exception as e:
                    print(f"Error during API call: {e}")
                    print("Saving current results and moving to next year.")
                    break

            # Process and save results for this year
            if year_results:
                try:
                    new_df = process_scopus_search_results(year_results)
                    if 'publication_year' not in new_df.columns:
                        # Ensure publication_year is added
                        new_df['publication_year'] = year

                    # Combine with existing data
                    latest_df = pd.concat([latest_df, new_df], ignore_index=True)
                    latest_df.drop_duplicates(subset='dc_identifier', keep='last', inplace=True)

                    # Try to save the results to a CSV file
                    try:
                        latest_df.to_csv(csv_file, index=False)
                        print(f"\nResults saved to '{csv_file}'. Total records: {len(latest_df)}")
                    except Exception as e:
                        print(f"Error saving CSV file: {e}")
                        print("Continuing with in-memory DataFrame.")

                except Exception as e:
                    print(f"Error processing results for {year}: {e}")

            all_new_results.extend(year_results)

        if not all_new_results:
            print("No new results to process across all years.")
        else:
            print(f"Total new results across all years: {len(all_new_results)}")

        return latest_df  # Return the latest DataFrame

    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()  # Return an empty DataFrame in case of overall failure

In [3]:
# results_df = scopus_research_procedures()

In [11]:
# results_df.head()

In [12]:
## revised scopus_research_procedures function to exclude existing results

def scopus_research_procedures(years_to_process):
    try:
        scopus_credentials, _ = get_credentials()

        max_results_per_api_call = 5000

        csv_file = 'polyu_research_output.csv'
        metadata_file = 'scopus_search_metadata.json'
        
        # Load or initialize metadata
        try:
            with open(metadata_file, 'r') as f:
                metadata = json.load(f)
        except FileNotFoundError:
            metadata = {}

        existing_df = pd.DataFrame()
        try:
            existing_df = pd.read_csv(csv_file)
            if 'publication_year' not in existing_df.columns:
                print("Adding missing publication_year column to existing data")
                existing_df['publication_year'] = pd.to_datetime(existing_df['prism_coverdate']).dt.year
            print(f"Loaded {len(existing_df)} existing records from {csv_file}")
        except FileNotFoundError:
            print(f"No existing file found at {csv_file}. Starting fresh.")
        except Exception as e:
            print(f"Error reading CSV file: {e}")

        all_new_results = []
        latest_df = existing_df.copy()  # Initialize latest_df with existing data

        for year in years_to_process:
            query = f"AFFIL(\"The Hong Kong Polytechnic University\") AND PUBYEAR = {year}"
            print(f"\nExecuting Scopus search with query: {query}")

            year_results = []
            start = metadata.get(str(year), {}).get('last_start', 0)
            while True:
                try:
                    polyu_results = scopus_search(
                        scopus_credentials, query, start=start, max_results=max_results_per_api_call)

                    if not polyu_results:
                        print(f"No more results found for {year}.")
                        break

                    year_results.extend(polyu_results)
                    start += len(polyu_results)

                    print(f"Retrieved {len(polyu_results)} results for {year}. Total for {year}: {len(year_results)}")

                    if len(polyu_results) < max_results_per_api_call:
                        print(f"Reached the end of available results for {year}.")
                        break

                    # Update metadata
                    metadata[str(year)] = {'last_start': start}
                    with open(metadata_file, 'w') as f:
                        json.dump(metadata, f)

                except Exception as e:
                    print(f"Error during API call: {e}")
                    print("Saving current results and moving to next year.")
                    break

            # Process and save results for this year
            if year_results:
                try:
                    new_df = process_scopus_search_results(year_results)
                    if 'publication_year' not in new_df.columns:
                        # Ensure publication_year is added
                        new_df['publication_year'] = year

                    # Combine with existing data
                    latest_df = pd.concat([latest_df, new_df], ignore_index=True)
                    latest_df.drop_duplicates(subset='dc_identifier', keep='last', inplace=True)

                    # Try to save the results to a CSV file
                    try:
                        latest_df.to_csv(csv_file, index=False)
                        print(f"\nResults saved to '{csv_file}'. Total records: {len(latest_df)}")
                    except Exception as e:
                        print(f"Error saving CSV file: {e}")
                        print("Continuing with in-memory DataFrame.")

                except Exception as e:
                    print(f"Error processing results for {year}: {e}")

            all_new_results.extend(year_results)

        if not all_new_results:
            print("No new results to process across all years.")
        else:
            print(f"Total new results across all years: {len(all_new_results)}")

        return latest_df  # Return the latest DataFrame

    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()  # Return an empty DataFrame in case of overall failure

In [13]:
## 3rd revised scopus_research_procedures function to handle results beyond 5000 limit:

def scopus_research_procedures(years_to_process):
    try:
        scopus_credentials, _ = get_credentials()  

        max_results_per_api_call = 5000

        csv_file = 'polyu_research_output.csv'
        metadata_file = 'scopus_search_metadata.json'
        
        # Load or initialize metadata
        try:
            with open(metadata_file, 'r') as f:
                metadata = json.load(f)
        except FileNotFoundError:
            metadata = {}

        existing_df = pd.DataFrame()
        try:
            existing_df = pd.read_csv(csv_file)
            if 'publication_year' not in existing_df.columns:
                print("Adding missing publication_year column to existing data")
                existing_df['publication_year'] = pd.to_datetime(existing_df['prism_coverdate']).dt.year
            print(f"Loaded {len(existing_df)} existing records from {csv_file}")
        except FileNotFoundError:
            print(f"No existing file found at {csv_file}. Starting fresh.")
        except Exception as e:
            print(f"Error reading CSV file: {e}")

        latest_df = existing_df.copy()  # Initialize latest_df with existing data

        for year in years_to_process:
            query = f"AFFIL(\"The Hong Kong Polytechnic University\") AND PUBYEAR = {year}"
            print(f"\nExecuting Scopus search with query: {query}")

            start = metadata.get(str(year), {}).get('last_start', 0)
            total_results = metadata.get(str(year), {}).get('total_results', None)
            
            while True:
                try:
                    polyu_results = scopus_search(
                        scopus_credentials, query, start=start, max_results=max_results_per_api_call)

                    if not polyu_results:
                        print(f"No more results found for {year}.")
                        break

                    # Process new results
                    new_df = process_scopus_search_results(polyu_results)
                    if 'publication_year' not in new_df.columns:
                        new_df['publication_year'] = year

                    # Exclude existing results
                    new_df = new_df[~new_df['dc_identifier'].isin(latest_df['dc_identifier'])]

                    # Append new results to latest_df
                    latest_df = pd.concat([latest_df, new_df], ignore_index=True)

                    start += len(polyu_results)
                    print(f"Retrieved {len(polyu_results)} new results for {year}. Total for {year}: {len(latest_df[latest_df['publication_year'] == year])}")

                    # Update total_results if not set
                    if total_results is None:
                        total_results = int(polyu_results[0].get('search-results', {}).get('opensearch:totalResults', 0))
                        print(f"Total results available for {year}: {total_results}")

                    # Update metadata
                    metadata[str(year)] = {'last_start': start, 'total_results': total_results}
                    with open(metadata_file, 'w') as f:
                        json.dump(metadata, f)

                    # Save results to CSV file
                    latest_df.to_csv(csv_file, index=False)
                    print(f"\nResults saved to '{csv_file}'. Total records: {len(latest_df)}")

                    if start >= total_results:
                        print(f"Reached the end of available results for {year}.")
                        break

                except Exception as e:
                    print(f"Error during API call: {e}")
                    print("Saving current results and moving to next year.")
                    break

        print(f"Total records across all years: {len(latest_df)}")
        return latest_df  # Return the latest DataFrame

    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()  # Return an empty DataFrame in case of overall failure

In [14]:
def check_duplicates(df1, df2):
    # Assuming 'dc_identifier' is the unique identifier for each record
    duplicates = df1[df1['dc_identifier'].isin(df2['dc_identifier'])]
    print(f"Number of duplicate records: {len(duplicates)}")
    return duplicates

In [26]:
scopus_credentials, db_credentials = get_credentials()  
scopus_credentials, db_credentials

get_credentials() method


({'access_token': '79f2bcb9937cc0efa767b7e1b6da3055',
  'scopus_label': 'bennisSCOPUSKey'},
 {'hostname': 'portfolio-projects.ctoiwe860cq9.ap-southeast-1.rds.amazonaws.com',
  'port': 5432,
  'username': 'bennisyiu',
  'password': 'Yatpatyeh234',
  'database': 'postgres',
  'schema': 'public'})

In [None]:
# load_dotenv()

# # Access the credentials
# db_credentials = {
#     'hostname': os.getenv('DB_HOST'),
#     'port': int(os.getenv('DB_PORT', 5432)),  # Default to 5432 if not specified
#     'username': os.getenv('DB_USER'),
#     'password': os.getenv('DB_PASSWORD'),
#     'database': os.getenv('DB_NAME')
# }

# # Debugging: Print the credentials (ensure they are loaded correctly)
# print("Database Credentials:", db_credentials)

Database Credentials: {'hostname': 'portfolio-projects.ctoiwe860cq9.ap-southeast-1.rds.amazonaws.com', 'port': 5432, 'username': 'bennisyiu', 'password': 'Yatpatyeh234', 'database': 'postgres'}


In [27]:
db_credentials

{'hostname': 'portfolio-projects.ctoiwe860cq9.ap-southeast-1.rds.amazonaws.com',
 'port': 5432,
 'username': 'bennisyiu',
 'password': 'Yatpatyeh234',
 'database': 'postgres',
 'schema': 'public'}

In [28]:
def scopus_search_data_uploader(db_credentials, df, table_name='scopus_search_output'):
    """
    Upload Scopus data to Postgres database.
    Creates the table if it doesn't exist, then upserts data based on dc_identifier.
    """
    conn = None
    cursor = None
    print('scopus_search_data_uploader() method')
    
    try:
        # Data preprocessing
        date_columns = ['prism_coverdate']
        for col in date_columns:
            if col in df.columns:
                df[col] = pd.to_datetime(df[col], errors='coerce')

        df['publication_year'] = df['publication_year'].fillna(9999).astype(int)

        # Connect to the Postgres database
        conn = psycopg2.connect(
            host=db_credentials['hostname'],
            database=db_credentials['database'],
            user=db_credentials['username'],
            password=db_credentials['password'],
            port=db_credentials['port'],
            connect_timeout=30
        )
        cursor = conn.cursor()

        # Set the schema
        cursor.execute(sql.SQL("SET search_path TO {};").format(
            sql.Identifier(db_credentials['schema'])
        ))

        # Check if table exists, if not create it
        cursor.execute(sql.SQL("""
            CREATE TABLE IF NOT EXISTS {} (
                fa BOOLEAN,
                prism_url TEXT,
                dc_identifier TEXT PRIMARY KEY,
                prism_publicationname TEXT,
                prism_coverdate DATE,
                prism_doi TEXT,
                citedby_count INTEGER,
                subtype TEXT,
                subtypedescription TEXT,
                publication_year INTEGER
            )
        """).format(sql.Identifier(table_name)))

        # Prepare the data for insertion
        columns = df.columns.tolist()
        
        # Construct the INSERT ... ON CONFLICT DO UPDATE query
        insert_query = sql.SQL("""
            INSERT INTO {} ({})
            VALUES %s
            ON CONFLICT (dc_identifier) DO UPDATE SET
            {}
        """).format(
            sql.Identifier(table_name),
            sql.SQL(', ').join(map(sql.Identifier, columns)),
            sql.SQL(', ').join(
                sql.SQL("{0} = EXCLUDED.{0}").format(sql.Identifier(col))
                for col in columns if col != 'dc_identifier'
            )
        )

        # Insert or update data in chunks
        chunk_size = 50
        for i in range(0, len(df), chunk_size):
            chunk = df.iloc[i:i+chunk_size]
            values = [tuple(row) for _, row in chunk.iterrows()]
            execute_values(cursor, insert_query, values)
            print(f"Processed chunk of {len(chunk)} records")

        conn.commit()
        print(f"Successfully uploaded/updated data for {len(df)} records")

    except psycopg2.Error as e:
        print(f"Database error: {e}")
        if conn:
            conn.rollback()
        raise e
    except Exception as e:
        print(f"Unexpected error: {e}")
        if conn:
            conn.rollback()
        raise e
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()

In [29]:
latest_df = pd.read_csv('polyu_research_output.csv')

In [30]:
latest_df.head()

Unnamed: 0,fa,prism_url,dc_identifier,prism_publicationname,prism_coverdate,prism_doi,citedby_count,subtype,subtypedescription,publication_year
0,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85150042634,ACS Nano,2023-03-28 00:00:00,10.1021/acsnano.2c12606,664,re,Review,2023
1,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85144234312,Psychology and Marketing,2023-04-01 00:00:00,10.1002/mar.21767,486,ar,Article,2023
2,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85146445335,Tourism Management,2023-08-01 00:00:00,10.1016/j.tourman.2023.104724,468,re,Review,2023
3,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85151316371,Nature Communications,2023-12-01 00:00:00,10.1038/s41467-023-37526-5,404,ar,Article,2023
4,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85147606524,Materials and Design,2023-02-01 00:00:00,10.1016/j.matdes.2023.111661,349,re,Review,2023


In [31]:
latest_df.tail()

Unnamed: 0,fa,prism_url,dc_identifier,prism_publicationname,prism_coverdate,prism_doi,citedby_count,subtype,subtypedescription,publication_year
27652,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85160096295,Acta Geotechnica,2024-01-01 00:00:00,10.1007/s11440-023-01928-y,2,ar,Article,2024
27653,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85159675707,IEEE Wireless Communications,2024-06-01 00:00:00,10.1109/MWC.019.2200606,2,ar,Article,2024
27654,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85159130476,"Environment, Development and Sustainability",2024-07-01 00:00:00,10.1007/s10668-023-03346-2,2,ar,Article,2024
27655,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85159066873,International Social Work,2024-03-01 00:00:00,10.1177/00208728231165638,2,ar,Article,2024
27656,True,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85159041714,Fire Technology,2024-03-01 00:00:00,10.1007/s10694-023-01416-5,2,ar,Article,2024


In [2]:
# test DB connection
# import psycopg2
# from dotenv import load_dotenv
# import os

# # Load environment variables
# load_dotenv()

# # Retrieve credentials
# db_credentials = {
#     'hostname': os.getenv('DB_HOST'),
#     'port': int(os.getenv('DB_PORT', 5432)),
#     'username': os.getenv('DB_USER'),
#     'password': os.getenv('DB_PASSWORD'),
#     'database': os.getenv('DB_NAME')
# }

# try:
#     # Print credentials for debugging
#     print("Hostname:", db_credentials['hostname'])
#     print("Port:", db_credentials['port'])
#     print("Username:", db_credentials['username'])
#     print("Password:", "*****")
#     print("Database:", db_credentials['database'])

#     # Attempt to connect to the database
#     conn = psycopg2.connect(
#         host=db_credentials['hostname'],
#         port=db_credentials['port'],
#         user=db_credentials['username'],
#         password=db_credentials['password'],
#         database=db_credentials['database']
#     )
#     print("Connection successful!")
#     conn.close()
# except Exception as e:
#     print(f"Error connecting to the database: {e}")

In [32]:
scopus_search_data_uploader(db_credentials, latest_df, table_name='scopus_search_output')

scopus_search_data_uploader() method
Database error: connection to server at "portfolio-projects.ctoiwe860cq9.ap-southeast-1.rds.amazonaws.com" (13.214.114.30), port 5432 failed: timeout expired



OperationalError: connection to server at "portfolio-projects.ctoiwe860cq9.ap-southeast-1.rds.amazonaws.com" (13.214.114.30), port 5432 failed: timeout expired


### Abstract Retrieval API Here