Import packages:

In [5]:
import csv
import pandas as pd

Define the function that writes the combined database list to a CSV file:

In [6]:
def update_csv_file(csv_file_path, *database_lists):
    """
    Combine multiple lists of databases, remove duplicates based on URL, and write the results to a CSV file.

    The target file is opened in write mode, so any existing content is
    replaced rather than appended to. The file is always written as UTF-8 so
    that the output does not depend on the platform's default encoding and can
    be read back by tools that assume UTF-8.

    Duplicate handling: entries are keyed on the exact ``"URL"`` string. When
    the same URL appears more than once, the row keeps the position of the
    first occurrence but carries the data of the last occurrence.

    Args:
        csv_file_path (str): Path to the CSV file to be created/overwritten.
        *database_lists (list of dicts): Variable number of lists containing database information.
            Each dict is expected to use the keys "Name", "URL", "Category" and "Description".

    Raises:
        KeyError: If an entry has no "URL" key.
        ValueError: If an entry contains a key that is not one of the CSV columns
            (raised by ``csv.DictWriter``).
    """
    fieldnames = ["Name", "URL", "Category", "Description"]

    # Key every entry on its URL; a later entry with the same URL overwrites the
    # earlier one while the dict keeps the original insertion position.
    unique_by_url = {}
    for database_list in database_lists:
        for db in database_list:
            unique_by_url[db["URL"]] = db

    # newline='' lets the csv module control line endings; the explicit encoding
    # keeps the output identical across platforms.
    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(unique_by_url.values())

Add new databases below. Give each new list a unique variable name, and pass it to `update_csv_file` in the final cell so that it is included in the CSV.

In [7]:
# First batch of entries (the variable name presumably encodes the date added, MMDDYYYY).
# Every entry uses the four CSV columns: Name, URL, Category, Description.
db08272024 = [
    dict(Name="PubMed", URL="https://pubmed.ncbi.nlm.nih.gov", Category="Research Papers Database", Description="Free database of biomedical and life sciences literature."),
    dict(Name="Google Scholar", URL="https://scholar.google.com", Category="Research Papers Database", Description="Broad search of scholarly literature across many disciplines."),
    dict(Name="IEEE Xplore", URL="https://ieeexplore.ieee.org", Category="Research Papers Database", Description="Digital library for research papers in electronics, electrical engineering and computer science."),
    dict(Name="JSTOR", URL="https://www.jstor.org", Category="Research Papers Database", Description="Access to thousands of academic journals, books."),
    dict(Name="ScienceDirect", URL="https://www.sciencedirect.com", Category="Research Papers Database", Description="Large collection of scientific and technical research articles."),
    dict(Name="SpringerLink", URL="https://link.springer.com", Category="Research Papers Database", Description="Comprehensive range of scientific documents including journals and books."),
    dict(Name="Wiley Online Library", URL="https://onlinelibrary.wiley.com", Category="Research Papers Database", Description="Wide range of scientific journals and books across many disciplines."),
]

# Second batch of entries (the variable name presumably encodes the date added, MMDDYYYY).
# Every entry uses the four CSV columns: Name, URL, Category, Description.
db08282024 = [
    {"Name": "PLOS ONE", "URL": "https://journals.plos.org/plosone", "Category": "Research Papers Database", "Description": "Open-access journal covering a broad range of scientific disciplines."},
    {"Name": "BioRxiv", "URL": "https://www.biorxiv.org", "Category": "Research Papers Database", "Description": "Preprint server for biological sciences."},
    {"Name": "arXiv", "URL": "https://arxiv.org", "Category": "Research Papers Database", "Description": "Repository for preprints in fields such as physics, mathematics, computer science and more."},
    {"Name": "ERIC", "URL": "https://eric.ed.gov", "Category": "Research Papers Database", "Description": "Access to education literature and resources."},
    # URL path previously ended in "psycinf" (truncated); the product page slug is "psycinfo".
    {"Name": "PsycINFO", "URL": "https://www.apa.org/pubs/databases/psycinfo", "Category": "Research Papers Database", "Description": "Covers literature in psychology and related fields."},
    {"Name": "Cochrane Library", "URL": "https://www.cochranelibrary.com", "Category": "Research Papers Database", "Description": "Systematic reviews and evidence-based resources for healthcare."},
    {"Name": "Sci-Hub", "URL": "https://sci-hub.st", "Category": "Research Papers Database", "Description": "Provides access to scientific papers for free (note the legal and ethical considerations)."},
    {"Name": "ResearchGate", "URL": "https://www.researchgate.net", "Category": "Research Papers Database", "Description": "Network where researchers share papers and collaborate."}
]

# Third batch of entries: mixes paper databases with chemical, health, preprint and repository resources.
# NOTE(review): Microsoft Academic is believed to have been retired at the end of 2021 —
# confirm the URL still resolves before relying on this entry.
db08292024 = [
    dict(Name="Microsoft Academic", URL="https://academic.microsoft.com", Category="Research Papers Database", Description="Free search engine for scholarly literature."),
    dict(Name="Directory of Open Access Journals (DOAJ)", URL="https://www.doaj.org", Category="Research Papers Database", Description="Access to high-quality, open-access, peer-reviewed journals."),
    dict(Name="Academia.edu", URL="https://www.academia.edu", Category="Research Papers Database", Description="Platform for researchers to share their papers and collaborate."),
    dict(Name="PubChem", URL="https://pubchem.ncbi.nlm.nih.gov", Category="Chemical Database", Description="Free chemistry database maintained by NCBI."),
    dict(Name="Social Science Research Network (SSRN)", URL="https://www.ssrn.com", Category="Research Papers Database", Description="Repository for research in social sciences and humanities."),
    dict(Name="Open Access Theses and Dissertations (OATD)", URL="https://oatd.org", Category="Theses and Dissertations Database", Description="Indexes open access graduate theses and dissertations."),
    dict(Name="Chemical Abstracts Service (CAS)", URL="https://www.cas.org", Category="Chemical Database", Description="Comprehensive chemical information (subscription may be required)."),
    dict(Name="MedlinePlus", URL="https://medlineplus.gov", Category="Health Information Database", Description="Provides information about health topics, drugs, and medical research."),
    dict(Name="PsyArXiv", URL="https://psyarxiv.com", Category="Preprint Server", Description="Preprint server for the psychological sciences."),
    dict(Name="Harvard DASH", URL="https://dash.harvard.edu", Category="Institutional Repository", Description="Institutional repository for Harvard University's research outputs."),
    dict(Name="MIT DSpace", URL="https://dspace.mit.edu", Category="Institutional Repository", Description="MIT's repository of research and scholarship."),
    dict(Name="Zenodo", URL="https://zenodo.org", Category="Open Access Repository", Description="Open-access repository for a wide range of academic research outputs."),
    dict(Name="Figshare", URL="https://figshare.com", Category="Research Repository", Description="Allows researchers to upload and share research outputs across disciplines."),
    dict(Name="Dryad", URL="https://datadryad.org", Category="Data Repository", Description="Curated resource for datasets across various scientific disciplines."),
]

# Publishers of scientific papers (as opposed to the search/database entries in the other lists).
# Every entry uses the four CSV columns: Name, URL, Category, Description.
db082920241345 = [
    dict(Name="Nature Publishing Group", URL="https://www.nature.com", Category="Scientific Publisher", Description="Publisher of journals in the fields of science and medicine."),
    dict(Name="Elsevier", URL="https://www.elsevier.com", Category="Scientific Publisher", Description="Publisher of journals in various scientific disciplines including health, life sciences, and social sciences."),
    dict(Name="Springer", URL="https://www.springer.com", Category="Scientific Publisher", Description="Provides academic journals and books in science, technology, and medicine."),
    dict(Name="Wiley", URL="https://www.wiley.com", Category="Scientific Publisher", Description="Publisher of academic journals and books across various scientific and technical disciplines."),
    dict(Name="Taylor & Francis", URL="https://www.taylorandfrancis.com", Category="Scientific Publisher", Description="Publisher of academic journals and books in the humanities, social sciences, and natural sciences."),
    dict(Name="Cambridge University Press", URL="https://www.cambridge.org", Category="Scientific Publisher", Description="Publisher of academic books and journals across various disciplines."),
    dict(Name="Royal Society of Chemistry", URL="https://www.rsc.org", Category="Scientific Publisher", Description="Publisher of journals, books, and magazines in chemistry and related disciplines."),
    dict(Name="American Chemical Society", URL="https://pubs.acs.org", Category="Scientific Publisher", Description="Publisher of journals in chemistry and related fields."),
    dict(Name="Institute of Physics Publishing", URL="https://iopscience.iop.org", Category="Scientific Publisher", Description="Publisher of journals and books in physics and related disciplines."),
    dict(Name="SAGE Publications", URL="https://journals.sagepub.com", Category="Scientific Publisher", Description="Publisher of journals and books across various academic disciplines including social sciences and humanities."),
]

In [8]:
# Destination of the merged catalogue (relative to the notebook's working directory)
csv_file_path = "research_papers_databases.csv"

# Merge every batch defined above and rewrite the CSV from scratch
update_csv_file(csv_file_path, db08272024, db08282024, db08292024, db082920241345)

print(f"CSV file '{csv_file_path}' has been updated.")

# Load the file back to check what was actually written
df = pd.read_csv(csv_file_path)

# DataFrame.shape is (rows, columns); the header line is not counted as a row
num_rows = df.shape[0]
num_columns = df.shape[1]

print(f"The CSV file has {num_rows} rows and {num_columns} columns.")

CSV file 'research_papers_databases.csv' has been updated.
The CSV file has 39 rows and 4 columns.
