In [84]:
from pyzotero import zotero
import bibtexparser
import os
import csv
import pandas as pd

## Helper Functions

In [23]:
def get_current_directory():
    """
    Report the directory the process is currently running in.

    Returns:
    str: The current working directory, as given by os.getcwd().
    """
    return os.getcwd()


def create_directory_if_not_exists(directory_path):
    """
    Ensure that a directory is present on disk.

    Missing parent directories are created as well, and an already existing
    directory is left untouched. Any failure is reported on stdout instead of
    being raised, so the caller is not interrupted.

    Parameters:
    directory_path (str): The path of the directory to be created.
    """
    requested_mode = 0o777
    try:
        os.makedirs(directory_path, exist_ok=True, mode=requested_mode)
    except Exception as error:
        print(f"An error occurred while creating the directory: {error}")

## Connect to API


In [126]:
# Zotero connection settings.
# The API key is a secret, so it is read from the ZOTERO_API_KEY environment
# variable instead of being stored in the notebook. A key that has ever been
# saved in the notebook in plain text should be revoked in the Zotero account
# settings and replaced with a new one.
library_id = '7850473'
library_type = 'user'
api_key = os.environ.get('ZOTERO_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the ZOTERO_API_KEY environment variable to your Zotero API key "
        "before running this notebook."
    )

# Label -> Zotero collection key. The pairing matches the mapping assigned in
# the "Set Zotero Collections" cell and the keys printed when the BibTeX files
# are written.
bronze_collection_keys = {'gs_bronze_gquerya':'PUSUFD8D','scopus_bronze_querya':'JWAMZELQ'}

zot = zotero.Zotero(library_id, library_type, api_key)

## Set Zotero Collections

In [128]:
# Build the Zotero client from library_id, library_type and api_key.
# NOTE(review): the API-keys cell above already creates the same client, so
# this second construction looks redundant — confirm before removing.
zot = zotero.Zotero(library_id, library_type, api_key)

# Optional lookup, left disabled: when uncommented it prints the name and key
# of every collection, which is how the keys in the mapping below can be found.
# collections = zot.collections()

# for collection in collections:
#     print(f"Collection Name: {collection['data']['name']}")
#     print(f"Collection ID: {collection['data']['key']}")
#     print("----")
    
# Label -> Zotero collection key. The label is used in the exported .bib file
# names. This assignment runs after the one in the API-keys cell, so it is the
# mapping the rest of the notebook uses.
bronze_collection_keys = {'gs_bronze_gquerya':'PUSUFD8D','scopus_bronze_querya':'JWAMZELQ'}

## Write Collection BibTeX

In [129]:
# Export every configured collection to <cwd>/bibtex/<label>-ref.bib.
curr_dir = get_current_directory()
create_directory_if_not_exists(f"{curr_dir}/bibtex")
for name_key, collection_key in bronze_collection_keys.items():
    print(name_key, collection_key)
    # Request BibTeX output for the fetch below.
    # NOTE(review): kept inside the loop on the assumption that pyzotero
    # clears request parameters after each call — confirm.
    zot.add_parameters(format='bibtex')

    file_name = f"{curr_dir}/bibtex/{name_key}-ref.bib"

    bib_db = zot.everything(zot.collection_items(collection_key))
    # The with-block closes the file on exit, so no explicit close() follows.
    with open(file_name, 'w') as bibtex_file:
        bibtexparser.dump(bib_db, bibtex_file)

gs_bronze_gquerya PUSUFD8D
scopus_bronze_querya JWAMZELQ


# BibTeX to CSV

In [130]:
# One .bib path per configured collection, using the same naming pattern as
# the export cell that wrote the files.
bib_file_paths = []
for collection_name in bronze_collection_keys:
    bib_file_paths.append(f"{curr_dir}/bibtex/{collection_name}-ref.bib")
bib_file_paths

['/Users/daltonsi/HILS/hils-lit-review/bibtex/gs_bronze_gquerya-ref.bib',
 '/Users/daltonsi/HILS/hils-lit-review/bibtex/scopus_bronze_querya-ref.bib']

In [131]:
def _bib_id_from_path(bib_path):
    """Return the file name of bib_path with a trailing '-ref.bib' removed."""
    suffix = '-ref.bib'
    file_name = os.path.basename(bib_path)
    # str.strip() would remove any of the suffix's *characters* from both ends
    # (e.g. 'refs_b-ref.bib' -> 's_'), so the suffix is cut off explicitly.
    if file_name.endswith(suffix):
        return file_name[:-len(suffix)]
    return file_name


def parse_bibtex_to_csv(bibtex_file_paths, csv_file_path):
    """
    Parse one or more BibTeX files and write all of their entries to one CSV file.

    Every entry gets one indicator column per input file, named "bib-<id>",
    where <id> is the file name without its "-ref.bib" suffix. The column is 1
    for the file the entry was read from and 0 for every other file. The CSV
    columns are the sorted union of all fields seen in any entry; a field that
    an entry does not have is left empty in that row. The id of each file is
    printed as it is processed.

    Parameters:
    bibtex_file_paths (list of str): The paths to the BibTeX files to read.
    csv_file_path (str): The path to the CSV file to be created.
    """
    bib_ids = [_bib_id_from_path(bib_path) for bib_path in bibtex_file_paths]
    fieldnames = set()
    global_entries = []
    for bib_path, curr_bib_id in zip(bibtex_file_paths, bib_ids):
        print(curr_bib_id)

        with open(bib_path, 'r') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        entries = bib_database.entries
        for entry in entries:
            # Mark which input file this entry came from.
            for bib_id in bib_ids:
                entry[f"bib-{bib_id}"] = 1 if curr_bib_id == bib_id else 0
            fieldnames.update(entry.keys())
            # 'bib_id' is never filled in for any entry; the column is kept so
            # that the CSV has the same columns as before.
            fieldnames.update(['bib_id'])
        global_entries += entries
    fieldnames = sorted(fieldnames)  # Sorting the fieldnames for consistent column order
    # Write the entries to a CSV file. UTF-8 is used explicitly because it is
    # the default encoding pandas.read_csv expects when the file is read back.
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(global_entries)

In [137]:
# Create the CSV output directory and write all BibTeX entries into one CSV.
# NOTE(review): csv_path is not assigned in any cell visible in this notebook,
# so this cell raises NameError on a fresh kernel unless it is defined
# elsewhere — confirm, and define it next to the other paths. It is
# concatenated directly with a file name below, so it must end with a path
# separator.
create_directory_if_not_exists(csv_path)
parse_bibtex_to_csv(bib_file_paths,csv_path + 'query_results.csv')

gs_bronze_gquerya
scopus_bronze_querya


In [138]:
# Load the combined CSV back in and report how many rows it holds.
results_csv = csv_path + 'query_results.csv'
query_result_df = pd.read_csv(results_csv)
total_result_count = query_result_df.shape[0]
print(f"{total_result_count} Results found among {len(bib_file_paths)} Queries")
query_result_df

469 Results found among 2 Queries


Unnamed: 0,ENTRYTYPE,ID,author,bib-gs_bronze_gquerya,bib-scopus_bronze_querya,bib_id,booktitle,doi,isbn,issn,journal,note,number,pages,publisher,shorttitle,title,url,volume,year
0,inproceedings,abdollahi_ontology-guided_2020,"Abdollahi, Mahdi and Gao, Xiaoying and Mei, Yi...",1,0,,,,,,,,,78--88,Springer,,Ontology-guided data augmentation for medical ...,,,2020.0
1,article,abdollahi_substituting_2021,"Abdollahi, Mahdi and Gao, Xiaoying and Mei, Yi...",1,0,,,,,0933-3657,Artificial Intelligence in Medicine,Publisher: Elsevier,,102167,,,Substituting clinical features using synthetic...,,120,2021.0
2,article,adib_bnclinical-sum_2024,"Adib, Quazi Adibur Rahman and Alam, Sanjana Binte",1,0,,,,,,,Publisher: Brac University,,,,,{BnClinical}-{Sum}: benchmarking datasets for ...,,,2024.0
3,article,ahmed_text_2023,"Ahmed, Hadeer and Traore, Issa and Mamun, Moha...",1,0,,,,,2666-8270,Machine Learning with Applications,Publisher: Elsevier,,100452,,,Text augmentation using a graph-based approach...,,11,2023.0
4,article,ahsan_advancements_2024,"Ahsan, Mohammad and Khan, Anam and Khan, Kaif ...",1,0,,,,,0266-4720,Expert Systems,Publisher: Wiley Online Library,3,e13499,,,Advancements in medical diagnosis and treatmen...,,41,2024.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,inproceedings,zhu_deepem_2018,"Zhu, W. and Vang, Y.S. and Huang, Y. and Xie, X.",0,1,,,10.1007/978-3-030-00934-2_90,,,,,,812--820,,,{DeepEM}: {Deep} {3D} {ConvNets} with {EM} for...,https://www.scopus.com/inward/record.uri?eid=2...,11071 LNCS,2018.0
465,inproceedings,zhu_graph_2023,"Zhu, D. and Liu, Y. and Chen, W. and Wang, Y. ...",0,1,,,10.1007/978-3-031-46671-7_3,,,,,,33--47,,,Graph {Convolution} {Synthetic} {Transformer} ...,https://www.scopus.com/inward/record.uri?eid=2...,14178 LNAI,2023.0
466,article,zhu_intelligent_2022,"Zhu, Z. and Li, J. and Huang, J. and Li, Z. an...",0,1,,,10.21037/tp-22-275,,,Translational Pediatrics,,7,1216--1233,,,An intelligent prediagnosis system for disease...,https://www.scopus.com/inward/record.uri?eid=2...,11,2022.0
467,inproceedings,zotova_multilingual_2023,"Zotova, E.",0,1,,,,,,,,,111--119,,,Multilingual {Information} {Extraction} in {Cl...,https://www.scopus.com/inward/record.uri?eid=2...,3625,2023.0


In [146]:
# Merge rows that share the same BibTeX ID into a single row per ID.
def _join_unique_values(col):
    """Join the distinct non-missing values of one column within one ID group.

    Missing values are dropped first so that they do not end up in the merged
    text as the literal string 'nan'.
    """
    return ' '.join(map(str, col.dropna().unique()))


# Indicator columns written by parse_bibtex_to_csv are named "bib-<id>".
flag_columns = [column for column in query_result_df.columns if column.startswith('bib-')]

# A merged row is flagged for a query if any of its source rows was (max of
# the 0/1 values). This does not depend on the order in which the rows appear,
# unlike replacing the joined text '1 0' or '0 1'.
aggregations = {
    column: ('max' if column in flag_columns else _join_unique_values)
    for column in query_result_df.columns
    if column != 'ID'
}

grouped = query_result_df.groupby("ID")
combined_df = grouped.agg(aggregations)

combined_df.to_csv(csv_path + 'query_results_merged.csv')

In [144]:
# Summary: number of rows read from the CSV versus the number of distinct
# entry IDs left after merging.
summary_lines = (
    f"{total_result_count} Results found among {len(bib_file_paths)} Queries",
    f"This includes {len(combined_df)} unique articles",
)
print(*summary_lines, sep='\n')

469 Results found among 2 Queries
This includes 460 unique articles
