In [None]:
pip install pandas google.generativeai

In [None]:
import os
import re
import xml.etree.ElementTree as ET
from pathlib import Path

import google.generativeai as genai
import pandas as pd

In [None]:
def analyze_abstract_for_algorithm(abstract, api_key=None, model_name='gemini-pro'):
    """
    Ask a Gemini model whether an abstract reports a delirium prediction or detection algorithm.

    Args:
        abstract: Abstract text. Missing or blank values (None, NaN, '') are never sent to the API.
        api_key: Key passed to ``genai.configure``. When omitted, the ``GOOGLE_API_KEY``
            environment variable is used so that no credential has to be stored in the notebook.
        model_name: Model name handed to ``genai.GenerativeModel``.

    Returns:
        True if the model's reply starts with "yes", False for any other reply,
        and None when there was no abstract to analyze or the request failed.
    """
    # Nothing to classify: skip the (slow, billable) API round trip entirely.
    if not isinstance(abstract, str) or not abstract.strip():
        return None

    genai.configure(api_key=api_key or os.environ.get('GOOGLE_API_KEY'))
    
    prompt = f"""
    Analyze this abstract and determine if it reports a delirium prediction or detection algorithm.
    Answer only 'yes' if it describes developing or validating a prediction model, machine learning algorithm, 
    or detection system for delirium. Answer 'no' otherwise.
    
    Abstract: {abstract}
    """
    
    try:
        model = genai.GenerativeModel(model_name)
        response = model.generate_content(prompt)
        answer = response.text.strip().lower()
    except Exception as e:
        print(f"Error analyzing abstract: {e}")
        return None

    # Accept decorated replies such as "Yes.", "'yes'" or "yes, it does" instead of
    # requiring the reply to be exactly "yes"; the word boundary rejects e.g. "yesterday".
    return re.match(r"\W*yes\b", answer) is not None

In [3]:
def _styled_text(elem):
    """Return the text held by an EndNote field element, or '' if it is missing or empty."""
    if elem is None:
        return ''
    # The text may be split across several <style> children (e.g. when part of a
    # title uses different formatting), so concatenate every run, not just the first.
    runs = elem.findall('.//style')
    if runs:
        return ''.join(run.text or '' for run in runs)
    # Fall back to the element's own text for fields that carry no <style> wrapper.
    return (elem.text or '').strip()


def _normalize_doi(raw_doi):
    """Strip a leading doi.org / dx.doi.org URL or 'doi:' label so only the bare DOI remains."""
    return re.sub(
        r'^\s*(?:https?://(?:dx\.)?doi\.org/|doi:\s*)',
        '',
        raw_doi,
        flags=re.IGNORECASE,
    ).strip()


def parse_endnote_xml_to_df(xml_path, analyze=True):
    """
    Parse EndNote XML export file and extract publication details into a pandas DataFrame.

    Args:
        xml_path: Path (or binary file object) of the EndNote XML export, as accepted by ``ET.parse``.
        analyze: When True (default), add an 'algorithm' column by calling
            ``analyze_abstract_for_algorithm`` on every abstract. Pass False to only
            parse the file without making any API calls.

    Returns:
        DataFrame with one row per <record> and the string columns
        'Authors' ('; '-joined), 'Year', 'Title', 'DOI', 'Journal', 'Abstract',
        plus 'algorithm' when ``analyze`` is True. Missing fields are empty strings.
    """
    columns = ['Authors', 'Year', 'Title', 'DOI', 'Journal', 'Abstract']
    rows = []

    root = ET.parse(xml_path).getroot()

    for record in root.findall('.//record'):
        try:
            # Authors: keep every non-empty <author> under the first <authors> element.
            authors = []
            authors_elem = record.find('.//authors')
            if authors_elem is not None:
                for author in authors_elem.findall('.//author'):
                    name = _styled_text(author)
                    if name:
                        authors.append(name)

            # Journal: prefer the secondary title, fall back to the periodical's full title.
            journal = (
                _styled_text(record.find('.//secondary-title'))
                or _styled_text(record.find('.//periodical/full-title'))
            )

            rows.append({
                'Authors': '; '.join(authors),
                'Year': _styled_text(record.find('.//dates/year')),
                'Title': _styled_text(record.find('.//titles/title')),
                'DOI': _normalize_doi(_styled_text(record.find('.//electronic-resource-num'))),
                'Journal': journal,
                'Abstract': _styled_text(record.find('.//abstract')),
            })
        except Exception as e:
            # Best effort: one malformed record must not abort the whole import.
            print(f"Error processing a record: {e}")
            continue

    # Passing `columns` keeps the expected schema even when no record was found.
    df = pd.DataFrame(rows, columns=columns)

    if analyze:
        print("Analyzing abstracts for algorithms...")
        df['algorithm'] = df['Abstract'].apply(analyze_abstract_for_algorithm)

    return df

In [None]:
# Usage
xml_path = r"your_endnote_export.xml"

try:
    df = parse_endnote_xml_to_df(xml_path)
except Exception as e:
    # Report the problem, then re-raise: swallowing it would leave `df` undefined
    # (or stale from an earlier run) and make the following cells fail misleadingly.
    print(f"Error: {e}")
    raise

# Basic data inspection
print(f"\nNumber of articles found: {len(df)}")
print("\nFirst few rows:")
print(df.head())

# Optional: Save to CSV
# df.to_csv('publication_data_with_algorithm.csv', index=False)

In [8]:
# Keep only the rows the model flagged as algorithm papers.
# The explicit comparison with True also excludes rows where the analysis returned None.
algorithm_df = df.loc[df['algorithm'].eq(True)].copy()

In [9]:
# Build a clickable resolver URL for every row that has a DOI; rows without one get ''.
algorithm_df['DOI_Link'] = algorithm_df['DOI'].apply(
    lambda doi: '' if not doi else f'https://doi.org/{doi}'
)

In [10]:
# Final table: bibliographic columns in presentation order (the abstract is left out).
final_df = algorithm_df.loc[
    :,
    ['Authors', 'Year', 'Title', 'Journal', 'DOI', 'DOI_Link'],
]

In [None]:
# Report how many papers were kept and preview the first rows.
print(f"Number of papers with algorithms: {final_df.shape[0]}")
print("\nFirst few algorithm papers:", final_df.head(), sep="\n")

In [None]:
# Write the filtered table to disk (row index omitted) and confirm the destination.
csv_path = 'delirium_algorithm_papers.csv'
final_df.to_csv(path_or_buf=csv_path, index=False)
print(f"\nExported to {csv_path}")

In [None]:
# Optional: number of algorithm papers per publication year, ordered by year label.
year_count = final_df['Year'].value_counts(sort=False).sort_index()
print("\nPapers by year:", year_count, sep="\n")