### ScienceDirect Search API documentation: https://dev.elsevier.com/documentation/ScienceDirectSearchAPI.wadl

In [None]:
# https://api.elsevier.com/content/search/sciencedirect

In [2]:
import os
import csv
import requests
from credentials import keys
import pandas as pd

In [3]:
# Endpoint of the ScienceDirect search API.
url = 'https://api.elsevier.com/content/search/sciencedirect'

# Ask for a JSON response.
headers = {'Accept': 'application/json'}

# Authentication headers. The secrets live in a separate credentials.py that
# defines a dict named `keys`, e.g.
#     keys = {"els-apikey": "...", "els-inst-token": "..."}
headers.update({
    'X-ELS-APIKey': keys['els-apikey'],
    'X-ELS-Insttoken': keys['els-inst-token'],
})

In [5]:
# Query the ScienceDirect search API one publication year at a time, page
# through the results, and write one CSV row (DOI, Year, Title) per article.

# Ensure the 'csv' directory exists
os.makedirs('csv', exist_ok=True)

# CSV file path
csv_file_path = os.path.join('csv', 'sciencedirect_search_results.csv')

# Define the base search query (adjust as needed)
# NOTE(review): "recommendor" looks like a misspelling of "recommender" and
# may be silently missing matches -- confirm before changing the query.
base_query = '("recommendation system" OR "jitai" OR "recommendor") AND ("indoor environment" OR "thermal comfort")'

# Number of results requested per page. A page shorter than this is treated
# as the last page for the year being fetched.
page_size = 100

# Seconds to wait on a single request before giving up, so that a stalled
# connection cannot block the whole run indefinitely.
request_timeout = 30

# Open the CSV file for writing
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['DOI', 'Year', 'Title'])  # Header row

    # Iterate over each year from 2000 to 2024
    for year in range(2000, 2025):
        date_range = f'{year}'  # Restrict the search to the current year
        start = 0  # Start at the beginning for each year

        while True:  # Loop to handle pagination for the current year
            params = {
                'query': base_query,
                'date': date_range,
                'count': page_size,
                'start': start,
            }

            # Send the GET request; a network failure abandons this year
            # only, keeping the run best-effort like the HTTP-error path.
            try:
                response = requests.get(
                    url,
                    headers=headers,
                    params=params,
                    timeout=request_timeout,
                )
            except requests.RequestException as exc:
                print(f'Error fetching data for {year}: {exc}')
                break

            if response.status_code != 200:
                print(f'Error fetching data for {year}: {response.status_code}')
                break  # Exit loop on error

            results = response.json()
            # Treat a missing 'search-results' or 'entry' key as an empty page
            # instead of raising KeyError.
            articles = results.get('search-results', {}).get('entry', [])
            total_fetched = len(articles)

            # Write DOI, Year, and Title for each article
            for article in articles:
                # Skip entries without a 'dc:identifier'
                if 'dc:identifier' not in article:
                    continue

                doi = article['dc:identifier'].replace('DOI:', '')
                title = article.get('dc:title', 'Title not available')
                # The Year column holds the year the search was restricted to,
                # so every row matches the three-column header.
                writer.writerow([doi, year, title])

            start += total_fetched  # Update `start` for the next page

            # Exit loop if there are no more results to fetch for the current year
            if total_fetched < page_size:
                break

print(f'Data has been successfully saved to {csv_file_path}')

KeyboardInterrupt: 