# Xplore

## Imports

In [24]:
import datetime
import json
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scienceplots  # imported for its side effect: registers the 'science' / 'ieee' styles
from bs4 import BeautifulSoup

from xploreapi import XPLORE

In [2]:
# Provenance banner: when the notebook was last executed and which library
# versions produced the outputs below. `matplotlib` must be bound by the
# imports cell (`import matplotlib`); `import matplotlib.pyplot as plt` alone
# does not bind that name.
print(f"Last run: {datetime.datetime.now()}")
print(
    f"numpy: {np.__version__}",
    f"pandas: {pd.__version__}",
    f"matplotlib: {matplotlib.__version__}",
)

Last run: 2024-03-08 12:21:32.137236
numpy: 1.26.4 pandas: 2.2.1 matplotlib: 3.8.3


In [3]:
# Apply the 'science' + 'ieee' style sheets to every figure in this notebook.
# Goes through `plt`, which the imports cell binds, instead of the bare
# `matplotlib` name.
plt.style.use(['science', 'ieee'])

In [4]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## IEEE Xplore Data Analysis

In [5]:
def load_api_key(filepath):
    """
    Read the IEEE Xplore API key from a JSON configuration file.

    Parameters:
    filepath (str): Path to a JSON file whose top-level object may contain
        the key 'XPLORE_API_KEY'.

    Returns:
    The value stored under 'XPLORE_API_KEY', or None when that key is absent.

    Raises:
    OSError: If the file cannot be opened (e.g. it does not exist).
    json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale-specific default encoding.
    with open(filepath, 'r', encoding='utf-8') as file:
        config = json.load(file)
    return config.get('XPLORE_API_KEY', None)

ieee_api_key = load_api_key('../api_keys.json')

# Fail fast: the cells below pass this key to XPLORE, so stop here with a clear
# message instead of letting them run with a missing key.
if not ieee_api_key:
    raise RuntimeError("XPlore API key not found in config.")

In [6]:
query = XPLORE(ieee_api_key)  # create a query object
query.maximumResults(200)  # define maximum number of results, 200 is the maximum
query.dataType('json')  # define data type
query.dataFormat('object')  # define data format
# query.issn('1949-3053')  # define ISSN
query.publicationTitle('IEEE Transactions on Smart Grid')
query.insertionStartDate('20230101')  # define start date
query.insertionEndDate('20230331')  # define end date
data = query.callAPI()  # call the API

# Keep only records that have at least one author, a publication date and
# author-supplied index terms. Every lookup is defensive so a record (or a whole
# response) that lacks one of these fields is filtered out instead of raising
# KeyError. Assumes `data` is the parsed JSON dict — TODO confirm for empty results.
# The intermediate variable names are unchanged so later cells that reference
# them keep working.
articles_filter_no_author = [
    x for x in data.get('articles', [])
    if (x.get('authors') or {}).get('authors')
]
articles_filter_no_author_termns = [x for x in articles_filter_no_author if 'publication_date' in x]
articles = [
    x for x in articles_filter_no_author_termns
    if 'author_terms' in (x.get('index_terms') or {})
]

# Write to file: one line per article, holding the dict's text representation.
# UTF-8 is set explicitly because article metadata may contain non-ASCII
# characters that the platform default encoding cannot represent.
with open('./../data/raw/articles.txt', 'w', encoding='utf-8') as file:
    for item in articles:
        file.write("%s\n" % item)

In [28]:
def _has_required_fields(article):
    """Return True when a record has at least one author, a publication date and author terms."""
    authors = (article.get('authors') or {}).get('authors') or []
    index_terms = article.get('index_terms') or {}
    return len(authors) > 0 and 'publication_date' in article and 'author_terms' in index_terms


def fetch_ieee_articles(api_key, publication_title, start_date, end_date, save=False, filename='articles.txt',
                        max_results=200):
    """
    Fetch articles from IEEE Xplore API for a specific publication within a given date range and optionally save to a file.

    Only records that have at least one author, a 'publication_date' field and
    'author_terms' inside 'index_terms' are returned; records missing any of
    these are dropped rather than raising.

    Parameters:
    api_key (str): The API key for accessing the IEEE Xplore API.
    publication_title (str): The title of the publication to search for.
    start_date (str): The insertion start date for the search query in YYYYMMDD format.
    end_date (str): The insertion end date for the search query in YYYYMMDD format.
    save (bool): Whether to save the fetched articles to a file. Defaults to False.
    filename (str): The filename where the articles will be saved if save is True. Defaults to 'articles.txt'.
    max_results (int): Maximum number of results requested from the API. Defaults to 200.

    Returns:
    list: A list of article dicts that match the search criteria (possibly empty).
    """
    query = XPLORE(api_key)  # create a query object
    query.maximumResults(max_results)
    query.dataType('json')
    query.dataFormat('object')
    # query.issn('1949-3053')  # optionally define ISSN
    query.publicationTitle(publication_title)
    query.insertionStartDate(start_date)
    query.insertionEndDate(end_date)
    data = query.callAPI()

    # A response without an 'articles' entry is treated as "no articles" instead
    # of raising KeyError. Assumes `data` is the parsed JSON dict — TODO confirm
    # the exact payload the API sends for zero matches.
    raw_articles = (data or {}).get('articles', [])
    articles = [article for article in raw_articles if _has_required_fields(article)]

    if save:
        # One line per article, holding the dict's text representation. UTF-8 is
        # explicit because article metadata may contain non-ASCII characters that
        # the platform default encoding cannot represent.
        with open(filename, 'w', encoding='utf-8') as file:
            for item in articles:
                file.write("%s\n" % item)

    return articles

# Example usage: fetch the IEEE Transactions on Smart Grid records inserted
# between 2023-01-01 and 2023-03-31 and save them under data/raw.
articles = fetch_ieee_articles(
    api_key=ieee_api_key,
    publication_title='IEEE Transactions on Smart Grid',
    start_date='20230101',
    end_date='20230331',
    save=True,
    filename='./../data/raw/TSG202303.txt',
)


In [None]:
# One row per article, built in a single constructor call. The filter that
# produced `articles` only guarantees 'publication_date' and 'author_terms',
# so 'doi' and 'ieee_terms' are read defensively: a missing DOI becomes None
# and missing IEEE terms become an empty list instead of raising KeyError.
df = pd.DataFrame(
    [
        {
            'doi': x.get('doi'),
            'publication_date': x['publication_date'],
            'ieee_terms': x['index_terms'].get('ieee_terms', {}).get('terms', []),
            'author_terms': x['index_terms']['author_terms']['terms'],
        }
        for x in articles
    ],
    columns=['doi', 'publication_date', 'ieee_terms', 'author_terms'],
)

In [9]:
# Display the assembled article table (long frames are shown truncated).
df

Unnamed: 0,doi,publication_date,ieee_terms,author_terms
0,10.1109/TSG.2023.3240580,July 2023,"[Roads, Navigation, Feature extraction, Topolo...","[Electric vehicle fleet charging guidance, tra..."
1,10.1109/TSG.2023.3237624,Sept. 2023,"[Microgrids, Blockchains, Peer-to-peer computi...","[Microgrid, blockchain, prosumers, smart city,..."
2,10.1109/TSG.2023.3263107,Nov. 2023,"[Integrated circuits, Microgrids, Integrated c...","[Distributed control, hybrid ac/dc microgrid, ..."
3,10.1109/TSG.2023.3256480,Nov. 2023,"[Power system protection, Power system faults,...","[Cascading failure, deep learning, detection m..."
4,10.1109/TSG.2023.3246083,Sept. 2023,"[Pricing, Batteries, Peer-to-peer computing, M...","[P2P energy sharing, energy management, vehicl..."
...,...,...,...,...
115,10.1109/TSG.2023.3255168,Nov. 2023,"[Power system stability, Phase locked loops, C...","[Angle stability, grid-following inverter, gri..."
116,10.1109/TSG.2023.3261271,Nov. 2023,"[Trajectory, Home appliances, Load monitoring,...","[Capsule network, conditional generative adver..."
117,10.1109/TSG.2023.3258441,July 2023,"[Thermal loading, Buildings, Load modeling, He...","[Integrated energy systems, robust nodal prici..."
118,10.1109/TSG.2023.3243811,Sept. 2023,"[Convergence, Scheduling, Convex functions, Vi...","[Energy sharing, virtual power plant, ADMM, ma..."
