In [1]:
import aiohttp
import pandas as pd
from typing import List, Dict, Any
import asyncio
from tqdm.auto import tqdm
import random
import json

# IEEE Xplore publication number of the parent QCE conference series; sent as
# "parentId" when listing its proceedings and embedded in the Referer URL.
QCE_ID = 1838864
# Browser-like User-Agent string sent in the headers of the IEEE Xplore requests below.
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0.1 Safari/605.1.15"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
async with aiohttp.ClientSession() as session:
    query = {
        "parentId": QCE_ID,
    }
    headers = {
        "User-Agent": USER_AGENT,
        "Referer": f"https://ieeexplore.ieee.org/xpl/conhome/{QCE_ID}/all-proceedings",
    }
    async with session.get(
        "https://ieeexplore.ieee.org/rest/publication/conhome/metadata",
        headers=headers,
        params=query,
    ) as response:
        if response.status == 200:
            proceedings_data = await response.json()
            print(proceedings_data)
        else:
            print(f"Failed to fetch data: {response.status}")

{'records': [{'publicationNumber': '10821005', 'bmsProductNumber': 'CFP24W18-ART', 'publisher': 'IEEE', 'parentPublicationNumber': '1838864', 'displayTitle': '2024 IEEE International Conference on Quantum Computing and Engineering (QCE)', 'parentTitle': 'Quantum Computing and Engineering (QCE), IEEE International Conference on', 'doiLink': 'https://doi.org/10.1109/QCE60285.2024', 'hasParentConference': True, 'contentType': 'Conferences', 'conferenceDate': '15-20 Sept. 2024', 'publicationDoi': '10.1109/QCE60285.2024', 'issues': [{'issueNumber': 10820986, 'volume': '03', 'year': '2024', 'month': 'Sept.'}, {'issueNumber': 10820989, 'volume': '02', 'year': '2024', 'month': 'Sept.'}, {'issueNumber': 10821250, 'volume': '01', 'year': '2024', 'month': 'Sept.'}], 'publicationInactive': False, 'apc': 0, 'showIssueCoverImage': False, 'coverImage': '/ielx8/10821005/10820986/10821017.jpg', 'location': 'Montreal, QC, Canada', 'country': 'Canada'}, {'publicationNumber': '10313590', 'bmsProductNumber

In [3]:
# Inspect the per-proceedings records returned by the metadata request.
proceedings_data["records"]

[{'publicationNumber': '10821005',
  'bmsProductNumber': 'CFP24W18-ART',
  'publisher': 'IEEE',
  'parentPublicationNumber': '1838864',
  'displayTitle': '2024 IEEE International Conference on Quantum Computing and Engineering (QCE)',
  'parentTitle': 'Quantum Computing and Engineering (QCE), IEEE International Conference on',
  'doiLink': 'https://doi.org/10.1109/QCE60285.2024',
  'hasParentConference': True,
  'contentType': 'Conferences',
  'conferenceDate': '15-20 Sept. 2024',
  'publicationDoi': '10.1109/QCE60285.2024',
  'issues': [{'issueNumber': 10820986,
    'volume': '03',
    'year': '2024',
    'month': 'Sept.'},
   {'issueNumber': 10820989, 'volume': '02', 'year': '2024', 'month': 'Sept.'},
   {'issueNumber': 10821250,
    'volume': '01',
    'year': '2024',
    'month': 'Sept.'}],
  'publicationInactive': False,
  'apc': 0,
  'showIssueCoverImage': False,
  'coverImage': '/ielx8/10821005/10820986/10821017.jpg',
  'location': 'Montreal, QC, Canada',
  'country': 'Canada'},

In [4]:
# Flatten the issues of every proceedings record into one list, tagging each
# issue with the publication number of the proceedings it belongs to.
issue_data = []

for proceeding in proceedings_data["records"]:
    publication_id = proceeding["publicationNumber"]
    # Copy each issue dict rather than adding the key in place, so the raw
    # `proceedings_data` response stays exactly as it was fetched.
    # A record without an "issues" key simply contributes no issues.
    issue_data.extend(
        {**issue, "publication_id": publication_id}
        for issue in proceeding.get("issues", [])
    )

In [5]:
# Inspect the flattened list of issues, each tagged with its publication_id.
issue_data

[{'issueNumber': 10820986,
  'volume': '03',
  'year': '2024',
  'month': 'Sept.',
  'publication_id': '10821005'},
 {'issueNumber': 10820989,
  'volume': '02',
  'year': '2024',
  'month': 'Sept.',
  'publication_id': '10821005'},
 {'issueNumber': 10821250,
  'volume': '01',
  'year': '2024',
  'month': 'Sept.',
  'publication_id': '10821005'},
 {'issueNumber': 10313605,
  'volume': '03',
  'year': '2023',
  'month': 'Sept.',
  'publication_id': '10313590'},
 {'issueNumber': 10313594,
  'volume': '02',
  'year': '2023',
  'month': 'Sept.',
  'publication_id': '10313590'},
 {'issueNumber': 10313591,
  'volume': '01',
  'year': '2023',
  'month': 'Sept.',
  'publication_id': '10313590'},
 {'issueNumber': 9951167,
  'year': '2022',
  'month': 'Sept.',
  'publication_id': '9951048'},
 {'issueNumber': 9605263,
  'year': '2021',
  'month': 'Oct.',
  'publication_id': '9605247'},
 {'issueNumber': 9259905,
  'year': '2020',
  'month': 'Oct.',
  'publication_id': '9259908'}]

In [6]:
async def get_issue_data_page(
    session: aiohttp.ClientSession, issue_data: Dict[str, Any], page: int | None = None
) -> Dict[str, Any]:
    """Fetch one page of the table of contents of a single issue.

    Args:
        session: Open aiohttp session used to send the POST request.
        issue_data: A single issue record. It must provide the
            ``"publication_id"`` and ``"issueNumber"`` keys.
        page: Page number to request; must be greater than 0. When ``None``
            the ``pageNumber`` field is left out of the request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        AssertionError: If ``page`` is given but is not greater than 0.
        Exception: If the server answers with a status other than 200.
    """
    publication_id = issue_data["publication_id"]
    issue_id = issue_data["issueNumber"]
    body = {
        "sortType": "vol-only-seq",
        "punumber": publication_id,
        "isnumber": issue_id,
    }
    if page is not None:
        assert page > 0, "Page number must be greater than 0"
        body["pageNumber"] = page

    url = f"https://ieeexplore.ieee.org/rest/search/pub/{publication_id}/issue/{issue_id}/toc"
    headers = {
        "User-Agent": USER_AGENT,
        "Referer": f"https://ieeexplore.ieee.org/xpl/conhome/{publication_id}/proceeding",
    }
    async with session.post(
        url,
        headers=headers,
        json=body,
    ) as response:
        if response.status != 200:
            raise Exception(f"Failed to fetch data: {response.status}")
        # Return the payload directly instead of rebinding the `issue_data`
        # parameter to a value of a different meaning.
        return await response.json()

def parse_issue_response(issue_response: Dict[str, Any]):
    """Build a flat summary dict for each locked-access record of a TOC response.

    Records whose ``accessType.type`` is anything other than ``"locked"`` are
    skipped. Every kept record is reduced to the output keys listed in
    ``field_map``; a kept record lacking one of the source keys raises
    ``KeyError``.
    """
    # (output key, key in the raw record), in the order the fields are emitted.
    field_map = (
        ("title", "articleTitle"),
        ("document_link", "documentLink"),
        ("abstract", "abstract"),
        ("year", "publicationYear"),
        ("date", "publicationDate"),
        ("publication_id", "publicationNumber"),
        ("issue_id", "isNumber"),
    )
    return [
        {out_key: entry[src_key] for out_key, src_key in field_map}
        for entry in issue_response["records"]
        if entry["accessType"]["type"] == "locked"
    ]

async def get_issue_data(
    session: aiohttp.ClientSession, issue_data: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Collect the parsed paper records from every TOC page of one issue.

    The first request is sent without a page number. If its response carries a
    ``"totalPages"`` value, pages 2 through ``totalPages`` are then fetched one
    after another and their parsed records appended.

    Args:
        session: Open aiohttp session shared by all page requests.
        issue_data: A single issue record, passed through unchanged to
            ``get_issue_data_page``.

    Returns:
        The list of paper dicts produced by ``parse_issue_response`` across
        all fetched pages.
    """
    first_page = await get_issue_data_page(session, issue_data, None)
    paper_data = parse_issue_response(first_page)

    total_pages = first_page.get("totalPages", None)
    if total_pages is None:
        # No page count in the response: the first page is all there is.
        return paper_data

    for page in range(2, total_pages + 1):
        page_response = await get_issue_data_page(session, issue_data, page)
        paper_data.extend(parse_issue_response(page_response))
    return paper_data

In [7]:
# Scrape the paper records of the first entry in `issue_data`, reusing one
# HTTP session for all of that issue's page requests.
# NOTE(review): only issue_data[0] is fetched here — confirm whether the
# remaining issues are meant to be scraped as well.
async with aiohttp.ClientSession() as session:
    paper_data = await get_issue_data(session, issue_data[0])

In [8]:
# Persist the scraped paper records as indented JSON.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 rather than left to the platform default, which could
# fail on or mis-encode such characters.
with open("qce_paper_data.json", "w", encoding="utf-8") as f:
    json.dump(paper_data, f, indent=4, ensure_ascii=False)