In [2]:
#!/usr/bin/python
# -*- coding: UTF-8 -*-

# from string import maketrans   # 必须调用 maketrans 函数。
import requests
import feedparser
import re
import os
from datetime import datetime
from urllib.parse import urljoin

In [19]:
def arXivFind(params: list = None, id_list=None, start=0, max_results=10, rules="AND"):
    """
    Find papers with given parameters.

    :param params (Optional): A list of strings containing search requirements (Key:context, e.g. ["ti:graph","all:minor"]).
        A single string is treated as a one-element list. May be None/empty when ``id_list`` is given.

        Search keys:
        - ti: Title
        - au: Author
        - abs: Abstract
        - co: Comment
        - jr: Journal Reference
        - cat: Subject Category
        - rn: Report Number
        - id: Id (use id_list instead)
        - all: A combined search of all the above
    :param id_list (Optional): A list of specific paper IDs to filter by, default is None (No filters).
        A single string is treated as a one-element list.
    :param start (Optional): The starting index of the query, with 0 being the first, default is 0.
    :param max_results (Optional): The number of collections returned by the query, default is 10.
    :param rules (Optional): The logical rules that are satisfied between all conditions, allowing "AND"(default) and "OR".
        Or a list with "AND", "OR", "ANDNOT"/"NOT"; the list is cycled when it is shorter than the number of joins.
        NOTE(review): the operator is inserted verbatim - confirm that the API accepts a bare "NOT".
    :return: Whatever ``arxivQuery`` returns for the assembled query string.
    :raises AssertionError: When neither ``params`` nor ``id_list`` contains anything to search for.
    """
    # Accept a bare string where a list is expected; iterating a string would
    # otherwise split it into single characters.
    if isinstance(params, str):
        params = [params]
    if isinstance(id_list, str):
        id_list = [id_list]

    # Raised explicitly (not via `assert`) so the check also runs under `python -O`;
    # the exception type and message are the same as before.
    if not params and not id_list:
        raise AssertionError("Invalid Find parameters(Empty)")

    query_parts = []

    if params:
        operators = [rules] if isinstance(rules, str) else list(rules)
        if not operators:
            # An empty rule list would make the modulo below divide by zero.
            operators = ["AND"]
        search = str(params[0])
        for index, param in enumerate(params[1:]):
            search += f"+{operators[index % len(operators)]}+{param}"
        query_parts.append("search_query=" + search)

    if id_list:
        query_parts.append("id_list=" + ",".join(str(paper_id) for paper_id in id_list))

    query_parts.append(f"start={start}")
    query_parts.append(f"max_results={max_results}")

    # Joining the non-empty parts guarantees exactly one "&" between fields and
    # keeps id_list in the request when it is combined with a search query.
    return arxivQuery("&".join(query_parts))

# http://export.arxiv.org/api/query?search_query=cat:cs.CC+AND+submittedDate:[19990101+TO+20251231]&sortBy=submittedDate&sortOrder=descending

def arxivQuery(query, start=0, max_results=10, timeout=30):
    """
    Send a query string to the arXiv export API and parse the returned feed.

    :param query: The URL query string without the leading "?" (e.g. "search_query=ti:graph&start=0&max_results=10").
    :param start (Optional): Index of the first result. Appended as "start=" only when ``query`` does not already set it, default is 0.
    :param max_results (Optional): Number of results. Appended as "max_results=" only when ``query`` does not already set it, default is 10.
    :param timeout (Optional): Seconds to wait for the HTTP response before giving up, default is 30.
    :return: The object produced by ``feedparser.parse``, or None when the request fails
        or the server answers with a status code other than 200.
    """
    base_url = "http://export.arxiv.org/api/query?"

    # Only add paging parameters the caller has not already put into the query
    # string, so a pre-built query is never sent with duplicated fields.
    present_keys = {part.split("=", 1)[0] for part in query.split("&")}
    paging = []
    if "start" not in present_keys:
        paging.append(f"start={start}")
    if "max_results" not in present_keys:
        paging.append(f"max_results={max_results}")
    if paging:
        separator = "&" if query and not query.endswith("&") else ""
        query = query + separator + "&".join(paging)

    url = base_url + query
    print(url)
    try:
        # Fetch the article list; the timeout keeps a stalled connection from
        # blocking the notebook cell forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch data from arXiv API. Status code: {response.status_code}")
            return None

        # Parse the feed returned by the API.
        return feedparser.parse(response.content)
    except requests.exceptions.RequestException as e:
        print(f"Internet Request Error: {e}")
    except Exception as e:
        # Best-effort by design: report the problem and fall through to None.
        print(f"Unexpected Error: {e}")
    return None

In [23]:
def sanitize_filename(title):
    """
    Convert a title into a string that is safe to use as a file name.

    Every run of characters that are not word characters (letters, digits,
    underscore) - including spaces - is replaced by a single underscore, and
    existing runs of underscores are collapsed as well.

    :param title: The title text to convert.
    :return: The converted string; leading/trailing underscores are kept.
    """
    # One pass over "non-word or underscore" runs collapses runs of any length,
    # which a fixed number of chained '__' -> '_' replacements cannot guarantee.
    return re.sub(r'[\W_]+', '_', title)

def checkEntries(feed):
    """
    Print every entry of a parsed feed for manual inspection.

    :param feed: An object exposing an ``entries`` iterable. None (which
        ``arxivQuery`` returns when a request fails) or an object without
        ``entries`` is reported and skipped instead of raising.
    :return: None. All results are written to standard output.
    """
    entries = getattr(feed, "entries", None)
    if entries is None:
        print("No feed entries to check.")
        return

    for entry in entries:
        # Reset the label on every iteration so the error message below never
        # reads an unbound name or the title of a previous entry.
        title = "<unknown title>"
        try:
            title = sanitize_filename(entry.title)
        except Exception as e:
            print(f"Entry format Error:{e}")
        try:
            print(entry)
        except Exception as e:
            print(f"Error processing entry '{title}': {e}")

In [24]:
# Query arXiv with two conditions joined by the default "AND" rule
# (title search for "graph", all-fields search for "minor"), then print each returned entry.
# `x` keeps the parsed feed so that later cells can inspect individual entries.
x = arXivFind(params=["ti:graph","all:minor"])
checkEntries(x)

http://export.arxiv.org/api/query?search_query=ti:graph+AND+all:minor&&start=0&max_results=10
{'id': 'http://arxiv.org/abs/1604.04072v1', 'guidislink': True, 'link': 'http://arxiv.org/abs/1604.04072v1', 'updated': '2016-04-14T08:34:22Z', 'updated_parsed': time.struct_time(tm_year=2016, tm_mon=4, tm_mday=14, tm_hour=8, tm_min=34, tm_sec=22, tm_wday=3, tm_yday=105, tm_isdst=0), 'published': '2016-04-14T08:34:22Z', 'published_parsed': time.struct_time(tm_year=2016, tm_mon=4, tm_mday=14, tm_hour=8, tm_min=34, tm_sec=22, tm_wday=3, tm_yday=105, tm_isdst=0), 'title': 'Graph Nimors', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Graph Nimors'}, 'summary': 'In the game of Graph Nimors, two players alternately perform graph minor\noperations (deletion and contraction of edges) on a graph until no edges\nremain, at which point the player who last moved wins. We present theoretical\nand experimental results and conjectures regarding this game.', 'summary_detail': 

In [31]:
# Pick the last entry of the feed fetched earlier and list the field names it exposes.
a = x.entries[-1]
print(a.keys())

dict_keys(['id', 'guidislink', 'link', 'updated', 'updated_parsed', 'published', 'published_parsed', 'title', 'title_detail', 'summary', 'summary_detail', 'authors', 'author_detail', 'author', 'links', 'arxiv_primary_category', 'tags'])


In [35]:
# Show the link records attached to the selected entry.
print(a.links)

[{'href': 'http://arxiv.org/abs/1011.5119v1', 'rel': 'alternate', 'type': 'text/html'}, {'title': 'pdf', 'href': 'http://arxiv.org/pdf/1011.5119v1', 'rel': 'related', 'type': 'application/pdf'}]
