In [15]:
import time
import requests
import feedparser
import pandas as pd
from datetime import datetime, timedelta, timezone

In [None]:
def fetch_arxiv_papers(
    query="AI OR artificial intelligence",
    category="cs.AI",
    max_results=100,
    within_days=None
):
    """Fetch the most recently submitted arXiv papers matching a query.

    Results are requested from the arXiv export API sorted by submission
    date, newest first, in pages of at most 50 entries.

    Parameters
    ----------
    query : str
        arXiv search expression; it is wrapped in parentheses and AND-ed
        with the category filter.
    category : str
        arXiv category used for the ``cat:`` filter (e.g. ``"cs.AI"``).
    max_results : int
        Upper bound on the number of entries requested from the API.
        Values <= 0 perform no request and yield an empty frame.
    within_days : int or float or None
        If not None, keep only papers published within this many days of
        now (UTC). ``0`` is a valid cutoff and is not treated as "no filter".

    Returns
    -------
    pandas.DataFrame
        One row per paper with the columns ``title``, ``authors``,
        ``published``, ``summary``, ``pdf_url`` and ``arxiv_url``. The
        columns are present even when no paper matched.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If a request does not complete within the timeout.
    """
    base_url = "http://export.arxiv.org/api/query"
    results_per_call = 50  # arXiv recommends <=100 per call
    request_delay = 3      # seconds between consecutive API calls
    request_timeout = 30   # seconds; never hang the notebook on a stalled socket
    columns = ["title", "authors", "published", "summary", "pdf_url", "arxiv_url"]

    # `is not None` so that within_days=0 still applies a cutoff.
    from_date = None
    if within_days is not None:
        from_date = datetime.now(timezone.utc) - timedelta(days=within_days)

    # Earlier versions spliced the query into the URL verbatim, where a literal
    # "+" is decoded as a space by the server. Normalise it so such callers keep
    # working now that requests URL-encodes the parameters for us.
    search_query = f"({query.replace('+', ' ')}) AND cat:{category}"

    papers = []
    for start in range(0, max_results, results_per_call):
        # Respect rate limits: pause between calls, not after the last one.
        if start:
            time.sleep(request_delay)

        # Ask only for what is still needed so max_results is a true upper bound.
        page_size = min(results_per_call, max_results - start)
        response = requests.get(
            base_url,
            params={
                "search_query": search_query,
                "sortBy": "submittedDate",
                "sortOrder": "descending",
                "start": start,
                "max_results": page_size,
            },
            timeout=request_timeout,
        )
        response.raise_for_status()
        entries = feedparser.parse(response.text).entries
        if not entries:
            break  # nothing more to page through

        reached_cutoff = False
        for entry in entries:
            published_dt = datetime.strptime(
                entry.published, "%Y-%m-%dT%H:%M:%SZ"
            ).replace(tzinfo=timezone.utc)
            if from_date is not None and published_dt < from_date:
                reached_cutoff = True
                continue
            papers.append({
                "title": entry.title.strip(),
                "authors": ", ".join(author.name for author in entry.authors),
                "published": entry.published,
                "summary": entry.summary.replace("\n", " ").strip(),
                # getattr: tolerate link elements that carry no type attribute
                "pdf_url": next(
                    (
                        link.href
                        for link in entry.links
                        if getattr(link, "type", None) == "application/pdf"
                    ),
                    None,
                ),
                "arxiv_url": entry.link,
            })

        # Results arrive newest first, so once a page contains a paper older
        # than the cutoff the following pages can only be older still.
        if reached_cutoff:
            break

    return pd.DataFrame(papers, columns=columns)


In [13]:
# Pull the newest cs.AI submissions from the past 7 days (a single API page).
print("Fetching AI papers from the last week...")
df = fetch_arxiv_papers(within_days=7, max_results=50)
print(f"✅ Retrieved {len(df)} recent papers.")
# End the cell with the expression so Jupyter renders the preview as a rich
# table; print() wraps and truncates wide frames into hard-to-read text.
df.head(5)

Fetching AI papers from the last week...
✅ Retrieved 10 recent papers.
                                               title  \
0         Reward Models are Metrics in a Trench Coat   
1  Improving GUI Grounding with Explicit Position...   
2  Test-Time Defense Against Adversarial Attacks ...   
3  Self-Anchor: Large Language Model Reasoning vi...   
4  Abstain and Validate: A Dual-LLM Policy for Re...   

                                             authors             published  \
0                                 Sebastian Gehrmann  2025-10-03T17:59:44Z   
1  Suyuchen Wang, Tianyu Zhang, Ahmed Masry, Chri...  2025-10-03T17:59:34Z   
2  Dong Lao, Yuxiang Zhang, Haniyeh Ehsani Oskoui...  2025-10-03T17:57:25Z   
3           Hongxiang Zhang, Yuan Tian, Tianyi Zhang  2025-10-03T17:56:33Z   
4  José Cambronero, Michele Tufano, Sherry Shi, R...  2025-10-03T17:53:28Z   

                                             summary  \
0  The emergence of reinforcement learning in pos...   
1  GUI grou

In [10]:
# Save the fetched papers as CSV on the current user's Desktop, without the row index.
df.to_csv(path_or_buf="~/Desktop/arxiv_ai_last_week.csv", index=False)

In [14]:
# Render the full frame of fetched papers as a rich table.
# NOTE(review): this shows every row; consider df.head() if the frame grows large.
display(df)

Unnamed: 0,title,authors,published,summary,pdf_url,arxiv_url
0,Reward Models are Metrics in a Trench Coat,Sebastian Gehrmann,2025-10-03T17:59:44Z,The emergence of reinforcement learning in pos...,http://arxiv.org/pdf/2510.03231v1,http://arxiv.org/abs/2510.03231v1
1,Improving GUI Grounding with Explicit Position...,"Suyuchen Wang, Tianyu Zhang, Ahmed Masry, Chri...",2025-10-03T17:59:34Z,"GUI grounding, the task of mapping natural-lan...",http://arxiv.org/pdf/2510.03230v1,http://arxiv.org/abs/2510.03230v1
2,Test-Time Defense Against Adversarial Attacks ...,"Dong Lao, Yuxiang Zhang, Haniyeh Ehsani Oskoui...",2025-10-03T17:57:25Z,We propose a test-time defense mechanism again...,http://arxiv.org/pdf/2510.03224v1,http://arxiv.org/abs/2510.03224v1
3,Self-Anchor: Large Language Model Reasoning vi...,"Hongxiang Zhang, Yuan Tian, Tianyi Zhang",2025-10-03T17:56:33Z,To solve complex reasoning tasks for Large Lan...,http://arxiv.org/pdf/2510.03223v1,http://arxiv.org/abs/2510.03223v1
4,Abstain and Validate: A Dual-LLM Policy for Re...,"José Cambronero, Michele Tufano, Sherry Shi, R...",2025-10-03T17:53:28Z,Agentic Automated Program Repair (APR) is incr...,http://arxiv.org/pdf/2510.03217v1,http://arxiv.org/abs/2510.03217v1
5,Wave-GMS: Lightweight Multi-Scale Generative M...,"Talha Ahmed, Nehal Ahmed Shaikh, Hassan Mohy-u...",2025-10-03T17:53:16Z,For equitable deployment of AI tools in hospit...,http://arxiv.org/pdf/2510.03216v1,http://arxiv.org/abs/2510.03216v1
6,Coevolutionary Continuous Discrete Diffusion: ...,"Cai Zhou, Chenxiao Yang, Yi Hu, Chenyu Wang, C...",2025-10-03T17:44:41Z,"Diffusion language models, especially masked d...",http://arxiv.org/pdf/2510.03206v1,http://arxiv.org/abs/2510.03206v1
7,CoDA: Agentic Systems for Collaborative Data V...,"Zichen Chen, Jiefeng Chen, Sercan Ö. Arik, Mis...",2025-10-03T17:30:16Z,Deep research has revolutionized data analysis...,http://arxiv.org/pdf/2510.03194v1,http://arxiv.org/abs/2510.03194v1
8,Simulation to Rules: A Dual-VLM Framework for ...,"Yilun Hao, Yongchao Chen, Chuchu Fan, Yang Zhang",2025-10-03T16:57:01Z,Vision Language Models (VLMs) show strong pote...,http://arxiv.org/pdf/2510.03182v1,http://arxiv.org/abs/2510.03182v1
9,Topic Modeling as Long-Form Generation: Can Lo...,"Xuan Xu, Haolun Li, Zhongliang Yang, Beilin Ch...",2025-10-03T16:48:32Z,Traditional topic models such as neural topic ...,http://arxiv.org/pdf/2510.03174v1,http://arxiv.org/abs/2510.03174v1
