In [8]:
import arxiv
import datetime
import pandas as pd

In [16]:
def fetch_ai_papers_to_dataframe(days_ago=DAYS_AGO, max_results=MAX_RESULTS):
    """
    Fetches extensive metadata for the latest AI-related papers from arXiv 
    and returns it as a Pandas DataFrame.
    """
    print(f"🚀 Fetching metadata for the latest {MAX_RESULTS} AI papers from the last {DAYS_AGO} days...")
    
    # Calculate the date for one week ago from today
    one_week_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=DAYS_AGO)
    
    # Construct the search query for major AI categories
    query = "cat:cs.AI OR cat:cs.LG OR cat:cs.CL OR cat:cs.CV"
    
    # Create the search object
    search = arxiv.Search(
      query=query,
      max_results=MAX_RESULTS,
      sort_by=arxiv.SortCriterion.SubmittedDate,
      sort_order=arxiv.SortOrder.Descending
    )
    
    # List to hold the metadata for each paper
    papers_data = []
    
    # Iterate over the search results
    for result in search.results():
        # Check if the paper was published within the last week
        if result.published >= one_week_ago:
            
            # Extract core arXiv ID from the entry_id URL
            arxiv_id = result.entry_id.split('/')[-1]
            
            # Extract author names into a simple list
            author_names = [author.name for author in result.authors]
            
            # Create a dictionary with all the desired metadata
            paper_info = {
                'arxiv_id': arxiv_id,
                'title': result.title,
                'authors': author_names,
                'summary': result.summary,
                'published_date': result.published,
                'updated_date': result.updated,
                'pdf_url': result.pdf_url,
                'primary_category': result.primary_category,
                'all_categories': result.categories,
                'doi': result.doi,
                'journal_ref': result.journal_ref,
                'comments': result.comment
            }
            papers_data.append(paper_info)
            
    if not papers_data:
        print("No new papers found in the specified categories for the last week.")
        return None
        
    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(papers_data)
    
    print(f"\n✅ Successfully collected metadata for {len(df)} papers.")
    return df

In [17]:
df = fetch_ai_papers_to_dataframe(days_ago=7, max_results=100)

🚀 Fetching metadata for the latest 100 AI papers from the last 7 days...

✅ Successfully collected metadata for 100 papers.


  for result in search.results():


In [18]:
df

Unnamed: 0,arxiv_id,title,authors,summary,published_date,updated_date,pdf_url,primary_category,all_categories,doi,journal_ref,comments
0,2510.05102v1,TopInG: Topologically Interpretable Graph Lear...,"[Cheng Xin, Fan Xu, Xin Ding, Jie Gao, Jiaxin ...",Graph Neural Networks (GNNs) have shown remark...,2025-10-06 17:59:44+00:00,2025-10-06 17:59:44+00:00,http://arxiv.org/pdf/2510.05102v1,cs.LG,"[cs.LG, cs.AI, cs.CG, math.AT, stat.ML, 55N31,...",,,submitted to ICML 2025
1,2510.05097v1,Pulp Motion: Framing-aware multimodal camera a...,"[Robin Courant, Xi Wang, David Loiseaux, Marc ...",Treating human motion and camera trajectory ge...,2025-10-06 17:58:34+00:00,2025-10-06 17:58:34+00:00,http://arxiv.org/pdf/2510.05097v1,cs.GR,"[cs.GR, cs.CV]",,,Project page:\n https://www.lix.polytechnique...
2,2510.05096v1,Paper2Video: Automatic Video Generation from S...,"[Zeyu Zhu, Kevin Qinghong Lin, Mike Zheng Shou]",Academic presentation videos have become an es...,2025-10-06 17:58:02+00:00,2025-10-06 17:58:02+00:00,http://arxiv.org/pdf/2510.05096v1,cs.CV,"[cs.CV, cs.AI, cs.CL, cs.MA, cs.MM]",,,"20 pages, 8 figures"
3,2510.05095v1,From Noisy Traces to Stable Gradients: Bias-Va...,"[Mingkang Zhu, Xi Chen, Bei Yu, Hengshuang Zha...",Large reasoning models (LRMs) generate interme...,2025-10-06 17:58:01+00:00,2025-10-06 17:58:01+00:00,http://arxiv.org/pdf/2510.05095v1,cs.LG,"[cs.LG, cs.AI, cs.CL]",,,
4,2510.05094v1,VChain: Chain-of-Visual-Thought for Reasoning ...,"[Ziqi Huang, Ning Yu, Gordon Chen, Haonan Qiu,...",Recent video generation models can produce smo...,2025-10-06 17:57:59+00:00,2025-10-06 17:57:59+00:00,http://arxiv.org/pdf/2510.05094v1,cs.CV,[cs.CV],,,Project page: https://eyeline-labs.github.io/V...
...,...,...,...,...,...,...,...,...,...,...,...,...
95,2510.04862v1,Video Game Level Design as a Multi-Agent Reinf...,"[Sam Earle, Zehua Jiang, Eugene Vinitsky, Juli...",Procedural Content Generation via Reinforcemen...,2025-10-06 14:49:21+00:00,2025-10-06 14:49:21+00:00,http://arxiv.org/pdf/2510.04862v1,cs.AI,"[cs.AI, cs.LG, cs.MA, cs.NE]",,,"11 pages, 7 tables, 5 figures, published as fu..."
96,2510.04861v1,A Clinical-grade Universal Foundation Model fo...,"[Zihan Zhao, Fengtao Zhou, Ronggang Li, Bing C...",Intraoperative pathology is pivotal to precisi...,2025-10-06 14:48:43+00:00,2025-10-06 14:48:43+00:00,http://arxiv.org/pdf/2510.04861v1,cs.LG,[cs.LG],,,
97,2510.04860v1,Alignment Tipping Process: How Self-Evolution ...,"[Siwei Han, Jiaqi Liu, Yaofeng Su, Wenbo Duan,...",As Large Language Model (LLM) agents increasin...,2025-10-06 14:48:39+00:00,2025-10-06 14:48:39+00:00,http://arxiv.org/pdf/2510.04860v1,cs.LG,"[cs.LG, cs.AI]",,,
98,2510.04859v1,μDeepIQA: deep learning-based fast and robust ...,"[Elena Corbetta, Thomas Bocklitz]",Optical microscopy is one of the most widely u...,2025-10-06 14:48:36+00:00,2025-10-06 14:48:36+00:00,http://arxiv.org/pdf/2510.04859v1,cs.CV,"[cs.CV, physics.data-an, q-bio.QM]",,,"16 pages, 6 figures. \mu DeepIQA is publicly a..."
