<a href="https://colab.research.google.com/github/forouzanfallah/Search_ArXiv_for_Papers/blob/main/Search_ArXiv_for_Papers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install arxiv


Collecting arxiv
  Downloading arxiv-2.1.0-py3-none-any.whl (11 kB)
Collecting feedparser==6.0.10 (from arxiv)
  Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m829.1 kB/s[0m eta [36m0:00:00[0m
Collecting sgmllib3k (from feedparser==6.0.10->arxiv)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6049 sha256=d4fc80efb75bb3cbfffa2b9b286c09f5aae8455c927f293c74f75b26576d39e1
  Stored in directory: /root/.cache/pip/wheels/f0/69/93/a47e9d621be168e9e33c7ce60524393c0b92ae83cf6c6e89c5
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser, arxiv
Successfully installed arxiv-2.1.0 feedparser-6.0.10 sgmllib3k-1.0.0


In [2]:
import arxiv
import datetime

class ArxivSearcher:
    """Keyword search over arXiv restricted to a submission-date window."""

    def __init__(self):
        """Initialize the arXiv searcher with a client."""
        self.client = arxiv.Client()

    @staticmethod
    def _build_query(keywords):
        """Build a query that requires every keyword as a quoted phrase in any field."""
        # A bare string would otherwise be iterated character by character.
        if isinstance(keywords, str):
            keywords = [keywords]
        terms = [f'all:"{keyword}"' for keyword in keywords]
        if not terms:
            raise ValueError("keywords must contain at least one search term")
        return ' AND '.join(terms)

    def search(self, keywords, start_date, end_date, max_results=1000):
        """Search arXiv for papers matching all keywords within a date range.

        Results are requested newest-first and filtered client-side on each
        paper's ``published`` date. Every match is printed (title, link and
        submission date) and the matches are also returned.

        Args:
            keywords: Iterable of phrases that must all match; a single string
                is treated as one phrase.
            start_date: Inclusive lower bound, formatted ``YYYY-MM-DD``.
            end_date: Inclusive upper bound, formatted ``YYYY-MM-DD``.
            max_results: Maximum number of papers to keep *after* date
                filtering. ``None`` keeps every match; a value <= 0 keeps none.

        Returns:
            list: The matching result objects, newest first.

        Raises:
            ValueError: If ``keywords`` is empty or a date string does not
                follow the ``YYYY-MM-DD`` format.
        """
        search_query = self._build_query(keywords)

        start = datetime.datetime.strptime(start_date, '%Y-%m-%d').date()
        end = datetime.datetime.strptime(end_date, '%Y-%m-%d').date()

        filtered_results = []

        if max_results is None or max_results > 0:
            # max_results=None asks the client for every available result.
            # A fixed fetch cap would make the newest N hits the only
            # candidates, so older date windows (or requests for more than N
            # papers) could never be satisfied.
            search = arxiv.Search(
                query=search_query,
                max_results=None,
                sort_by=arxiv.SortCriterion.SubmittedDate,
                sort_order=arxiv.SortOrder.Descending
            )

            for result in self.client.results(search):
                published = result.published.date()
                if published < start:
                    # Newest-first ordering: everything after this is older
                    # still, so stop instead of paging through the rest.
                    break
                if published > end:
                    continue
                filtered_results.append(result)
                if max_results is not None and len(filtered_results) >= max_results:
                    break

        # Display the filtered results
        for result in filtered_results:
            print(f"Title: {result.title}\nLink: {result.entry_id}\nSubmitted Date: {result.published.strftime('%Y-%m-%d')}\n")

        return filtered_results




In [5]:
# Demo: print papers matching the keywords submitted between the two dates.
if __name__ == "__main__":
    # Search parameters; both dates use the YYYY-MM-DD format.
    keywords = ['visual text']
    start_date, end_date = '2024-01-01', '2024-03-29'
    max_results = 105  # upper bound on papers kept after date filtering

    searcher = ArxivSearcher()
    searcher.search(keywords, start_date, end_date, max_results=max_results)

Title: Refining Text-to-Image Generation: Towards Accurate Training-Free Glyph-Enhanced Image Generation
Link: http://arxiv.org/abs/2403.16422v1
Submitted Date: 2024-03-25

Title: Glyph-ByT5: A Customized Text Encoder for Accurate Visual Text Rendering
Link: http://arxiv.org/abs/2403.09622v1
Submitted Date: 2024-03-14

Title: UniCode: Learning a Unified Codebook for Multimodal Large Language Models
Link: http://arxiv.org/abs/2403.09072v1
Submitted Date: 2024-03-14

Title: Answering Diverse Questions via Text Attached with Key Audio-Visual Clues
Link: http://arxiv.org/abs/2403.06679v1
Submitted Date: 2024-03-11

Title: NPHardEval4V: A Dynamic Reasoning Benchmark of Multimodal Large Language Models
Link: http://arxiv.org/abs/2403.01777v2
Submitted Date: 2024-03-04

Title: Towards Accurate Lip-to-Speech Synthesis in-the-Wild
Link: http://arxiv.org/abs/2403.01087v1
Submitted Date: 2024-03-02

Title: VIXEN: Visual Text Comparison Network for Image Difference Captioning
Link: http://arxiv.or