# 📥 Download arXiv cs.CL Papers
This notebook downloads the latest 50 papers from arXiv's `cs.CL` (Computation and Language) category.

In [None]:
# ✅ Install required libraries
!pip install feedparser tqdm

In [None]:
import os
import requests
import feedparser
from tqdm import tqdm

In [None]:
# 📂 Destination folder for the downloaded PDFs (relative to the working directory)
save_dir = "arxiv_cs.CL_pdfs"

# exist_ok=True keeps this cell re-runnable: an already existing folder is not an error.
os.makedirs(name=save_dir, exist_ok=True)

In [None]:
# 🔍 Query arXiv API for the most recent cs.CL papers
base_url = "http://export.arxiv.org/api/query"
search_query = "cat:cs.CL"
max_results = 50

# Request newest-first ordering explicitly; without sortBy/sortOrder the API
# does not promise that the most recent submissions are the ones returned.
query = (
    f"{base_url}?search_query={search_query}"
    f"&start=0&max_results={max_results}"
    "&sortBy=submittedDate&sortOrder=descending"
)
feed = feedparser.parse(query)

# feedparser reports fetch/parse problems through the `bozo` flag instead of
# raising, so surface them here rather than silently continuing with 0 entries.
if feed.get("bozo"):
    print(f"Warning: the arXiv feed could not be parsed cleanly: {feed.get('bozo_exception')}")

print(f"Found {len(feed.entries)} papers.")

In [None]:
# ⬇️ Download each paper's PDF
# Seconds to wait on the server; without a timeout a stalled connection would
# block this cell indefinitely.
REQUEST_TIMEOUT = 30

downloaded = 0  # number of PDFs actually written to disk
for entry in tqdm(feed.entries):
    # Reset per entry so an ID left over from the previous paper is never reused.
    arxiv_id = None
    pdf_url = None
    for link in entry.links:
        href = link.get("href")
        if not href:
            continue
        if link.get("rel") == "alternate":
            # The abstract page URL ends with the versioned arXiv identifier.
            arxiv_id = href.split("/")[-1]
        if link.get("type") == "application/pdf":
            pdf_url = href

    if not pdf_url:
        # No PDF link advertised for this entry; nothing to download.
        continue
    if not arxiv_id:
        # Fall back to the last path segment of the PDF URL for the file name.
        arxiv_id = pdf_url.rstrip("/").split("/")[-1]

    pdf_path = os.path.join(save_dir, f"{arxiv_id}.pdf")
    try:
        response = requests.get(pdf_url, timeout=REQUEST_TIMEOUT)
        # Turn HTTP 4xx/5xx into an exception so an error page is never saved
        # under a .pdf name.
        response.raise_for_status()
        with open(pdf_path, "wb") as f:
            f.write(response.content)
        downloaded += 1
    except Exception as e:
        # Best effort: report the failure and keep going with the other papers.
        print(f"Failed to download {arxiv_id}: {e}")

# Report what was really saved, not the number of entries in the feed.
print(f"✅ Downloaded {downloaded} of {len(feed.entries)} PDFs to '{save_dir}'")