In [None]:
import arxiv
import os
import tarfile

# Download arXiv source archives for one category and keep the single .tex
# file of each paper, until `target_count` papers have been collected.

# Construct the default API client.
client = arxiv.Client()

# Category label used both in the search query and in the output directory name.
cat = 'Computer Science'
tmpdir = './tarfiles'                   # scratch directory for downloaded archives
dirpath = f'./selected_papers/{cat}'    # destination for the extracted .tex files
target_count = 10                       # stop after this many papers were extracted

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(dirpath, exist_ok=True)
os.makedirs(tmpdir, exist_ok=True)

# Most recently submitted articles first.
# NOTE(review): the arXiv API documents category filters as `cat:<code>`
# (e.g. `cat:cs.AI`); "cat=Computer Science" may not be interpreted as a
# category filter at all -- confirm the intended query string.
search = arxiv.Search(
    query=f"cat={cat}",
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

# Create the result iterator once and reuse it in the loop below.
results = client.results(search)

successful_downloads = 0

for i, r in enumerate(results):
    filename = f"paper_{i}.tar.gz"

    # A single failed download must not abort the whole run.
    try:
        filepath = r.download_source(dirpath=tmpdir, filename=filename)
    except Exception as e:
        print(f"Error downloading paper {i}: {e}")
        continue

    try:
        with tarfile.open(filepath, 'r:gz') as tar:
            # Only regular files: extractfile() returns None for directories
            # and special members, and links could point outside the archive.
            tex_members = [
                m for m in tar.getmembers()
                if m.isfile() and m.name.endswith('.tex')
            ]
            if len(tex_members) == 1:
                member = tex_members[0]
                print(f"Extracting {member.name}")
                # Stream the member straight to its final name instead of
                # tar.extract() + rename: the archive is untrusted, so its
                # member path must never decide where data is written, and
                # nested members no longer leave stray subdirectories behind.
                target = os.path.join(dirpath, f'paper_{i}.tex')
                with tar.extractfile(member) as src, open(target, 'wb') as dst:
                    dst.write(src.read())
                successful_downloads += 1
            else:
                print(f"Paper {i} has {len(tex_members)} tex files. Skipping.")
    except Exception as e:
        # Not every source download is a gzipped tarball; report and move on.
        print(f"Error extracting {filepath}: {e}")
    finally:
        # Always discard the downloaded archive, whatever happened above.
        if os.path.exists(filepath):
            os.remove(filepath)

    if successful_downloads == target_count:
        break

# Remove only the scratch directory itself. os.removedirs would also prune
# empty parent directories and raises if the directory is not empty.
try:
    os.rmdir(tmpdir)
except OSError as e:
    print(f"Could not remove {tmpdir}: {e}")
