## Get PDFs from arxiv.org


In [17]:
# Set up to use local modules
%load_ext autoreload
%autoreload 2
import os
import sys
module_path = os.path.abspath(os.path.join('..')) # Add parent directory to path
sys.path.insert(0, module_path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
import pyprojroot

from src import utils

# Directory that downloaded PDFs are written to: the "data" path as resolved
# by pyprojroot (presumably relative to the detected project root).
PDF_DIR = pyprojroot.here("data")

In [3]:
# https://lukasschwab.me/arxiv.py/arxiv.html

import arxiv

# Construct the default API client.
client = arxiv.Client()

# Search for the 10 most recent articles matching the keyword "quantum."
search = arxiv.Search(
    query="quantum", max_results=10, sort_by=arxiv.SortCriterion.SubmittedDate
)

# `client.results` returns a generator, so it can only be consumed once.
results = client.results(search)

# Iterate over the generator's elements one by one...
for r in results:
    print(r.title)
# ...or exhaust a fresh generator into a list (the one above is now consumed).
# Careful: this is slow for large results sets.
# all_results = list(client.results(search))
# print([r.title for r in all_results])

Observation of an inverse turbulent-wave cascade in a driven quantum gas
Robustness of Fixed Points of Quantum Channels and Application to Approximate Quantum Markov Chains
Polarization dependent non-Hermitian atomic grating controlled by dipole blockade effect
Centerless-BMS charge algebra
Driven Multiphoton Qubit-Resonator Interactions
Geometric Quantization Without Polarizations
Effective Lifshitz black holes, hydrodynamics, and transport coefficients in fluid/gravity correspondence
Optical Manipulation of Spin States in Ultracold Magnetic Atoms via an Inner-Shell Hz Transition
Single-layer tensor network approach for three-dimensional quantum systems
A Formulation of Quantum Fluid Mechanics and Trajectories


In [4]:
# arXiv identifiers (including version suffix) of the example papers.
pdf_ids = ["1706.03762v6", "1605.08386v1"]

# Search for the paper with the first ID in the list ("1706.03762v6")
search_by_id = arxiv.Search(id_list=[pdf_ids[0]])
paper = next(client.results(search_by_id))
print(paper.title)

# Save the PDF into PDF_DIR as "example_paper.pdf"; being the last expression
# of the cell, the call's return value is shown as the cell output.
paper.download_pdf(dirpath=PDF_DIR, filename="example_paper.pdf")

Attention Is All You Need


'/home/jordan/documents/GitHub/arxiv-chat/data/example_paper.pdf'

In [21]:
# Collection of papers defined in the project's `utils` module.
papers = utils.local_papers

# NOTE(review): presumably fetches any of these papers that are not on disk yet;
# silent=False appears to enable the per-paper status messages — confirm in src/utils.
utils.get_local_papers(papers=papers, silent=False)

Already downloaded: Attention is All You Need
Already downloaded: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
Already downloaded: Generative Adversarial Nets
Already downloaded: Playing Atari with Deep Reinforcement Learning
Already downloaded: ImageNet Classification with Deep Convolutional Neural Networks


## Get the most recent ML papers from arXiv


In [29]:
# Use the `arxiv` package to get the 10 most recent papers in the cs.LG category.
# Papers categorized with stat.ML as primary are automatically cross-listed as cs.LG but not vice versa.
# Computer science: Machine Learning = cs.LG
# Other related categories:
# Statistics: Machine Learning = stat.ML
# Computer science: Artificial Intelligence = cs.AI
# Computer science: Neural and Evolutionary Computing = cs.NE
# Computer science: Systems and Control = cs.SY
# Math: Optimization and Control = math.OC

# The `cat:` field prefix restricts the match to the subject category; a bare
# "cs.LG" is treated as a free-text search term rather than a category filter.
search = arxiv.Search(
    query="cat:cs.LG", max_results=10, sort_by=arxiv.SortCriterion.SubmittedDate
)

results = client.results(search)
# Print the title and abstract of each result
for r in results:
    print(r.title)
    print(r.summary)
    print(r.summary)

Multi-Space Alignments Towards Universal LiDAR Segmentation
A unified and versatile LiDAR segmentation model with strong robustness and
generalizability is desirable for safe autonomous driving perception. This work
presents M3Net, a one-of-a-kind framework for fulfilling multi-task,
multi-dataset, multi-modality LiDAR segmentation in a universal manner using
just a single set of parameters. To better exploit data volume and diversity,
we first combine large-scale driving datasets acquired by different types of
sensors from diverse scenes and then conduct alignments in three spaces, namely
data, feature, and label spaces, during the training. As a result, M3Net is
capable of taming heterogeneous data for training state-of-the-art LiDAR
segmentation models. Extensive experiments on twelve LiDAR segmentation
datasets verify our effectiveness. Notably, using a shared set of parameters,
M3Net achieves 75.1%, 83.1%, and 72.4% mIoU scores, respectively, on the
official benchmarks of Semantic

In [24]:
# Get the first paper from results
# A new generator is built from `search` here, so this works even if the
# `results` generator has already been consumed.
paper = next(client.results(search))

In [25]:
# Show the repr of the fetched result to see which fields are available.
paper

arxiv.Result(entry_id='http://arxiv.org/abs/2405.01538v1', updated=datetime.datetime(2024, 5, 2, 17, 59, 57, tzinfo=datetime.timezone.utc), published=datetime.datetime(2024, 5, 2, 17, 59, 57, tzinfo=datetime.timezone.utc), title='Multi-Space Alignments Towards Universal LiDAR Segmentation', authors=[arxiv.Result.Author('Youquan Liu'), arxiv.Result.Author('Lingdong Kong'), arxiv.Result.Author('Xiaoyang Wu'), arxiv.Result.Author('Runnan Chen'), arxiv.Result.Author('Xin Li'), arxiv.Result.Author('Liang Pan'), arxiv.Result.Author('Ziwei Liu'), arxiv.Result.Author('Yuexin Ma')], summary='A unified and versatile LiDAR segmentation model with strong robustness and\ngeneralizability is desirable for safe autonomous driving perception. This work\npresents M3Net, a one-of-a-kind framework for fulfilling multi-task,\nmulti-dataset, multi-modality LiDAR segmentation in a universal manner using\njust a single set of parameters. To better exploit data volume and diversity,\nwe first combine large-sc

In [36]:
# Get the most recent ML papers with their publication timestamps
# (goal: all ML papers from today; for now this fetches the 10 newest).

# https://export.arxiv.org/api/query?search_query=cat:cs.LG+AND+submittedDate:[202001130630+TO+202001131645]

# The `cat:` field prefix restricts the match to the cs.LG subject category.
query = "cat:cs.LG"
# Querying by submittedDate did not work through the Python API with the
# URL-style query string:
# query = "cat:cs.LG+AND+submittedDate:[202001130630+TO+202101131645]"
# NOTE(review): the `+` separators in the URL above are URL encoding for spaces;
# the query is passed here as a plain string, so the space-separated form may
# work — TODO confirm:
# query = "cat:cs.LG AND submittedDate:[202001130630 TO 202101131645]"
search = arxiv.Search(
    query=query, max_results=10, sort_by=arxiv.SortCriterion.SubmittedDate
)

results = client.results(search)
# Print the title and publication timestamp of each result
for r in results:
    print(r.title)
    print(r.published)