In [3]:
!pip install arxiv

import arxiv
import pandas as pd

# --- CONFIGURACIÓN ---
query = "quantum machine learning"  # cambia por tu tema
max_papers = 1000                    # número de abstracts que quieres

# --- BÚSQUEDA ---
search = arxiv.Search(
    query=query,
    max_results=max_papers,
    sort_by=arxiv.SortCriterion.SubmittedDate
)

# --- scrapear RESULTADOS ---
data = []
for paper in search.results():
    data.append({
        "title": paper.title,
        "authors": [a.name for a in paper.authors],
        "abstract": paper.summary,
        "arxiv_id": paper.get_short_id(),
        "category": paper.primary_category,
        "date": paper.published,
        "pdf_url": paper.pdf_url
    })

df = pd.DataFrame(data)

# --- GUARDAR EN CSV ---
df.to_csv("arxiv_abstracts.csv", index=False)

df.head()




  for paper in search.results():


Unnamed: 0,title,authors,abstract,arxiv_id,category,date,pdf_url
0,EfficientFlow: Efficient Equivariant Flow Poli...,"[Jianlei Chang, Ruofeng Mei, Wei Ke, Xiangyu Xu]",Generative modeling has recently shown remarka...,2512.02020v1,cs.RO,2025-12-01 18:59:59+00:00,https://arxiv.org/pdf/2512.02020v1
1,A Diffusion Model Framework for Maximum Entrop...,"[Sebastian Sanokowski, Kaustubh Patil, Alois K...",Diffusion models have achieved remarkable succ...,2512.02019v1,cs.LG,2025-12-01 18:59:58+00:00,https://arxiv.org/pdf/2512.02019v1
2,Visual Sync: Multi-Camera Synchronization via ...,"[Shaowei Liu, David Yifan Yao, Saurabh Gupta, ...","Today, people can easily record memorable mome...",2512.02017v1,cs.CV,2025-12-01 18:59:57+00:00,https://arxiv.org/pdf/2512.02017v1
3,Improved Mean Flows: On the Challenges of Fast...,"[Zhengyang Geng, Yiyang Lu, Zongze Wu, Eli She...",MeanFlow (MF) has recently been established as...,2512.02012v1,cs.CV,2025-12-01 18:59:49+00:00,https://arxiv.org/pdf/2512.02012v1
4,Learning Dexterous Manipulation Skills from Im...,"[Elvis Hsieh, Wen-Han Hsieh, Yen-Jen Wang, Tor...",Reinforcement learning and sim-to-real transfe...,2512.02011v1,cs.RO,2025-12-01 18:59:45+00:00,https://arxiv.org/pdf/2512.02011v1


# Cómo construir una ecuación de búsqueda para arXiv

Una **ecuación de búsqueda en arXiv** es una cadena de texto que combina palabras clave con operadores lógicos para localizar papers relevantes de manera precisa.

## 1. Operadores booleanos

### **`AND`**
El documento debe contener *ambos* términos.
quantum AND "partial differential equation"

### **`OR`**
El documento puede contener uno u otro término.
PDE OR "differential equations"

### **`ANDNOT`**
Incluye un término y excluye otro. En la API de arXiv el operador se escribe como una sola palabra (`ANDNOT`).
quantum ANDNOT chemistry

## 2. Búsquedas exactas con comillas
Usa comillas para buscar frases completas:
"quantum neural network"

Sin comillas se busca cada palabra por separado.

## 3. Agrupar ideas con paréntesis

Sirve para combinar varios conceptos:
("quantum neural network" OR "quantum machine learning")
AND
("partial differential equation" OR PDE)

## 4. Buscar en categorías específicas de arXiv

arXiv clasifica los papers por áreas. Para buscar solo dentro de una categoría:

cat:quant-ph
cat:cs.LG
cat:math.AP

Ejemplo:
("quantum neural" AND PDE) AND cat:quant-ph

## 5. Buscar por autor
au:"John Preskill"

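## 6. Buscar por fechas
La API de arXiv espera ambos límites del rango en formato `YYYYMMDDHHMM` (hora GMT):
submittedDate:[202001010000 TO 202512312359]

Ejemplo combinado: "quantum neural network" AND submittedDate:[202101010000 TO 202512312359]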

## 7. Ejemplos listos de ecuaciones de búsqueda (tema: QNN + PDE)

### **Búsqueda general**
"quantum neural" AND PDE

### **Búsqueda amplia**
("quantum neural network" OR "quantum machine learning")
AND
("partial differential equation" OR PDE)

### **Solo categoría Quantum Physics**
("quantum neural" AND PDE) AND cat:quant-ph

### **PINNs cuánticas**
("quantum" AND "physics-informed neural network") AND cat:cs.LG

### **Con sinónimos adicionales**
("quantum neural" OR "quantum circuit learning")
AND
("partial differential equation" OR PDE OR "differential equation")


## 8. Pasos para construir tu ecuación de búsqueda

1. **Define tu tema.**
2. **Elige palabras clave.**
3. **Identifica sinónimos.**
4. **Combina usando `AND` y `OR`.**
5. **(Opcional) Restringe por categoría (`cat:`).**

In [4]:
# --- QUERIES ---
# Each entry is an arXiv search expression; quoted terms are matched as exact phrases.
queries = [
    '"quantum neural network" AND "partial differential equation"',
    '"quantum machine learning" AND "differential equation"',
    '"quantum neural" AND PDE',
    '"quantum" AND "physics-informed neural networks"',
    '"quantum" AND PINN',
]

# Upper bound on the number of papers requested per query.
max_results = 200

# Column order of the resulting dataset; also used to build a well-formed
# (empty but correctly labelled) DataFrame when no query returns anything.
columns = [
    "title",
    "authors",
    "abstract",
    "arxiv_id",
    "category",
    "date",
    "pdf_url",
    "query_used",
]

# `Search.results()` is deprecated in arxiv 2.x, so results are requested through
# a `Client`. A single client is shared by all the queries.
client = arxiv.Client()

data = []

for q in queries:
    print(f"Buscando: {q}")
    search = arxiv.Search(
        query=q,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    for paper in client.results(search):
        data.append({
            "title": paper.title,
            "authors": [a.name for a in paper.authors],
            "abstract": paper.summary,
            "arxiv_id": paper.get_short_id(),
            "category": paper.primary_category,
            "date": paper.published,
            "pdf_url": paper.pdf_url,
            "query_used": q,  # remember which query produced this row
        })

# --- BUILD THE DATASET ---
# Passing `columns` explicitly keeps the "arxiv_id" column present even when `data`
# is empty, so the de-duplication below cannot fail with a KeyError.
df = pd.DataFrame(data, columns=columns)

# A paper matched by several queries appears once per query; keep only the first
# occurrence of each arXiv ID (its `query_used` is the first query that found it).
df = df.drop_duplicates(subset="arxiv_id")

df.to_csv("arxiv_PDE_QNN.csv", index=False)

print("Base de datos creada con", len(df), "papers.")
df.head()


Buscando: "quantum neural network" AND "partial differential equation"


  for paper in search.results():


Buscando: "quantum machine learning" AND "differential equation"
Buscando: "quantum neural" AND PDE
Buscando: "quantum" AND "physics-informed neural networks"
Buscando: "quantum" AND PINN
Base de datos creada con 351 papers.


Unnamed: 0,title,authors,abstract,arxiv_id,category,date,pdf_url,query_used
0,Quantum Orthogonal Separable Physics-Informed ...,"[Pietro Zanotta, Ljubomir Budinski, Caglar Ayt...",This paper introduces Quantum Orthogonal Separ...,2511.12613v1,quant-ph,2025-11-16 14:15:19+00:00,https://arxiv.org/pdf/2511.12613v1,"""quantum neural network"" AND ""partial differen..."
1,APRIL: Auxiliary Physically-Redundant Informat...,"[Matteo Scialpi, Francesco Di Clemente, Leigh ...",Physics-Informed Neural Networks (PINNs) embed...,2510.13677v1,gr-qc,2025-10-15 15:34:19+00:00,https://arxiv.org/pdf/2510.13677v1,"""quantum neural network"" AND ""partial differen..."
2,Neural PDE Solvers with Physics Constraints: A...,[Jiakang Chen],Partial differential equations (PDEs) underpin...,2510.09693v1,cs.LG,2025-10-09 13:41:51+00:00,https://arxiv.org/pdf/2510.09693v1,"""quantum neural network"" AND ""partial differen..."
3,Quantum Random Feature Method for Solving Part...,"[Junpeng Hu, Shi Jin, Nana Liu, Lei Zhang]",Quantum computing holds significant promise fo...,2510.07945v1,quant-ph,2025-10-09 08:42:09+00:00,https://arxiv.org/pdf/2510.07945v1,"""quantum neural network"" AND ""partial differen..."
4,PALQO: Physics-informed Model for Accelerating...,"[Yiming Huang, Yajie Hao, Jing Zhou, Xiao Yuan...",Variational quantum algorithms (VQAs) are lead...,2509.20733v1,quant-ph,2025-09-25 04:26:02+00:00,https://arxiv.org/pdf/2509.20733v1,"""quantum neural network"" AND ""partial differen..."


# **TALLER PLN**

Cada estudiante o grupo entregará:
1. Tema preseleccionado de investigación para tesis.
2. Dataset con ≥ 50 abstracts de arXiv.
3. Limpieza básica del texto.
4. Matriz TF-IDF y otros vectorizadores.
5. Clustering simple (K-means como mínimo; pueden probar otros algoritmos de clustering, no hay límite).
6. Visualización 2D (opcional).
7. Interpretación de los clusters.
8. Conclusión: ¿qué subtema parece más prometedor para su tesis?