In [1]:
from pymongo import MongoClient
from datetime import datetime

# Conexión al cluster MongoDB
client = MongoClient(
    "mongodb://mongo1:30001,mongo2:30002,mongo3:30003/?replicaSet=my-replica-set"
)
db = client["arxiv_db"]
collection = db["articles"]

## a. Devolver los títulos y fechas de creación de artículos publicados en el año 2025. Mostrar solo esos campos y limitar a los primeros 20 resultados.

In [16]:
resultado = collection.aggregate(
    [  
        {
            "$match": {  
                "versions": {
                    "$elemMatch": {
                        "version": "v1",
                        "created": {"$regex": "2025"}
                    }
                }
            }
        },
        {
            "$project": {
                "title": 1,
                "created": "$versions.created",
                "_id": 0
            }
        },
        {
            "$limit": 20
        }
    ]
)
c = 1
for i in resultado:
    print(c, ". ", i)
    c += 1
    

1 .  {'title': 'Minkowski problem of anisotropic p-torsional rigidity', 'created': ['Wed, 1 Jan 2025 00:38:34 GMT', 'Sun, 5 Jan 2025 03:04:59 GMT']}
2 .  {'title': 'Gravitational Instantons, old and new', 'created': ['Wed, 1 Jan 2025 00:38:55 GMT']}
3 .  {'title': 'A system of Schr\\"odinger\'s problems and functional equations', 'created': ['Wed, 1 Jan 2025 04:24:07 GMT']}
4 .  {'title': 'Category O for quantum loop algebras', 'created': ['Wed, 1 Jan 2025 05:01:39 GMT', 'Thu, 9 Jan 2025 12:06:04 GMT']}
5 .  {'title': 'Automatic Construction of Pattern Classifiers Capable of Continuous\n  Incremental Learning and Unlearning Tasks Based on Compact-Sized\n  Probabilistic Neural Network', 'created': ['Wed, 1 Jan 2025 05:02:53 GMT']}
6 .  {'title': 'Beyond Static Datasets: A Behavior-Driven Entity-Specific Simulation to\n  Overcome Data Scarcity and Train Effective Crypto Anti-Money Laundering\n  Models', 'created': ['Wed, 1 Jan 2025 06:58:05 GMT']}
7 .  {'title': 'Excess Ultraviolet Emiss

## b. Devolver los títulos y los autores de artículos que pertenezcan a las categorías "cs.AI" o "stat.ML" y que tengan al menos tres autores. Mostrar solo esos campos y limitar a los primeros 10 resultados.

In [17]:
resultado = collection.aggregate([
    {
        "$match": {
            "$or":[
                {"categories": {"$regex": "cs\\.AI"}},
                {"categories": {"$regex": "stat\\.ML"}}
            ],
            "authors_parsed": {"$exists": True},
            "$expr": {"$gte": [{"$size": "$authors_parsed"}, 3]} 
            
        }
    },
    {
        "$project":{
            "_id": 0,
            "title": 1,
            "authors": 1
            
        }
    },
    {
        "$limit": 10
    }
    
])


c = 1
for i in resultado:
    print(c, ". ", i)
    c += 1
    

1 .  {'authors': 'Tarik Hadzic, Rune Moller Jensen, Henrik Reif Andersen', 'title': 'Calculating Valid Domains for BDD-Based Interactive Configuration'}
2 .  {'authors': 'Kristina Lerman, Anon Plangprasopchok and Chio Wong', 'title': 'Personalizing Image Search Results on Flickr'}
3 .  {'authors': 'Stefano Bistarelli, Ugo Montanari, Francesca Rossi, Francesco Santini', 'title': 'Unicast and Multicast Qos Routing with Soft Constraint Logic Programming'}
4 .  {'authors': 'Juliana S Bernardes, Alberto Davila, Vitor Santos Costa, Gerson\n  Zaverucha', 'title': 'A study of structural properties on profiles HMMs'}
5 .  {'authors': 'H. Satori, M. Harti and N. Chenfour', 'title': 'Introduction to Arabic Speech Recognition Using CMUSphinx System'}
6 .  {'authors': 'H. Satori, M. Harti and N. Chenfour', 'title': 'Arabic Speech Recognition System using CMU-Sphinx4'}
7 .  {'authors': 'Giorgio Terracina, Nicola Leone, Vincenzino Lio, Claudio Panetta', 'title': 'Experimenting with recursive queries 

## c. Devolver los títulos, las categorías y los enlaces al PDF de artículos que pertenezcan a la categoría "hep-ph" y tengan un DOI asignado. Mostrar solo esos campos y limitar a 15 resultados.

In [18]:
resultado = collection.aggregate([
    {
        "$match":{
            "doi": {"$exists": True, "$ne": None},
            "categories": {"$regex": "hep-ph"}
        }
    },
    {
        "$project":{
            "_id": 0,
            "title": 1,
            "categories": 1,
            "pdf_source": 1
        }
    },
    {
        "$limit": 15
    }
])

c = 1
for i in resultado:
    print(c, ". ", i)
    c += 1
    

1 .  {'title': 'Calculation of prompt diphoton production cross sections at Tevatron and\n  LHC energies', 'categories': 'hep-ph', 'pdf_source': 'https://arxiv.org/pdf/0704.0001'}
2 .  {'title': 'Lifetime of doubly charmed baryons', 'categories': 'hep-ph', 'pdf_source': 'https://arxiv.org/pdf/0704.0016'}
3 .  {'title': 'Understanding the Flavor Symmetry Breaking and Nucleon Flavor-Spin\n  Structure within Chiral Quark Model', 'categories': 'hep-ph', 'pdf_source': 'https://arxiv.org/pdf/0704.0029'}
4 .  {'title': 'Crystal channeling of LHC forward protons with preserved distribution in\n  phase space', 'categories': 'hep-ph', 'pdf_source': 'https://arxiv.org/pdf/0704.0031'}
5 .  {'title': 'Probing non-standard neutrino interactions with supernova neutrinos', 'categories': 'hep-ph', 'pdf_source': 'https://arxiv.org/pdf/0704.0032'}
6 .  {'title': 'Scalar radius of the pion and zeros in the form factor', 'categories': 'hep-ph hep-lat nucl-th', 'pdf_source': 'https://arxiv.org/pdf/0704.0039

## d. Devolver los títulos, nombres de los autores y la referencia de publicación (journal-ref) de los artículos que tengan un DOI asignado. Mostrar solo esos campos y ordenar los resultados alfabéticamente por título. Limitar a los primeros 20 resultados.

In [19]:
resultado = collection.aggregate([
    {
        "$match":{
            "doi": {"$exists": True, "$ne": None}
        }
    },
    {
        "$project":{
            "_id": 0,
            "title": 1,
            "authors": 1,
            "journal-ref": 1
        }
    },
    {
      "$sort": {"title": 1}  
    },
    {
        "$limit": 20
    }
])

c = 1
for i in resultado:
    print(c, ". ", i)
    c += 1

1 .  {'authors': 'Aleks Kissinger (University of Oxford), Vladimir Zamdzhiev (University\n  of Oxford)', 'title': '!-Graphs with Trivial Overlap are Context-Free', 'journal-ref': 'EPTCS 181, 2015, pp. 16-31'}
2 .  {'authors': 'Oscar Henriksson and Christopher Rosen', 'title': '"$1k_F$" Singularities and Finite Density ABJM Theory at Strong Coupling', 'journal-ref': None}
3 .  {'authors': 'Olga Zagovora (1), Fabian Fl\\"ock (1), Claudia Wagner (1 and 2) ((1)\n  GESIS - Leibniz Institute for the Social Sciences, (2) University of\n  Koblenz-Landau)', 'title': '"(Weitergeleitet von Journalistin)": The Gendered Presentation of\n  Professions on Wikipedia', 'journal-ref': None}
4 .  {'authors': 'Stavros Mouslopoulos and Antonios Papazoglou (Oxford University)', 'title': '"+-+" Brane Model Phenomenology', 'journal-ref': 'JHEP 0011 (2000) 018'}
5 .  {'authors': 'K.S. Babu, Jogesh C. Pati and Hanns Stremnitzer', 'title': '"A Hint From the Inter-Family Mass Hierarchy: Two Vector-Like Families\n

## e. Devolver los títulos y la fecha de la primera versión (versions.created) de los artículos enviados entre los años 2010 y 2015. Mostrar solo esos campos y limitar a los primeros 15 resultados.

In [None]:
resultado = collection.aggregate([
    {
        "$match":{
            "versions":{
                "$elemMatch":{
                    "created": {"$regex": "201[0-5]"}
                }
            }
        }
    },
    
])

In [3]:
resultado = collection.find_one()
print(resultado)

{'_id': ObjectId('6848db8fc4779460327cb43b'), 'id': '0704.0017', 'submitter': 'Nceba Mhlahlo', 'authors': 'Nceba Mhlahlo, David H. Buckley, Vikram S. Dhillon, Steven B. Potter,\n  Brian Warner and Patric A. Woudt', 'title': 'Spectroscopic Observations of the Intermediate Polar EX Hydrae in\n  Quiescence', 'comments': '10 pages, 11 figures (figures 3, 4, 7 and 8 at reduced resolution,\n  originals available on request). Accepted for publication in Monthly Notices\n  of the Royal Astronomical Society', 'journal-ref': 'Mon.Not.Roy.Astron.Soc.378:211-220,2007', 'doi': '10.1111/j.1365-2966.2007.11762.x', 'report-no': None, 'categories': 'astro-ph', 'license': None, 'abstract': '  Results from spectroscopic observations of the Intermediate Polar (IP) EX Hya\nin quiescence during 1991 and 2001 are presented. Spin-modulated radial\nvelocities consistent with an outer disc origin were detected for the first\ntime in an IP. The spin pulsation was modulated with velocities near ~500-600\nkm/s. Th

In [13]:
resultado = collection.aggregate(
    [  # El pipeline debe ser una lista
        {
            "$match": {  # Usa $match para filtrar documentos
                "versions": {
                    "$elemMatch": {
                        "version": "v1",
                        "created": {"$regex": "2025"}
                    }
                }
            }
        },
        {
            "$project": {
                "title": 1,
                "created": "$versions.created",
                "_id": 0
            }
        },
        {
            "$limit": 5
        }
    ]
)

for i in resultado:
    print(i)

{'title': 'Minkowski problem of anisotropic p-torsional rigidity', 'created': ['Wed, 1 Jan 2025 00:38:34 GMT', 'Sun, 5 Jan 2025 03:04:59 GMT']}
{'title': 'Gravitational Instantons, old and new', 'created': ['Wed, 1 Jan 2025 00:38:55 GMT']}
{'title': 'A system of Schr\\"odinger\'s problems and functional equations', 'created': ['Wed, 1 Jan 2025 04:24:07 GMT']}
{'title': 'Category O for quantum loop algebras', 'created': ['Wed, 1 Jan 2025 05:01:39 GMT', 'Thu, 9 Jan 2025 12:06:04 GMT']}
{'title': 'Automatic Construction of Pattern Classifiers Capable of Continuous\n  Incremental Learning and Unlearning Tasks Based on Compact-Sized\n  Probabilistic Neural Network', 'created': ['Wed, 1 Jan 2025 05:02:53 GMT']}


In [13]:
# Opción 1: Usando find() e iterando sobre el cursor
cursor = db.articles.find(
    {
        "versions.created": {
            "$regex": "2025"
        }
    },
    {
        "title": 1,
        "versions.created": 1,
        "_id": 0
    }
).limit(20)

# Iterar y mostrar resultados
print("Títulos y fechas de creación (2025):")
for idx, doc in enumerate(results, 1):
    print(f"{idx}. {doc['title']} - {doc['created']}")
# for document in cursor:
#     print(document)

Títulos y fechas de creación (2025):
1. Hamiltonian Graphs and the Traveling Salesman Problem - Tue, 25 Feb 2025 17:59:04 GMT
2. Proof of Riemann Hypothesis - Wed, 19 Mar 2025 03:35:48 GMT
3. Proof of Riemann Hypothesis - Thu, 10 Apr 2025 02:19:52 GMT
4. The nature of electromagnetic energy - Wed, 14 May 2025 18:20:15 GMT
5. Periodic relativity: the theory of gravity in flat space time - Wed, 15 Jan 2025 06:50:35 GMT
6. Rationalization of EPR Coincidence Experiments - Tue, 1 Apr 2025 18:36:55 GMT
7. Generating functions for borders - Sun, 2 Mar 2025 18:13:43 GMT
8. Data Tables for Lorentz and CPT Violation - Mon, 13 Jan 2025 15:37:10 GMT
9. On Two Related Questions of Wilf Concerning Standard Young Tableaux - Mon, 28 Apr 2025 19:09:23 GMT
10. The Kohn Algorithm on Denjoy-Carleman Classes - Fri, 16 May 2025 14:35:45 GMT
11. A proof of the Riemann hypothesis - Wed, 12 Feb 2025 23:35:59 GMT
12. A proof of the Riemann hypothesis - Thu, 17 Apr 2025 17:01:47 GMT
13. Physics education researc