# Prueba de Conexi√≥n a MongoDB

Este notebook prueba la conexi√≥n a MongoDB usando PyMongo con la cadena de conexi√≥n proporcionada.

## 1. Import Required Libraries

In [1]:
# Importar las librer√≠as necesarias
import pymongo
from pymongo import MongoClient
import pprint
import json

print("PyMongo version:", pymongo.version)

PyMongo version: 3.12.0


## 2. Establish MongoDB Connection

In [7]:
# Crear la conexi√≥n a MongoDB usando la cadena de conexi√≥n proporcionada
connection_string = "mongodb+srv://reader12:reader12@educationalcluster.7xf5hht.mongodb.net/"

try:
    # Crear el cliente MongoDB
    client = pymongo.MongoClient(connection_string)
    print("‚úÖ Cliente MongoDB creado exitosamente")
    print(f"Cadena de conexi√≥n: {connection_string}")
except Exception as e:
    print(f"‚ùå Error al crear el cliente: {e}")

‚úÖ Cliente MongoDB creado exitosamente
Cadena de conexi√≥n: mongodb+srv://reader12:reader12@educationalcluster.7xf5hht.mongodb.net/


## 3. Test Database Connection

In [8]:
# Probar la conexi√≥n haciendo ping al servidor
try:
    # Hacer ping al servidor para verificar la conexi√≥n
    client.admin.command('ping')
    print("‚úÖ ¬°Conexi√≥n exitosa! El servidor MongoDB est√° respondiendo")
    
    # Obtener informaci√≥n del servidor
    server_info = client.server_info()
    print(f"üìä Versi√≥n de MongoDB: {server_info['version']}")
    print(f"üñ•Ô∏è  Plataforma: {server_info.get('os', {}).get('name', 'Unknown')}")
    
except Exception as e:
    print(f"‚ùå Error al conectar con MongoDB: {e}")

‚úÖ ¬°Conexi√≥n exitosa! El servidor MongoDB est√° respondiendo
üìä Versi√≥n de MongoDB: 8.0.15
üñ•Ô∏è  Plataforma: Unknown


## 4. List Available Databases

In [9]:
# Listar las bases de datos disponibles
try:
    database_names = client.list_database_names()
    print(f"üìÅ Bases de datos disponibles ({len(database_names)}):")
    for i, db_name in enumerate(database_names, 1):
        print(f"   {i}. {db_name}")
    
    # Mostrar informaci√≥n detallada de las bases de datos
    print("\nüìã Informaci√≥n detallada:")
    for db_name in database_names:
        db = client[db_name]
        try:
            collections = db.list_collection_names()
            print(f"   ‚Ä¢ {db_name}: {len(collections)} colecci√≥n(es)")
            if collections:
                for collection in collections[:3]:  # Mostrar solo las primeras 3
                    print(f"     - {collection}")
                if len(collections) > 3:
                    print(f"     ... y {len(collections) - 3} m√°s")
        except Exception as e:
            print(f"   ‚Ä¢ {db_name}: No se puede acceder ({str(e)[:50]}...)")

except Exception as e:
    print(f"‚ùå Error al listar bases de datos: {e}")

üìÅ Bases de datos disponibles (1):
   1. sample_airbnb

üìã Informaci√≥n detallada:
   ‚Ä¢ sample_airbnb: 4 colecci√≥n(es)
     - listings_elements
     - listings_dimensions
     - listings_facts
     ... y 1 m√°s


## 5. Test Collection Access

In [10]:
# Probar el acceso a una colecci√≥n espec√≠fica
try:
    # Obtener las bases de datos disponibles
    database_names = client.list_database_names()
    
    if database_names:
        # Usar la primera base de datos disponible (excluyendo las del sistema)
        target_db = None
        for db_name in database_names:
            if db_name not in ['admin', 'local', 'config']:
                target_db = db_name
                break
        
        if target_db:
            print(f"üéØ Probando acceso a la base de datos: {target_db}")
            db = client[target_db]
            collections = db.list_collection_names()
            
            if collections:
                collection_name = collections[0]
                print(f"üìÑ Probando acceso a la colecci√≥n: {collection_name}")
                collection = db[collection_name]
                
                # Contar documentos en la colecci√≥n
                doc_count = collection.count_documents({})
                print(f"üìä N√∫mero de documentos en '{collection_name}': {doc_count}")
                
                # Obtener un documento de muestra (si existe)
                if doc_count > 0:
                    sample_doc = collection.find_one()
                    print(f"üìã Documento de muestra:")
                    # Mostrar las primeras claves del documento
                    if isinstance(sample_doc, dict):
                        keys = list(sample_doc.keys())[:5]  # Primeras 5 claves
                        print(f"   Campos: {keys}")
                        if len(sample_doc.keys()) > 5:
                            print(f"   ... y {len(sample_doc.keys()) - 5} campos m√°s")
                else:
                    print("   La colecci√≥n est√° vac√≠a")
            else:
                print(f"   La base de datos '{target_db}' no tiene colecciones")
        else:
            print("   Solo se encontraron bases de datos del sistema")
    else:
        print("   No se encontraron bases de datos")

except Exception as e:
    print(f"‚ùå Error al acceder a colecciones: {e}")

üéØ Probando acceso a la base de datos: sample_airbnb
üìÑ Probando acceso a la colecci√≥n: listings_elements
üìä N√∫mero de documentos en 'listings_elements': 5555
üìã Documento de muestra:
   Campos: ['_id', 'listing_url', 'name', 'property_type', 'room_type']
   ... y 1 campos m√°s


## 6. Verify Connection Status

In [6]:
# Verificar el estado final de la conexi√≥n
try:
    print("üîç Resumen de la conexi√≥n:")
    print(f"   üîó Estado: Conectado")
    print(f"   üåê Host: educationalcluster.7xf5hht.mongodb.net")
    print(f"   üë§ Usuario: reader123")
    print(f"   üîí SSL: Habilitado (mongodb+srv)")
    
    # Informaci√≥n adicional del servidor
    server_status = client.admin.command("serverStatus")
    print(f"   ‚è∞ Tiempo de actividad del servidor: {server_status.get('uptime', 'N/A')} segundos")
    print(f"   üè† Host del servidor: {server_status.get('host', 'N/A')}")
    
    # Mostrar estad√≠sticas de conexiones
    connections = server_status.get('connections', {})
    print(f"   üìä Conexiones activas: {connections.get('current', 'N/A')}")
    print(f"   üìä Conexiones disponibles: {connections.get('available', 'N/A')}")
    
    print("\n‚úÖ ¬°Todas las pruebas completadas exitosamente!")
    
except Exception as e:
    print(f"‚ùå Error al verificar el estado: {e}")
    
finally:
    # Cerrar la conexi√≥n
    try:
        client.close()
        print("\nüîê Conexi√≥n cerrada correctamente")
    except:
        pass

üîç Resumen de la conexi√≥n:
   üîó Estado: Conectado
   üåê Host: educationalcluster.7xf5hht.mongodb.net
   üë§ Usuario: reader123
   üîí SSL: Habilitado (mongodb+srv)
‚ùå Error al verificar el estado: user is not allowed to do action [serverStatus] on [admin.], full error: {'ok': 0, 'errmsg': 'user is not allowed to do action [serverStatus] on [admin.]', 'code': 8000, 'codeName': 'AtlasError'}

üîê Conexi√≥n cerrada correctamente


In [11]:
# Seleccionamos base de datos y colecci√≥n
db = client["sample_airbnb"]
collection = db["listingsAndReviews"]

In [12]:
# --- EXTRACT ---
# Extraemos todos los documentos de la colecci√≥n
docs = list(collection.find())

In [13]:
# --- TRANSFORM ---
# 1. CSV: solo campos descriptivos y metadatos elementales de cada propiedad
elemental_keys = [
  "_id",
  "listing_url",
  "name",
  "property_type",
  "room_type",
  "bed_type"
]

csv_rows = []
for doc in docs:
    filtered = {"_id": str(doc["_id"])}
    for key in elemental_keys:
        if key in doc and key != "_id":
            filtered[key] = doc[key]
    csv_rows.append(filtered)

# 2. Dimensiones: objetos/arrays descriptivos (atributos est√°ticos)
dimension_keys = [
  "summary",
  "space",
  "description",
  "neighborhood_overview",
  "notes",
  "transit",
  "access",
  "interaction",
  "house_rules",
  "minimum_nights",
  "maximum_nights",
  "cancellation_policy",
  "amenities",
  "images",
  "host",
  "address",
]

dimension_docs = []
for doc in docs:
    filtered = {"_id": str(doc["_id"])}
    for key in dimension_keys:
        if key in doc:
            filtered[key] = doc[key]
    dimension_docs.append(filtered)

# 3. Hechos: m√©tricas y eventos din√°micos (cambian con el tiempo)
fact_keys = [
  "last_scraped",
  "calendar_last_scraped",
  "first_review",
  "last_review",
  "accommodates",
  "bedrooms",
  "beds",
  "bathrooms",
  "number_of_reviews",
  "price",
  "security_deposit",
  "cleaning_fee",
  "extra_people",
  "guests_included",
  "availability",
  "review_scores",
  "reviews"
]

fact_docs = []
for doc in docs:
    filtered = {"_id": str(doc["_id"])}
    for key in fact_keys:
        if key in doc:
            filtered[key] = doc[key]
    fact_docs.append(filtered)

In [14]:
import json
import pandas as pd
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from bson import json_util

In [16]:
# --- LOAD ---
# Guardamos CSV con atributos escalares
df = pd.DataFrame(csv_rows)
df

Unnamed: 0,_id,listing_url,name,property_type,room_type,bed_type
0,10006546,https://www.airbnb.com/rooms/10006546,Ribeira Charming Duplex,House,Entire home/apt,Real Bed
1,10009999,https://www.airbnb.com/rooms/10009999,Horto flat with small garden,Apartment,Entire home/apt,Real Bed
2,1001265,https://www.airbnb.com/rooms/1001265,Ocean View Waikiki Marina w/prkg,Condominium,Entire home/apt,Real Bed
3,10021707,https://www.airbnb.com/rooms/10021707,Private Room in Bushwick,Apartment,Private room,Real Bed
4,10030955,https://www.airbnb.com/rooms/10030955,Apt Linda Vista Lagoa - Rio,Apartment,Private room,Real Bed
...,...,...,...,...,...,...
5550,9983221,https://www.airbnb.com/rooms/9983221,Cozy apartment downtown Porto,Apartment,Entire home/apt,Real Bed
5551,9985696,https://www.airbnb.com/rooms/9985696,Kadƒ±k√∂y-Altƒ±yol,House,Entire home/apt,Real Bed
5552,9987200,https://www.airbnb.com/rooms/9987200,The best suite in Copacabana (total privacy),Aparthotel,Private room,Real Bed
5553,9990304,https://www.airbnb.com/rooms/9990304,March Madness Special! Aina Nalu Platinum D107,Condominium,Entire home/apt,Real Bed
