# Consultar datos de MySQL e cargalos en MongoDB (Notebook 4)

## Desde MySQL, selecciona a táboa de datos inserida.

In [None]:
# Requirements: install the third-party packages this notebook needs.
# The %pip magic is used so the packages are installed into the environment
# of the running kernel; a kernel restart may be needed afterwards.

%pip install pandas
%pip install pymongo

Collecting pymongo
  Downloading pymongo-4.15.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Using cached dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)
Downloading pymongo-4.15.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m13.6 MB/s[0m  [33m0:00:00[0m
[?25hUsing cached dnspython-2.8.0-py3-none-any.whl (331 kB)
Installing collected packages: dnspython, pymongo
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [pymongo]m1/2[0m [pymongo]
[1A[2KSuccessfully installed dnspython-2.8.0 pymongo-4.15.5
Note: you may need to restart the kernel to use updated packages.


In [1]:
# Connect to the MySQL database and open the cursor used by the following cells.

import os

import mysql.connector
from mysql.connector import errorcode


# Connection settings. Each value can be overridden with an environment
# variable so real credentials do not have to be stored in the notebook;
# the defaults are the local development values.
HOST = os.environ.get("MYSQL_HOST", "127.0.0.1")
USER = os.environ.get("MYSQL_USER", "root")
PASSWORD = os.environ.get("MYSQL_PASSWORD", "root_password")
DATABASE = os.environ.get("MYSQL_DATABASE", "data_pipeline_db")

# Open the connection to the database.
try:
    mydb = mysql.connector.connect(
        host=HOST,
        user=USER,
        password=PASSWORD,
        database=DATABASE,
    )
    cursor = mydb.cursor()
    print("Afeixado á BD MySQL '%s' como %s@%s" % (DATABASE, USER, HOST))

except mysql.connector.Error as err:
    print(f"Erro de MySQL: {err}")
    # Compare the numeric error code (1049, "Unknown database") rather than
    # searching the message text, which may contain "1049" for other reasons.
    if err.errno == errorcode.ER_BAD_DB_ERROR:
        print(f"Non atopo a base de datos '{DATABASE}'.")
    # Re-raise: without a connection `mydb` and `cursor` are never defined,
    # so the later cells would otherwise fail with an unrelated NameError.
    raise

Afeixado á BD MySQL 'data_pipeline_db' como root@127.0.0.1


In [9]:
# Read the whole 'books' table through the open MySQL cursor.

import pandas as pd

sql_query = "SELECT * FROM books"
cursor.execute(sql_query)
query_data = cursor.fetchall()
# The first field of every cursor.description entry is taken as the column name.
column_names = [name for name, *_ in cursor.description]


# Build the 'df_books' DataFrame from the fetched rows and their column names.

df_books = pd.DataFrame(data=query_data, columns=column_names)

# Report the number of rows and preview the first five.
print("DataFrame 'df_books' arrebolado. Total rexistros:", len(df_books.index))
print("Amosando os primeiros 5 rexistros:")
print(df_books.head(5))


DataFrame 'df_books' arrebolado. Total rexistros: 271065
Amosando os primeiros 5 rexistros:
         isbn                                              title  \
0  0000913154  The Way Things Work: An Illustrated Encycloped...   
1  0001010565                                    Mog's Christmas   
2  0001046438                                               Liar   
3  0001046713                       Twopence to Cross the Mersey   
4  000104687X  T.S. Eliot Reading \The Wasteland\" and Other ...   

                          author  year_of_publication  \
0  C. van Amerongen (translator)                 1967   
1                    Judith Kerr                 1992   
2                    Stephen Fry                    0   
3                Helen Forrester                 1992   
4                     T.S. Eliot                 1993   

                  publisher  
0      Simon &amp; Schuster  
1                   Collins  
2          Harpercollins Uk  
3  HarperCollins Publishers  
4  Har

## Conéctate á base de datos MongoDB usando Python.

In [10]:
from pymongo import MongoClient

# Connect to MongoDB.
# NOTE(review): the credentials are embedded in the URI; consider reading
# them from the environment before sharing this notebook.
try:
    mongo_client = MongoClient("mongodb://mongo_user:mongo_password@localhost:27017/")
    # MongoClient connects lazily, so building the client succeeds even when
    # the server is unreachable. A 'ping' forces a real round trip, which
    # makes the success message below trustworthy.
    mongo_client.admin.command("ping")
    print("✅ Conectado a MongoDB")
except Exception as e:
    # The f-prefix is required for {e} to be interpolated into the message.
    print(f"❌ Erro de conexión a MongoDB: {e}")
    # Re-raise so the notebook stops here instead of failing in a later cell.
    raise



✅ Conectado a MongoDB


## Crea unha colección.

In [15]:
# Select the database and the collection; per the original note they are
# created lazily, so nothing new shows up in the listing printed below yet.
db_mongo = mongo_client.get_database('db_libros')
coleccion = db_mongo.get_collection('libros')

# Check the connection by listing the databases the server reports.
print(f"Bases de datos dispoñibles: {mongo_client.list_database_names()}")

Bases de datos dispoñibles: ['admin', 'config', 'local']


## Insire documentos a partir de filas do teu DataFrame.

In [None]:
# The assignment asks for a DataFrame as the source, although building the
# dictionaries straight from the cursor looks like it would be simpler.

# Convert the DataFrame into a list of dicts, one per row.
datos_dicionario = df_books.to_dict(orient='records')

# Idempotency guard: only load when the collection is still empty, so that
# re-running this cell (or Restart & Run All) does not insert another full
# copy of the table.
documentos_existentes = coleccion.count_documents({})

if documentos_existentes:
    print(f"A colección xa contén {documentos_existentes} documentos; omítese a inserción.")
elif datos_dicionario:
    resultado = coleccion.insert_many(datos_dicionario)
    print(f"Éxito: {len(resultado.inserted_ids)} libros arrebolados.")
else:
    # insert_many rejects an empty list, so report the empty case instead.
    print("O DataFrame está baleiro; non hai nada que inserir.")




Éxito: 271065 libros arrebolados.


## Verifica realizando consultas simples (find, count, etc.).

In [37]:
# Confirm that the data was stored (persistence).

if 'db_libros' in mongo_client.list_database_names():
    print("✅ A BD 'db_libros' está gardada.\n")

print("Total de documentos: ", coleccion.count_documents({}))

print("\nAmosando os 5 primeiros libros da Colección:")
print("=============================================")
for primeiro_doc in coleccion.find(limit=5):
    print(primeiro_doc)

# Books whose title contains 'ontology' as a standalone word,
# matched case-insensitively.

print("\nLibros que conteñen a verba 'ontology':")
print("========================================")
query = {"title": {"$regex": r"\bontology\b", "$options": "i"}}

resultados = coleccion.find(query)

for atopado in resultados:
    print(atopado['title'])

✅ A BD 'db_libros' está gardada.

Total de documentos:  271065

Amosando os 5 primeiros libros da Colección:
{'_id': ObjectId('6953a1da4b1cfc743afa3abd'), 'isbn': '0000913154', 'title': 'The Way Things Work: An Illustrated Encyclopedia of Technology', 'author': 'C. van Amerongen (translator)', 'year_of_publication': 1967, 'publisher': 'Simon &amp; Schuster'}
{'_id': ObjectId('6953a1da4b1cfc743afa3abe'), 'isbn': '0001010565', 'title': "Mog's Christmas", 'author': 'Judith Kerr', 'year_of_publication': 1992, 'publisher': 'Collins'}
{'_id': ObjectId('6953a1da4b1cfc743afa3abf'), 'isbn': '0001046438', 'title': 'Liar', 'author': 'Stephen Fry', 'year_of_publication': 0, 'publisher': 'Harpercollins Uk'}
{'_id': ObjectId('6953a1da4b1cfc743afa3ac0'), 'isbn': '0001046713', 'title': 'Twopence to Cross the Mersey', 'author': 'Helen Forrester', 'year_of_publication': 1992, 'publisher': 'HarperCollins Publishers'}
{'_id': ObjectId('6953a1da4b1cfc743afa3ac1'), 'isbn': '000104687X', 'title': 'T.S. Eliot