In [1]:
import os
import pandas as pd

from dotenv import load_dotenv
from pymongo import MongoClient
from tqdm import tqdm

In [2]:
# Source workbooks; the relative names are resolved against the current
# working directory of the kernel.
BAJA_WORKBOOK = "Baja.xlsx"
PLTU_WORKBOOK = "DATASETPLTU.xlsx"

df_baja = pd.read_excel(BAJA_WORKBOOK)
df_pltu = pd.read_excel(PLTU_WORKBOOK)

In [3]:
load_dotenv(".env")


def _require_env(name):
    """Return the value of environment variable `name`, raising if it is unset or empty."""
    value = os.getenv(name)
    if not value:
        # Fail fast: os.getenv would otherwise hand None to the caller. For the
        # connection string, PyMongo documents that an omitted host falls back
        # to its default, so the notebook would target the wrong server.
        raise RuntimeError(
            f"Required environment variable {name!r} is not set; check the .env file."
        )
    return value


client = MongoClient(
    _require_env("MONGO_CONNECTION_STRING"),
    # Allow up to 300 000 ms (5 minutes) for server selection.
    serverSelectionTimeoutMS=300000
)
db = client[_require_env("MONGO_DATABASE_NAME")]
collection_baja = db[_require_env("MONGO_COLLECTION_NAME_BAJA")]
collection_pltu = db[_require_env("MONGO_COLLECTION_NAME_PLTU")]

In [4]:
def store_to_mongo(df, collection, mode="append"):
    """
    Stores a dataframe to a MongoDB collection.

    Parameters:
    df (pd.DataFrame): The dataframe to be stored; each row becomes one document.
    collection (pymongo.collection.Collection): The MongoDB collection.
    mode (str): The mode of operation - 'replace' to replace existing data, 'append' to add to existing data.

    Raises:
    ValueError: If mode is neither 'append' nor 'replace'. Raised before the
        collection is touched.
    """
    # Reject unknown modes up front: silently falling through to an append on
    # a typo such as "Replace" would duplicate data instead of replacing it.
    if mode not in ("append", "replace"):
        raise ValueError(f"mode must be 'append' or 'replace', got {mode!r}")

    # Convert dataframe to list of dictionaries with progress bar.
    # to_dict() has already built every record by the time tqdm iterates, so
    # the bar only tracks copying them into the list.
    data_dict = list(tqdm(df.to_dict(orient="records"), desc="Converting dataframe"))

    if mode == "replace":
        # Delete all existing documents in the collection
        collection.delete_many({})
        print("Existing data replaced.")

    # insert_many rejects an empty document list, so an empty dataframe must
    # skip the call instead of failing (in replace mode, after the delete).
    if data_dict:
        # Insert all documents at once
        collection.insert_many(data_dict)
    print(f"{len(data_dict)} documents inserted.")

def load_from_mongo(collection):
    """
    Reads every document of a MongoDB collection into a pandas dataframe.

    Parameters:
    collection (pymongo.collection.Collection): The MongoDB collection to read.

    Returns:
    pd.DataFrame: One row per document, with the "_id" column removed.
    """
    documents = list(collection.find())
    frame = pd.DataFrame(documents)

    # errors="ignore" turns the drop into a no-op when there is no "_id"
    # column (for example, when the collection returned no documents).
    return frame.drop(columns=["_id"], errors="ignore")

In [5]:
# Overwrite the Baja collection with the rows read from the workbook.
store_to_mongo(
    df=df_baja,
    collection=collection_baja,
    mode="replace",
)

Converting dataframe: 100%|██████████| 986/986 [00:00<00:00, 5596189.10it/s]


Existing data replaced.
986 documents inserted.


In [6]:
# Overwrite the PLTU collection with the rows read from the workbook.
store_to_mongo(
    df=df_pltu,
    collection=collection_pltu,
    mode="replace",
)

Converting dataframe: 100%|██████████| 300/300 [00:00<00:00, 1596816.24it/s]


Existing data replaced.
300 documents inserted.


In [7]:
# Read both collections back into dataframes.
df_baja_loaded = load_from_mongo(collection=collection_baja)
df_pltu_loaded = load_from_mongo(collection=collection_pltu)