## Imports y conexión a la DB

In [31]:
import os
from pymongo import MongoClient
import pandas as pd
import plotly.express as px

# Connection settings come from the environment. The fallbacks are the
# database and collection names this notebook previously hard-coded; `or`
# also covers variables that are set but empty.
MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017")
DB_NAME = os.getenv("MONGO_DB") or "foodfacts_db"
COLL_NAME = os.getenv("MONGO_COLLECTION") or "products"

client = MongoClient(MONGO_URI)
# Build the handles from the configured names so the summary printed below
# always describes the database/collection that is actually queried.
db = client[DB_NAME]
col = db[COLL_NAME]

# NOTE(review): MONGO_URI is printed verbatim; keep credentials out of it
# (or clear this output) before sharing the notebook.
print("Conectando a MongoDB:", MONGO_URI)
# MongoClient connects lazily, so creating it proves nothing. Round-trip a
# ping so an unreachable server fails here rather than in a later cell.
client.admin.command("ping")
print("DB:", DB_NAME, "Colección:", COLL_NAME)

Conectando a MongoDB: mongodb://localhost:27017
DB: foodfacts_db Colección: products


## Distribución Nutri-Score

In [32]:
# Count products per Nutri-Score grade, ignoring documents whose grade is
# missing or null, and return the grades in ascending order.
match_graded = {"$match": {"nutriscore_grade": {"$exists": True, "$ne": None}}}
count_by_grade = {"$group": {"_id": "$nutriscore_grade", "count": {"$sum": 1}}}
order_by_grade = {"$sort": {"_id": 1}}
pipeline = [match_graded, count_by_grade, order_by_grade]

res = [doc for doc in col.aggregate(pipeline)]

# The group key arrives as `_id`; expose it under a readable column name
# for the chart while keeping the original column.
df_ns = pd.DataFrame(res)
df_ns = df_ns.assign(nutriscore=df_ns["_id"])

fig = px.pie(
    df_ns,
    names="nutriscore",
    values="count",
    title="Distribución Nutri-Score",
)
fig.show()


## Calorías vs Azúcar (Scatter)

In [33]:
# Upper bound on the number of documents pulled into memory for the scatter.
SCATTER_SAMPLE_LIMIT = 20000

# `$exists: True` alone also matches fields stored as null. Exclude those
# too (as the other cells in this notebook do) so the sample is not spent
# on rows that cannot be plotted.
cursor = col.find(
    {
        "nutriments.sugars_100g": {"$exists": True, "$ne": None},
        "nutriments.energy_kcal_100g": {"$exists": True, "$ne": None},
    },
    {"product_name": 1, "nutriments.sugars_100g": 1, "nutriments.energy_kcal_100g": 1},
).limit(SCATTER_SAMPLE_LIMIT)

# json_normalize flattens the nested `nutriments` sub-document into
# dotted column names, which are the names used for the axes below.
df = pd.json_normalize(list(cursor))

if df.empty:
    # With no rows the frame has no columns and px.scatter would fail with
    # an unrelated-looking "column not found" error.
    print("No se encontraron productos con azúcar y calorías por 100g.")
else:
    fig = px.scatter(
        df,
        x="nutriments.energy_kcal_100g",
        y="nutriments.sugars_100g",
        hover_name="product_name",
        title="Calorías (kcal) vs Azúcar (g) por 100g",
    )
    fig.show()


## Promedio de azúcar por categoría (top 20, con mínimo 50 productos)

In [34]:
# Minimum number of products a category needs to be included.
# Lower this value if the cell reports that no category qualifies.
category_count = 50

# Average sugar per category tag: keep products with a sugar value, emit one
# row per (product, tag), aggregate per tag, drop sparsely populated tags and
# keep the 20 tags with the highest average.
pipeline = [
    {"$match": {"nutriments.sugars_100g": {"$exists": True, "$ne": None}}},
    {"$unwind": "$categories_tags"},
    {"$group": {"_id": "$categories_tags", "avg_sugars": {"$avg": "$nutriments.sugars_100g"}, "count": {"$sum": 1}}},
    {"$match": {"count": {"$gte": category_count}}},
    {"$sort": {"avg_sugars": -1}},
    {"$limit": 20}
]

res = list(col.aggregate(pipeline))
# Name the columns explicitly so an empty result still yields a well-formed
# (empty) frame instead of one with no columns at all.
df_cat = pd.DataFrame(res, columns=["_id", "avg_sugars", "count"])

if df_cat.empty:
    # Previously this case surfaced as an opaque plotly error about missing
    # columns; tell the reader what to change instead.
    print(f"Ninguna categoría tiene >={category_count} productos; disminuye category_count.")
else:
    fig = px.bar(df_cat, x="_id", y="avg_sugars", title=f"Promedio de azúcares por categoría (>={category_count} productos)")
    fig.update_layout(xaxis_tickangle=-45)
    fig.show()
