In [1]:
import os
import uuid
import pandas as pd
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()


# --- OpenAI client -----------------------------------------------------------
# The key is read from the environment (populated by load_dotenv() above).
# Note the variable name is OPENAI_API_KEY2, not the conventional OPENAI_API_KEY.
openai_api_key = os.getenv("OPENAI_API_KEY2")

openai_client = OpenAI(
    api_key=openai_api_key
)

# --- Qdrant client -----------------------------------------------------------
# NOTE(review): https=False means traffic to this remote host is unencrypted —
# confirm this is intentional.
client = QdrantClient(
    host="vps.maestri.com.co",
    port=6333,
    https=False
)

collection_name = "maestri_knowledge"
embedding_size = 1536  # For OpenAI text-embedding-3-small model

# --- (Re)create the collection ----------------------------------------------
# `recreate_collection` is deprecated in qdrant-client; perform the same
# "drop if present, then create" sequence with the supported calls so the
# cell still starts from an empty collection on every run.
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)

client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=embedding_size, distance=Distance.COSINE),
)

# Business knowledge base: one entry per snippet, each carrying the category it
# belongs to and the (Spanish) text that will be embedded and stored.
documents = [
    {"category": category, "text": text}
    for category, text in (
        ("contacto", "Horarios: Lunes a Viernes de 10:00 A.M a 6:30 P.M. Sábados y domingos de 10:00 A.M a 6:00 P.M."),
        ("contacto", "Dirección: CRA 27A # 68-87 Bogotá DC Colombia. Email: josue.santamaria@maestri.co. Whatsapp: (1) 7328309 - (+57) 3154831517."),
        ("nosotros", "Maestri Milano es una boutique de vinos y alimentos gourmet que lleva a tu mesa una auténtica experiencia gastronómica a la italiana."),
        ("nosotros", "Desde 2019 producimos charcutería artesanal italiana en Colombia con estándares y recetas milenarias italianas."),
        ("nosotros", "Ofrecemos vinos de más de 40 denominaciones y 80 referencias exclusivas, junto a alimentos como Parmigiano Reggiano, trufa y aceite extra virgen."),
        ("descuentos", "Ofrecemos descuentos por cantidad, 5% de descuento en compras de 6 botellas, 10% en compras de 12 botellas"),
        ("servicios", "Ofrecemos servicios de catas de vino y charcutería, asesoría en selección de productos, y catering para eventos. Habla con un asesor para más información."),
        (
            "productos",
            "En Maestri Milano encontrarás una exclusiva selección de productos italianos gourmet: vinos importados y vinos de la casa, charcutería, quesos, pastas, trufas, galletas italianas, carnes curadas, panettone, alimentos gourmet, anchetas, cajas de regalo, cristalería y acceso al Maestri Wine Club. Cada producto ha sido cuidadosamente elegido para brindarte una experiencia auténtica y sofisticada.",
        ),
        (
            "tiempo de entrega",
            "Ofrecemos dos opciones de envío: Envío normal por $8.000 con entrega entre 24 y 48 horas, o Envío express por $12.000 con entrega rápida entre 3 y 6 horas. Elige la opción que mejor se adapte a tu necesidad.",
        ),
        ("equipo", "Riccardo Trentini – Sales & Finance"),
        ("equipo", "Carlo Angius – Process Optimization & Markets"),
        ("equipo", "Danilo Marotta – Administración y Producción"),
        ("equipo", "Manuel Ghiddi – Maestro Charcutero"),
    )
]

# Gather the raw text of every document so all of them are embedded in a
# single OpenAI request.
texts = [entry["text"] for entry in documents]

# One batched embeddings call; response.data holds one embedding object per input.
response = openai_client.embeddings.create(
    model="text-embedding-3-small",
    input=texts,
)

# Pair each returned embedding with the document at the same position and wrap
# it in a Qdrant point carrying a fresh random UUID and the text/category payload.
points = []
for position, embedded in enumerate(response.data):
    source_doc = documents[position]
    points.append(
        PointStruct(
            id=str(uuid.uuid4()),
            vector=embedded.embedding,
            payload={
                "text": source_doc["text"],
                "category": source_doc["category"],
            },
        )
    )

# Write all points to the collection in one upsert call.
client.upsert(collection_name=collection_name, points=points)

print(f"✅ Inserted {len(points)} documents into Qdrant collection: {collection_name}")


  client.recreate_collection(


✅ Inserted 13 documents into Qdrant collection: maestri_knowledge


In [32]:
import logging
import os
import re

import pandas as pd
import requests
from google.oauth2 import service_account
from googleapiclient.discovery import build

# ---------------------------
# CONFIGURATION
# ---------------------------
MERCHANT_ID = "441343742"   # Google Merchant Center account ID

# Path to the Google service-account JSON key. Taken from the environment so
# the notebook does not depend on one user's absolute local path; falls back to
# a file named service_account.json in the working directory.
SERVICE_ACCOUNT_FILE = os.getenv("GOOGLE_SERVICE_ACCOUNT_FILE", "service_account.json")
SCOPES = ["https://www.googleapis.com/auth/content"]

credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES
)

# Build Google Content API client
service = build("content", "v2.1", credentials=credentials)

# SECURITY: the Webflow token must never be committed in the notebook. It is
# read from the environment and a missing value fails fast with KeyError.
# A token was previously hard-coded here; treat it as leaked and rotate it.
WEBFLOW_API_TOKEN = os.environ["WEBFLOW_API_TOKEN"]
COLLECTION_ID = "6660d3a96fe3b376c162563e"
BASE_URL = f"https://api.webflow.com/v2/collections/{COLLECTION_ID}/items"
HEADERS = {
    "Authorization": f"Bearer {WEBFLOW_API_TOKEN}",
    "accept-version": "2.0.0"
}

logger = logging.getLogger(__name__)

# ---------------------------
# HELPERS
# ---------------------------
def clean(val):
    """Normalize a raw field value to a stripped string.

    Args:
        val: Any value read from an item (string, number, None, NaN, list, ...).

    Returns:
        "" when ``val`` is None or a scalar missing value (NaN / NA / NaT);
        otherwise ``str(val)`` with surrounding whitespace removed.
    """
    if val is None:
        return ""
    # pd.isna() returns an array for list-like input, and using that array in
    # an `if` raises ValueError for multi-element lists, so only ask pandas
    # about scalars.
    if pd.api.types.is_scalar(val) and pd.isna(val):
        return ""
    return str(val).strip()

# Compiled once at definition time instead of on every call. `[^>]*` also
# matches newlines, so tags that are split across lines are removed too.
_HTML_TAG_RE = re.compile(r"<[^>]*>")


def strip_html(text):
    """Remove HTML tags from ``text``.

    Args:
        text: A string that may contain HTML markup, or a falsy value
            (None / "").

    Returns:
        The text with every ``<...>`` span removed; "" for falsy input.
        HTML entities (e.g. ``&amp;``) are left untouched.
    """
    return _HTML_TAG_RE.sub("", text or "")

# ---------------------------
# FETCH ALL WEBFLOW ITEMS
# ---------------------------
def get_all_webflow_items(timeout=30):
    """Fetch every item of the Webflow collection, following offset pagination.

    Requests pages of 100 items from BASE_URL (with HEADERS) until a page comes
    back with fewer items than the page size.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list with the ``items`` entries of all pages, in the order received.

    Raises:
        requests.HTTPError: if any page responds with an error status.
        requests.Timeout: if a request exceeds ``timeout``.
    """
    offset = 0
    limit = 100
    all_items = []

    while True:
        response = requests.get(
            BASE_URL,
            headers=HEADERS,
            params={"offset": offset, "limit": limit},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
        # `or []` guards against an explicit null "items" value in the payload.
        items = data.get("items") or []
        all_items.extend(items)

        # A short (or empty) page means there is nothing left to fetch.
        if len(items) < limit:
            logger.info("All items fetched.")
            break

        offset += limit

    return all_items

# ---------------------------
# MAP WEBFLOW → GOOGLE MERCHANT (Content API format)
# ---------------------------
def webflow_to_product(row):
    """Map one Webflow collection item to a Content API product body.

    Args:
        row: A Webflow item dict. Reads ``id``, ``isArchived``, ``isDraft`` and
            the nested ``fieldData`` dict (name, precio, slug,
            imagen-del-producto, bodega, tipo, maridaje-1, maridaje-2,
            notas-de-cata, gr-ml, ocasion, descripcion).

    Returns:
        A dict with the keys offerId, title, description, link, imageLink,
        contentLanguage, targetCountry, channel, availability, condition and
        price, ready to be passed as the ``body`` of ``products().insert``.
    """
    # `or {}` also covers an explicit null fieldData, not just a missing key.
    fd = row.get("fieldData") or {}

    # Core fields (named offer_id so the builtin `id` is not shadowed)
    offer_id = clean(row.get("id"))
    product_name = clean(fd.get("name"))
    precio_raw = clean(fd.get("precio"))
    slug = clean(fd.get("slug"))

    # Image handling: the field may be a single dict or a list of dicts.
    imagen_data = fd.get("imagen-del-producto")
    url_imagen = ""

    if isinstance(imagen_data, dict):
        url_imagen = clean(imagen_data.get("url"))
    elif isinstance(imagen_data, list) and imagen_data and isinstance(imagen_data[0], dict):
        url_imagen = clean(imagen_data[0].get("url"))

    # Anything that is not an absolute http(s) URL is replaced by the default image.
    if not url_imagen.startswith("http"):
        url_imagen = "https://maestri.com.co/default-image.jpg"

    # Normalize price: drop "," and "$", fall back to 0.0 when unparsable.
    # NOTE(review): a value formatted with "." as thousands separator
    # (e.g. "120.000") would parse as 120.0 — confirm the format of `precio`.
    try:
        precio = float(precio_raw.replace(",", "").replace("$", "")) if precio_raw else 0.0
    except ValueError:
        precio = 0.0

    # Build link
    url = f"https://maestri.com.co/products/{slug}" if slug else ""

    # Build description
    bodega = clean(fd.get("bodega"))
    tipo = clean(fd.get("tipo"))
    maridaje1 = clean(fd.get("maridaje-1"))
    maridaje2 = clean(fd.get("maridaje-2"))
    maridaje = " y ".join(m for m in (maridaje1, maridaje2) if m)
    notas = clean(fd.get("notas-de-cata"))
    gr_ml = clean(fd.get("gr-ml"))
    ocasion = clean(fd.get("ocasion"))
    descripcion_texto = strip_html(fd.get("descripcion"))

    # Headline is "<name> - <bodega>"; the separator is only added when a
    # bodega exists, so the description never starts with a dangling "name - ".
    headline = ""
    if product_name:
        headline = f"{product_name} - {bodega}" if bodega else product_name

    description_parts = [
        headline,
        f"Tipo: {tipo}" if tipo else "",
        f"Notas: {notas}" if notas else "",
        f"Maridaje: {maridaje}" if maridaje else "",
        f"Ocasión: {ocasion}" if ocasion else "",
        f"Contenido: {gr_ml}" if gr_ml else "",
        descripcion_texto,
    ]
    description = " | ".join(part for part in description_parts if part)

    # Availability logic: archived or draft items are reported as out of stock.
    if row.get("isArchived") or row.get("isDraft"):
        availability = "out of stock"
    else:
        availability = "in stock"

    return {
        "offerId": offer_id,
        "title": product_name or "Producto Maestri",
        "description": description or "Producto disponible en Maestri Milano.",
        "link": url,
        # url_imagen is already guaranteed to start with "http" (see fallback above).
        "imageLink": url_imagen,
        "contentLanguage": "es",
        "targetCountry": "CO",
        "channel": "online",
        "availability": availability,
        "condition": "new",
        "price": {"value": f"{precio:.2f}", "currency": "COP"},
    }

# ---------------------------
# UPSERT TO GOOGLE MERCHANT
# ---------------------------
def upsert_products_to_merchant():
    """Push every Webflow item to Google Merchant Center, one insert per item.

    Each item is converted with ``webflow_to_product`` and sent through
    ``service.products().insert``. A failure on one product is printed and the
    loop moves on to the next item; nothing is returned.
    """
    for webflow_item in get_all_webflow_items():
        payload = webflow_to_product(webflow_item)
        try:
            insert_request = service.products().insert(
                merchantId=MERCHANT_ID, body=payload
            )
            result = insert_request.execute()
        except Exception as err:
            # Best effort: report the failure and keep syncing the remaining items.
            print(f"❌ Failed to sync {payload['offerId']}: {err}")
        else:
            print(f"✅ Synced: {payload['offerId']} → {result.get('id')}")

# Entry point: run the full Webflow → Google Merchant sync when this code is
# executed as the main module (which is also the case when the cell is run).
if __name__ == "__main__":
    upsert_products_to_merchant()


# def test_connection():
#     try:
#         response = service.products().list(merchantId=MERCHANT_ID, maxResults=5).execute()
#         print("✅ Connection OK. Example products:")
#         for p in response.get("resources", []):
#             print("-", p.get("id"), p.get("title"))
#     except Exception as e:
#         print("❌ Connection failed:", e)

# if __name__ == "__main__":
#     test_connection()

✅ Synced: 689a4619f740d0df41729d37 → online:es:CO:689a4619f740d0df41729d37
✅ Synced: 689a450172dc1bda3077933e → online:es:CO:689a450172dc1bda3077933e
✅ Synced: 689a43b6585d4b0bf7ce0997 → online:es:CO:689a43b6585d4b0bf7ce0997
✅ Synced: 689a42af654bc80d576a1811 → online:es:CO:689a42af654bc80d576a1811
✅ Synced: 689a416f1982a66dabd8b4f3 → online:es:CO:689a416f1982a66dabd8b4f3
✅ Synced: 689a3f171982a66dabd74b95 → online:es:CO:689a3f171982a66dabd74b95
✅ Synced: 689a3be390cef181b82ff4f3 → online:es:CO:689a3be390cef181b82ff4f3
✅ Synced: 689a39951cfdaa48abbf1c1c → online:es:CO:689a39951cfdaa48abbf1c1c
✅ Synced: 689a380f1d519124bce3b214 → online:es:CO:689a380f1d519124bce3b214
✅ Synced: 688450c19b8aeb3a1e35866b → online:es:CO:688450c19b8aeb3a1e35866b
✅ Synced: 688450c01b3a6638d0e35df2 → online:es:CO:688450c01b3a6638d0e35df2
✅ Synced: 688450c02b80f368ffd89293 → online:es:CO:688450c02b80f368ffd89293
✅ Synced: 688450bf0e7170ac938dd35a → online:es:CO:688450bf0e7170ac938dd35a
✅ Synced: 688450be5664079