In [19]:
# Import what this cell needs so it also runs on a fresh kernel; `os` and
# `load_dotenv` are otherwise only imported by a cell further down.
import os

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()
# NOTE(review): the other cells read "OPENAI_API_KEY2" -- confirm which
# variable name is the intended one.
openai_api_key = os.getenv("OPENAI_API_KEY")

In [6]:
import pandas as pd
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer
from bs4 import BeautifulSoup
import uuid
import os
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()

# Pull the OpenAI credential out of the environment (populated by load_dotenv above).
openai_api_key = os.environ.get("OPENAI_API_KEY2")

# Shared OpenAI client; the embedding helper further down reads this global.
openai_client = OpenAI(api_key=openai_api_key)

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PayloadSchemaType

# 1. Open a connection to the Qdrant server (HTTPS disabled).
client = QdrantClient(host="vps.maestri.com.co", port=6333, https=False)

# 2. Target collection and the dimensionality of its vectors.
collection_name = "user_history"
embedding_size = 1536  # If you're using OpenAI's text-embedding-3-small

# 3. Drop any previous copy of the collection so it is rebuilt from scratch.
#    WARNING: this discards every point already stored in it.
if client.collection_exists(collection_name):
    print(f"🗑️ Deleting existing collection '{collection_name}'...")
    client.delete_collection(collection_name)

# 4. Create the collection: cosine distance, payloads stored on disk.
print(f"🛠️ Creating collection '{collection_name}'...")
client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=embedding_size, distance=Distance.COSINE),
    on_disk_payload=True,
)
print(f"✅ Collection '{collection_name}' created.")

# 5. Payload (metadata) fields to index, each mapped to its index type.
fields = dict(
    question=PayloadSchemaType.TEXT,
    answer=PayloadSchemaType.TEXT,
    subscriber_id=PayloadSchemaType.KEYWORD,
    product_ids=PayloadSchemaType.KEYWORD,  # list of product slugs or IDs
    timestamp=PayloadSchemaType.TEXT,
)

print("🔧 Creating payload indexes...")
# One index per field; the order follows the mapping above.
for field_name, field_type in fields.items():
    client.create_payload_index(
        collection_name=collection_name,
        field_name=field_name,
        field_schema=field_type,
    )
print("✅ All payload indexes created successfully.")


🛠️ Creating collection 'user_history'...
✅ Collection 'user_history' created.
🔧 Creating payload indexes...
✅ All payload indexes created successfully.


In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct, VectorParams, Distance
from bs4 import BeautifulSoup
import pandas as pd
import uuid
from openai import OpenAI # ✅ add openai import


# Utility functions
def clean(text):
    """Return ``text`` as a whitespace-stripped string.

    Values that pandas considers missing (``None``, ``NaN``, ...) become
    the empty string; anything else is converted with ``str`` first.
    """
    return "" if pd.isna(text) else str(text).strip()

def strip_html(text):
    """Return the visible text of an HTML fragment.

    The fragment is parsed with BeautifulSoup's "html.parser"; the text
    pieces are stripped and joined with single spaces.
    """
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text(separator=" ", strip=True)


def get_openai_embedding(text: str) -> list:
    """Embed ``text`` with OpenAI's ``text-embedding-3-small`` model.

    Uses the module-level ``openai_client`` and returns the embedding of
    the first (and only requested) input.
    """
    result = openai_client.embeddings.create(
        input=text,
        model="text-embedding-3-small",
    )
    first_item = result.data[0]
    return first_item.embedding

# Load CSV
# The export is looked up under the current user's home directory rather than
# through one machine's absolute path, so the cell is not tied to one account.
from pathlib import Path

csv_path = Path.home() / "Downloads" / "Maestri Milano - Products.csv"
df = pd.read_csv(csv_path)
# Normalize headers: trimmed, lower-case, spaces replaced by underscores.
df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]

# Connect to Qdrant
client = QdrantClient(host="vps.maestri.com.co", port=6333, https=False)
collection_name = "maestri_products"
embedding_size = 1536  # ✅ OpenAI embedding size is 1536

# Rebuild the collection from scratch. `recreate_collection` is deprecated in
# qdrant-client, so the equivalent drop + create is done explicitly.
# WARNING: this discards every point already stored in the collection.
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)
client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=embedding_size, distance=Distance.COSINE),
)

points = []

for _, row in df.iterrows():
    # Rows without a publication date are not indexed.
    if pd.isna(row.get("published_on")):
        continue

    product_name = clean(row.get("nombre"))
    bodega = clean(row.get("bodega"))
    region = clean(row.get("región"))
    tipo = clean(row.get("tipo"))
    maridaje1 = clean(row.get("maridaje_1"))
    maridaje2 = clean(row.get("maridaje_2"))
    # Join whichever pairings are present with " y ".
    maridaje = " y ".join([m for m in [maridaje1, maridaje2] if m])
    notas = clean(row.get("notas_de_cata"))
    descripcion = strip_html(clean(row.get("descripción")))
    precio = clean(row.get("precio"))
    category = clean(row.get("pasillo"))
    alternate_names = ""

    # Skip rows that have nothing to embed. Testing the rendered text would
    # never trigger, because the template's fixed labels make it non-empty.
    if not any([product_name, tipo, bodega, region, maridaje, notas, descripcion, precio]):
        continue

    # Text sent to the embedding model. Built from adjacent f-strings so that
    # source-code indentation does not end up inside the embedded text.
    short_text = (
        f"Producto: {product_name}. Tipo: {tipo}. Bodega: {bodega}. Región: {region}.\n"
        f"Maridaje: {maridaje}. Notas: {notas}. Descripción: {descripcion}.\n"
        f"También conocido como: {alternate_names}. Precio: {precio}."
    )

    # ✅ Now using OpenAI to generate the embedding (one API call per row)
    vector = get_openai_embedding(short_text)

    payload = {
        "product_name": product_name,
        "bodega": bodega,
        "region": region,
        "tipo": tipo,
        "precio": precio,
        "notas": notas,
        "descripcion": descripcion,
        "maridaje": maridaje,
        "category": category,
        "alternate_names": alternate_names
    }

    # Each point gets a fresh random UUID as its Qdrant id.
    points.append(PointStruct(id=str(uuid.uuid4()), vector=vector, payload=payload))

# Insert into Qdrant
client.upsert(collection_name=collection_name, points=points)
print(f"✅ Inserted {len(points)} products into Qdrant collection: {collection_name}")


In [None]:
# Display the DataFrame's column labels.
df.columns

In [None]:
# Step 1 - Inspect products with "prosecco" in product_name
print("\n🔍 Checking indexed Prosecco products by name:\n")

# Walk the whole collection page by page. A single scroll call only returns
# the first `limit` points, so stopping there could wrongly report that no
# Prosecco exists when the collection holds more than one page.
results = []
next_offset = None
while True:
    page, next_offset = client.scroll(
        collection_name=collection_name,
        limit=100,
        offset=next_offset,
        with_payload=True
    )
    results.extend(page)
    # Qdrant returns no next-page offset once the last page has been read.
    if next_offset is None:
        break

# Case-insensitive substring match on the stored product name.
prosecco_items = [
    r for r in results if "prosecco" in str(r.payload.get("product_name", "")).lower()
]

if not prosecco_items:
    print("❌ No Prosecco products found in product_name.")
else:
    # Dump every payload field of each match, numbered from 1.
    for i, item in enumerate(prosecco_items, 1):
        print(f"\n#{i}")
        for k, v in item.payload.items():
            print(f"{k}: {v}")


In [None]:
import numpy as np
import os
import re
import logging
import traceback
from typing import List
import requests

from qdrant_client.http.models import VectorParams, Distance
from qdrant_client import QdrantClient
from openai import OpenAI
from dotenv import load_dotenv
from fastapi import HTTPException


# Webflow credentials and endpoint.
# The API token is read from the environment (for example from a .env file
# loaded with load_dotenv) so that it is never stored in the notebook.
# SECURITY: a literal token used to be hard-coded on this line; treat that
# token as leaked and rotate it.
WEBFLOW_API_TOKEN = os.getenv("WEBFLOW_API_TOKEN", "")
if not WEBFLOW_API_TOKEN:
    logging.warning("WEBFLOW_API_TOKEN is not set; Webflow requests will not be authenticated.")

# Identifier of the Webflow CMS collection whose items are fetched.
COLLECTION_ID = "6660d3a96fe3b376c162563e"
BASE_URL = f"https://api.webflow.com/v2/collections/{COLLECTION_ID}/items"

# Headers sent with every Webflow request.
HEADERS = {
    "Authorization": f"Bearer {WEBFLOW_API_TOKEN}",
    "accept-version": "2.0.0"
}

def get_all_webflow_items(limit: int = 100, timeout: float = 30.0):
    """Fetch every item of the Webflow collection, following offset pagination.

    Args:
        limit: Page size requested on each call (defaults to the original 100).
        timeout: Seconds to wait for each HTTP response. Without a timeout
            a stalled connection would block the notebook indefinitely.

    Returns:
        A list with the entries found under the "items" key of every page.

    Raises:
        requests.HTTPError: if a page request returns an error status.
        requests.Timeout: if a page does not respond within ``timeout``.
    """
    offset = 0
    all_items = []

    while True:
        response = requests.get(
            BASE_URL,
            headers=HEADERS,
            params={"offset": offset, "limit": limit},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
        items = data.get("items", [])

        all_items.extend(items)
        print(f"✅ Fetched {len(items)} items at offset {offset}")

        # A short page means there is nothing left to fetch.
        if len(items) < limit:
            break

        offset += limit

    return all_items

# Keys read directly from each Webflow item (they live outside "fieldData").
top_fields = ["id", "lastPublished", "lastUpdated", "isArchived", "isDraft"]

# Keys copied out of each item's "fieldData" mapping, in output-column order.
# NOTE(review): confirm that "ocasion" matches the Webflow field slug exactly
# ('ocasion' vs 'occasion').
fielddata_fields = [
    "gr-ml", "ocasion", "precio",
    "maridaje-1", "maridaje-2",
    "pasillo", "bodega", "name",
    "descripcion", "notas-de-cata", "tipo", "slug",
    "descuento", "descuento-2x1", "descuento-3x2",
    "productoreserva", "descuento-off",
]

# Function to flatten and extract only desired fields
def extract_selected_fields(item):
    """Flatten one Webflow item into a single-level dict.

    The result holds, in order: every key listed in ``top_fields`` (read
    from the item itself), every key listed in ``fielddata_fields`` (read
    from the item's "fieldData" mapping), and "imagen_url". Keys that are
    absent map to ``None``.
    """
    field_data = item.get("fieldData", {})

    # Top-level keys first, then the selected fieldData keys.
    row = {key: item.get(key) for key in top_fields}
    row.update((key, field_data.get(key)) for key in fielddata_fields)

    # ✅ The image URL comes from 'imagen-del-producto', but only when that
    # entry is a dict; any other shape yields None.
    image_info = field_data.get("imagen-del-producto", {})
    row["imagen_url"] = image_info.get("url") if isinstance(image_info, dict) else None

    return row


# Download every CMS item and flatten each one into a plain row dict.
items = get_all_webflow_items()
flattened_data = list(map(extract_selected_fields, items))

# Tabulate the flattened rows (this rebinds `df`).
df = pd.DataFrame(flattened_data)

# NOTE: this bare expression is not the last statement of the cell.
df
# Utility functions
def clean(text):
    """Return ``text`` as a whitespace-stripped string.

    Values that pandas considers missing (``None``, ``NaN``, ...) become
    the empty string; anything else is converted with ``str`` first.
    """
    return "" if pd.isna(text) else str(text).strip()

def strip_html(text):
    """Return the visible text of an HTML fragment.

    The fragment is parsed with BeautifulSoup's "html.parser"; the text
    pieces are stripped and joined with single spaces.
    """
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text(separator=" ", strip=True)


def get_openai_embedding(text: str) -> list:
    """Embed ``text`` with OpenAI's ``text-embedding-3-small`` model.

    Uses the module-level ``openai_client`` and returns the embedding of
    the first (and only requested) input.
    """
    result = openai_client.embeddings.create(
        input=text,
        model="text-embedding-3-small",
    )
    first_item = result.data[0]
    return first_item.embedding

# Connect to Qdrant
client = QdrantClient(host="vps.maestri.com.co", port=6333, https=False)
collection_name = "maestri_products"
embedding_size = 1536  # ✅ OpenAI embedding size is 1536

# Rebuild the collection from scratch. `recreate_collection` is deprecated in
# qdrant-client, so the equivalent drop + create is done explicitly.
# WARNING: this discards every point already stored in the collection.
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)
client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=embedding_size, distance=Distance.COSINE),
)


def _as_flag(value) -> bool:
    """Convert a DataFrame cell to a bool, treating missing values as False.

    A plain ``bool(value)`` is not enough: pandas stores missing entries of
    numeric columns as NaN, and ``bool(float("nan"))`` is True.
    """
    return False if pd.isna(value) else bool(value)


points = []

for _, row in df.iterrows():
    # Only index items that have a name and a price, are neither archived nor
    # drafts, and have been published at least once.
    if (
        pd.isna(row.get("name"))
        or pd.isna(row.get("precio"))
        or _as_flag(row.get("isArchived"))
        or _as_flag(row.get("isDraft"))
        or pd.isna(row.get("lastPublished"))
    ):
        continue

    # Webflow item id (kept in a non-builtin name so `id()` is not shadowed).
    webflow_id = row.get("id")
    product_name = clean(row.get("name"))
    bodega = clean(row.get("bodega"))
    tipo = clean(row.get("tipo"))
    maridaje1 = clean(row.get("maridaje-1"))
    maridaje2 = clean(row.get("maridaje-2"))
    # Join whichever pairings are present with " y ".
    maridaje = " y ".join([m for m in [maridaje1, maridaje2] if m])
    notas = clean(row.get("notas-de-cata"))
    descripcion = strip_html(clean(row.get("descripcion")))
    precio = clean(row.get("precio"))
    category = clean(row.get("pasillo"))
    gr_ml = clean(row.get("gr-ml"))
    ocasion = clean(row.get("ocasion"))
    slug = clean(row.get("slug"))
    url = f"https://maestri.com.co/products/{slug}" if slug else ""

    # Promotion / reservation flags; absent values count as False.
    descuento = _as_flag(row.get("descuento", False))
    descuento_2x1 = _as_flag(row.get("descuento-2x1", False))
    descuento_3x2 = _as_flag(row.get("descuento-3x2", False))
    productoreserva = _as_flag(row.get("productoreserva", False))
    descuento_off = _as_flag(row.get("descuento-off", False))
    url_imagen = row.get("imagen_url", "")
    if pd.isna(url_imagen):
        url_imagen = ""

    alternate_names = ""

    # Skip items whose name is blank after cleaning. Testing the rendered
    # text would never trigger, because the template's fixed labels make it
    # non-empty for every row.
    if not product_name:
        continue

    # Text sent to the embedding model. Built from adjacent f-strings so that
    # source-code indentation does not end up inside the embedded text.
    short_text = (
        f"Producto: {product_name}. Tipo: {tipo}. Bodega: {bodega}.\n"
        f"Maridaje: {maridaje}. Notas: {notas}. Descripción: {descripcion}.\n"
        f"También conocido como: {alternate_names}. Precio: {precio}. GR/ML: {gr_ml}. Ocasion: {ocasion}."
    )

    # Get vector (one embedding API call per product)
    vector = get_openai_embedding(short_text)

    # Payload for Qdrant
    payload = {
        "id": webflow_id,
        "product_name": product_name,
        "bodega": bodega,
        "tipo": tipo,
        "precio": precio,
        "notas": notas,
        "descripcion": descripcion,
        "maridaje": maridaje,
        "category": category,
        "gr_ml": gr_ml,
        "ocasion": ocasion,
        "url": url,
        "descuento": descuento,
        "descuento_2x1": descuento_2x1,
        "descuento_3x2": descuento_3x2,
        "productoreserva": productoreserva,
        "descuento_off": descuento_off,
        "alternate_names": alternate_names,
        "url_imagen": url_imagen
    }

    # The Qdrant point id is a fresh random UUID; the Webflow id travels in
    # the payload.
    points.append(PointStruct(id=str(uuid.uuid4()), vector=vector, payload=payload))

# Insert into Qdrant
client.upsert(collection_name=collection_name, points=points)
print(f"✅ Inserted {len(points)} products into Qdrant collection: {collection_name}")


In [None]:
import os
import numpy as np
from qdrant_client import QdrantClient
from openai import OpenAI
from dotenv import load_dotenv

# === Load env variables
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY2")
if not openai_api_key:
    # Name the variable that is actually read, so the message points at the
    # right .env entry.
    raise ValueError("OPENAI_API_KEY2 not set in .env")

# === Initialize OpenAI and Qdrant client
openai_client = OpenAI(api_key=openai_api_key)
qdrant = QdrantClient(host="vps.maestri.com.co", port=6333, https=False)

# === Define search query
query = "Prosecco Sior Sandro"  # 🔁 Replace with your test product
collection = "maestri_products"

# === Create embedding
# Same model as the one used when the products were indexed, so the query
# vector is comparable with the stored vectors.
embedding_response = openai_client.embeddings.create(
    model="text-embedding-3-small",
    input=query
)
vector = embedding_response.data[0].embedding

# === Search in Qdrant
# Only the single best match is requested; stored vectors are not returned.
results = qdrant.search(
    collection_name=collection,
    query_vector=vector,
    limit=1,
    with_payload=True,
    with_vectors=False
)

# === Print result
if results:
    print("✅ Product found:")
    for key, value in results[0].payload.items():
        print(f"{key}: {value}")
else:
    print("❌ No product found.")
