In [None]:
import sys
from pathlib import Path

# Resolve the directory one level above the current working directory.
ROOT = Path("..").resolve()
SRC = ROOT.joinpath("src")
# Prepend src/ to the import search path so it is searched first.
sys.path[:0] = [str(SRC)]

print("ROOT:", ROOT)
print("SRC:", SRC)

In [None]:
from data_simulator import SimConfig, generate_items, generate_search_events, save_artifacts

# Simulation settings; the seed is fixed so the run can be repeated.
cfg = SimConfig(
    n_items=100_000,
    n_sellers=5_000,
    n_events=10_000,
    seed=42,
)
# Generate the items first: the search-event generator takes them as input.
items = generate_items(cfg)
events = generate_search_events(cfg, items)

# Preview the first rows of the generated items.
items.head()


In [None]:
# Make sure the artifacts directory exists, then hand its path (as a string)
# to save_artifacts together with the simulated items and events.
artifacts_dir = ROOT / "artifacts"
artifacts_dir.mkdir(exist_ok=True)
save_artifacts(items, events, out_dir=str(artifacts_dir))
print("Saved artifacts!")

In [None]:
from features import build_item_360

# build_item_360 returns two values: the item table and an accompanying report.
item_360, report = build_item_360(items)
# Show a preview of the table next to the report.
(item_360.head(), report)


In [None]:
# Write the item table as parquet (without the index) under ROOT/artifacts.
out_path = ROOT.joinpath("artifacts", "item_360.parquet")
item_360.to_parquet(out_path, index=False)
print("Saved:", out_path)


In [None]:
import pandas as pd

# Reload the item table from the parquet file under ROOT/artifacts.
item_360 = pd.read_parquet(ROOT.joinpath("artifacts", "item_360.parquet"))
item_360.head()


In [None]:
# Generate embeddings and build the index (FAISS)

In [None]:
from retrieval import build_retrieval_artifacts, search

# Build the retrieval artifacts from the item table, then run one sample query
# limited to 10 results with a price cap of 3000.
art = build_retrieval_artifacts(item_360)
hits = search(
    "laptop gamer i7 16gb",
    art,
    top_k=10,
    max_price=3000,
)
hits.head(10)


In [None]:
# Inspect the artifacts object and the shape of its embeddings.
# The lookup is None-safe at both levels: the original called `.shape` directly
# on the `getattr(..., None)` result, which raises AttributeError when the
# `embeddings` attribute is absent.
type(art), getattr(getattr(art, "embeddings", None), "shape", None)

In [None]:
# First ten rows of the search results (positional slice).
hits.iloc[:10]

In [None]:
# Take the first ten results, then keep only the descriptive columns.
hits.head(10)[["item_id", "title", "price", "category", "brand", "model"]]

In [None]:
# Row count of the results, followed by the full (rows, columns) shape.
hits.shape[0], hits.shape

In [None]:
# Summary statistics of the price column.
item_360.loc[:, "price"].describe()

In [None]:
from retrieval import build_retrieval_artifacts
from context_engine import run_context_engine

# Reuse the retrieval artifacts (`art`) that were already built from this same
# `item_360` earlier in the notebook, rather than calling
# build_retrieval_artifacts a second time on identical input.
res = run_context_engine("laptop gamer i7 16gb", art, top_k=10, max_price=6000)

# Show the result metadata, the first hits and the start of the context text.
res.meta, res.hits.head(5), res.context_text[:500]


In [None]:
import os
import warnings

# The key is taken from the process environment only; it is never hardcoded here.
# As before, the variable is guaranteed to exist afterwards (empty string when it
# was not set), but an empty value is now reported instead of passing silently.
os.environ.setdefault("OPENAI_API_KEY", "")
if not os.environ["OPENAI_API_KEY"].strip():
    warnings.warn(
        "OPENAI_API_KEY is empty or not set; export it before starting the "
        "kernel if later cells need to call the OpenAI API."
    )


In [None]:
from crew_app import build_crew
from context_engine import run_context_engine

# Run the context engine for the sample query (10 results, price cap 6000).
res = run_context_engine(
    "laptop gamer i7 16gb",
    art,
    top_k=10,
    max_price=6000,
)

# Build the crew and start it with the query and the assembled context text.
crew = build_crew()
out = crew.kickoff(inputs=dict(query=res.query, context=res.context_text))
out


In [None]:
from pathlib import Path

# Resolve the directory above the working directory and its artifacts folder.
ROOT = Path("..").resolve()
ART = ROOT.joinpath("artifacts")
# Up to 50 entry names in sorted order, followed by the directory itself.
sorted(entry.name for entry in ART.glob("*"))[:50], ART


In [None]:
from pathlib import Path

ROOT = Path("..").resolve()
ART = ROOT / "artifacts"
pkl = ART / "retrieval_artifacts.pkl"

# Remove the pickled retrieval artifacts. A missing file is not an error: the
# cell must stay re-runnable (and runnable before the pickle has been written),
# so FileNotFoundError is reported rather than raised.
try:
    pkl.unlink()
except FileNotFoundError:
    print("Nothing to delete (file not found):", pkl)
else:
    print("Deleted:", pkl)


In [None]:
import sys
from pathlib import Path

ROOT = Path("..").resolve()          # the working directory is expected to be /notebooks
SRC  = ROOT / "src"

# Only add src/ to the import path when it is not there yet, so running this
# cell repeatedly does not pile up duplicate sys.path entries.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

print("ROOT:", ROOT)
print("SRC :", SRC)
# Sanity check that the retrieval module file is where the imports expect it.
print("exists retrieval.py?", (SRC / "retrieval.py").exists())



In [None]:
import pandas as pd
import pickle
from retrieval import build_retrieval_artifacts

# Artifacts folder under the project root.
ART = ROOT.joinpath("artifacts")

# Load the item table from parquet and report its shape.
item_360 = pd.read_parquet(ART.joinpath("item_360.parquet"))
print("item_360:", item_360.shape)

# Build the retrieval artifacts and report the shape of their embeddings.
art = build_retrieval_artifacts(item_360)
print("embeddings:", art.embeddings.shape)

# Pickle the artifacts next to the parquet file, then report path and byte size.
pkl_path = ART / "retrieval_artifacts.pkl"
with pkl_path.open("wb") as sink:
    pickle.dump(art, sink)

print("Saved:", pkl_path, "size:", pkl_path.stat().st_size)
