In [None]:
!pip install sentence-transformers faiss-cpu openai google-genai pillow matplotlib scikit-learn requests

In [1]:
# Load variables from .env, then report which API keys are available.
# Only a True/False presence flag is printed -- never the key value itself.
import os

from dotenv import load_dotenv

load_dotenv()

print("Keys loaded:")
for label, env_name in (
    ("OpenAI:", "OPENAI_API_KEY"),
    ("Google:", "GOOGLE_API_KEY"),
    ("Stability:", "STABILITY_API_KEY"),
):
    print(label, bool(os.getenv(env_name)))

Keys loaded:
OpenAI: True
Google: True
Stability: True


In [None]:
from pipeline import run_workflow

# Input CSV and destination folder for this run.
csv_path = "walmart.csv"
output_root = "./outputs/run"

# Keyword arguments forwarded to run_workflow.
workflow_options = {
    "csv_path": csv_path,
    "output_root": output_root,
    "image_model": "dalle",       # "dalle" / "sd" /  "nano"
    "sampling_mode": "cluster",   # "rag" / "cluster" / "random"
    "product_name": "Dash Rapid Egg Cooker",  # specify your product name here
}

# `results` is indexed by key in the cells that follow.
results = run_workflow(**workflow_options)

  from .autonotebook import tqdm as notebook_tqdm



CSV File:       walmart.csv
Output Folder:  ./outputs/egg_run
Image Model:    dalle
Sampling Mode:  cluster

[Paths] Artifacts dir: ./outputs/egg_run/artifacts
[Paths] Images dir:    ./outputs/egg_run/images_dalle_cluster
[Ingestion] Loaded 1919 cleaned reviews.


Batches: 100%|██████████| 60/60 [00:01<00:00, 31.15it/s]
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[Embedding] Saved embeddings to ./outputs/egg_run/artifacts/embeddings.npy
[Clustering] n_clusters=8, silhouette=0.0418, ch_score=85.4409, db_score=3.7157
[Clustering] Saved clustered reviews to ./outputs/egg_run/artifacts/reviews_with_clusters.csv
[RAGStore] Built FAISS index with 1919 vectors.

[DALL-E 3] Generating: feature_based...
[DALL-E 3] Saved: ./outputs/egg_run/images_dalle_cluster/dalle_feature_based.png

[DALL-E 3] Generating: ideal_from_pos...
[DALL-E 3] Saved: ./outputs/egg_run/images_dalle_cluster/dalle_ideal_from_pos.png

[DALL-E 3] Generating: realistic_from_pos_neg...
[DALL-E 3] Saved: ./outputs/egg_run/images_dalle_cluster/dalle_realistic_from_pos_neg.png
[DALL-E 3] Saved comparison image: ./outputs/egg_run/images_dalle_cluster/dalle_comparison.png




In [None]:
# Inspect which entries the workflow returned; later cells read
# "core_visual_df", "prompts" and "images_dir" from it.
results.keys()

In [None]:
from IPython.display import display

# Pull the pieces of the workflow result that the rest of the notebook uses.
core_visual_df = results["core_visual_df"]
prompts = results["prompts"]
images_dir = results["images_dir"]

# A cell only auto-renders its *last* expression, so the preview must be
# displayed explicitly; otherwise the head() result is silently discarded.
display(core_visual_df.head())
prompts

In [None]:
import os
from IPython.display import Image as IPyImage, display

# Show every image file found in images_dir, labelled with its file name.
# os.listdir() returns entries in arbitrary order, so sort the names to keep
# the display order stable across runs and machines.
for fname in sorted(os.listdir(images_dir)):
    if fname.lower().endswith((".png", ".jpg", ".jpeg")):
        print(fname)
        # Pass the path as `filename=` so it is unambiguously read from disk.
        display(IPyImage(filename=os.path.join(images_dir, fname)))