In [None]:
from embedding_atlas.widget import EmbeddingAtlasWidget
from embedding_atlas.projection import compute_text_projection
import pandas as pd
from datasets import load_dataset

In [None]:
# Load the "validation" split of the `james-burton/wine_reviews` dataset
# and convert it to a pandas DataFrame for the cells below.
ds = load_dataset("james-burton/wine_reviews", split="validation")
df = pd.DataFrame(ds)

## Computing Embeddings with Models Served by Ollama

To run this example, first install [Ollama](https://ollama.com/download), then run the following command to download the [nomic-embed-text](https://ollama.com/library/nomic-embed-text) model:

```bash
ollama pull nomic-embed-text
```

Based on your machine's configuration, embedding all text items may take some time. For reference, on a 36 GB M3 Max MacBook Pro, executing the cell below takes ~90 seconds.

In [None]:
# Compute text embeddings with a model served by the local Ollama server
# (through the LiteLLM projector), then compute the 2D projection.
# The widget cell below reads the `projection_x`, `projection_y`, and
# `neighbors` columns from `df`, so those are the output column names used here.
compute_text_projection(
    df,
    # Embed the review text: this is the same column the widget displays.
    text="description",
    x="projection_x",
    y="projection_y",
    neighbors="neighbors",
    text_projector="litellm",
    # Address of the local Ollama server.
    api_base_url="http://localhost:11434",
    model="ollama/nomic-embed-text",
    batch_size=512,
    # Process embedding batches synchronously.
    sync=True,
)

In [None]:
# Display the dataset with the Embedding Atlas widget
w = EmbeddingAtlasWidget(df, text="description", x="projection_x", y="projection_y", neighbors="neighbors")
w

In [None]:
# Get the selection from the widget as a dataframe
w.selection()

## Computing Embeddings with Models Served by OpenAI

In [None]:
# When running embedding batches asynchronously in a notebook, we need to patch the event loop
!uv pip install nest-asyncio -q
import nest_asyncio

nest_asyncio.apply()

In [None]:
# Compute text embedding and projection of the embedding using the OpenAI API.
# NOTE: this uses the same output column names as the Ollama example above.
compute_text_projection(
    df,
    text="description",
    x="projection_x",
    y="projection_y",
    neighbors="neighbors",
    text_projector="litellm",
    # Your OpenAI API key ("sk-xxx" is a placeholder). You can omit this and set the
    # OPENAI_API_KEY environment variable instead; avoid committing a real key.
    api_key="sk-xxx",
    model="openai/text-embedding-3-small",
    # OpenAI's limit is 300K input tokens per request
    batch_size=1024,
    # Since calling an API is IO-bound, we can benefit from async processing
    sync=False,
)

In [None]:
# Display the dataset with the Embedding Atlas widget using OpenAI embeddings
w2 = EmbeddingAtlasWidget(df, text="description", x="projection_x", y="projection_y", neighbors="neighbors")
w2