<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/vector_stores/pinecone_metadata_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pinecone Vector Store - Metadata Filter

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙 and the Pinecone client.

In [None]:
# !pip install llama-index==0.9.31 pinecone-client==3.0.0.dev10

In [None]:
import logging
import sys
import os

# Route INFO-level (and above) log records to stdout so they show up in the
# notebook output. basicConfig installs a stdout handler on the root logger,
# so no additional StreamHandler is attached here: a second stdout handler
# would make every log record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

Build a Pinecone Index and connect to it

In [None]:
from pinecone import Pinecone
from pinecone import ServerlessSpec

# Export the Pinecone key through the environment, then read it back so the
# client is built from the same value other tools would see.
os.environ["PINECONE_API_KEY"] = "<Your Pinecone API key, from app.pinecone.io>"
api_key = os.environ["PINECONE_API_KEY"]

# Client handle used by the following cells to create and open the index.
pc = Pinecone(api_key=api_key)

In [None]:
# The dimension (1536) is the one for text-embedding-ada-002.
serverless_spec = ServerlessSpec(cloud="aws", region="us-west-2")
pc.create_index(
    "quickstart-index",
    dimension=1536,
    metric="euclidean",
    spec=serverless_spec,
)

In [None]:
# Open a handle to the "quickstart-index" index; this object is what gets
# passed to PineconeVectorStore further down.
pinecone_index = pc.Index("quickstart-index")

Build the PineconeVectorStore and VectorStoreIndex

In [None]:
from llama_index import VectorStoreIndex, StorageContext
from llama_index.vector_stores import PineconeVectorStore

INFO:numexpr.utils:Note: NumExpr detected 10 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 10 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


In [None]:
from llama_index.schema import TextNode

# Sample corpus: each title maps to the metadata attached to its node.
# Note that entries carry either an "author" or a "director" key, never both.
# Insertion order is preserved, so the nodes are created in the order listed.
metadata_by_title = {
    "The Shawshank Redemption": {
        "author": "Stephen King",
        "theme": "Friendship",
        "year": 1994,
    },
    "The Godfather": {
        "director": "Francis Ford Coppola",
        "theme": "Mafia",
        "year": 1972,
    },
    "Inception": {
        "director": "Christopher Nolan",
        "theme": "Fiction",
        "year": 2010,
    },
    "To Kill a Mockingbird": {
        "author": "Harper Lee",
        "theme": "Mafia",
        "year": 1960,
    },
    "1984": {
        "author": "George Orwell",
        "theme": "Totalitarianism",
        "year": 1949,
    },
    "The Great Gatsby": {
        "author": "F. Scott Fitzgerald",
        "theme": "The American Dream",
        "year": 1925,
    },
    "Harry Potter and the Sorcerer's Stone": {
        "author": "J.K. Rowling",
        "theme": "Fiction",
        "year": 1997,
    },
}

# One TextNode per entry: the title is the node text, the dict its metadata.
nodes = [
    TextNode(text=title, metadata=metadata)
    for title, metadata in metadata_by_title.items()
]

In [None]:
import openai

# The OpenAI key has to be in place before the storage context and index are
# built below; it is exported via the environment and mirrored onto the
# openai module.
os.environ["OPENAI_API_KEY"] = "<Your OpenAI API key>"
openai.api_key = os.environ["OPENAI_API_KEY"]

# Wrap the Pinecone index in a vector store bound to a single namespace.
vector_store = PineconeVectorStore(
    namespace="test_05_14",
    pinecone_index=pinecone_index,
)

# Back the index with that vector store and load the sample nodes into it.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Upserted vectors:   0%|          | 0/7 [00:00<?, ?it/s]

Define metadata filters

In [None]:
from llama_index.vector_stores.types import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

# Single exact-match condition: the "theme" metadata must equal "Mafia".
mafia_theme_filter = MetadataFilter(
    key="theme",
    operator=FilterOperator.EQ,
    value="Mafia",
)
filters = MetadataFilters(filters=[mafia_theme_filter])

Retrieve from the vector store with filters

In [None]:
# Build a retriever that carries the metadata filters defined in the previous cell.
retriever = index.as_retriever(filters=filters)
# Left as the last expression so the retrieved nodes are shown as the cell output.
retriever.retrieve("What is inception about?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[]

Multiple Metadata Filters with `AND` condition

Both filters must match: `theme` is `Fiction` **and** `year` is greater than 1997.

In [None]:
from llama_index.vector_stores.types import (
    FilterOperator,
    FilterCondition,
)

# Two conditions combined with AND: theme "Fiction" and year greater than 1997.
fiction_theme = MetadataFilter(key="theme", value="Fiction")
released_after_1997 = MetadataFilter(
    key="year", value=1997, operator=FilterOperator.GT
)
filters = MetadataFilters(
    filters=[fiction_theme, released_after_1997],
    condition=FilterCondition.AND,
)

# Query through a retriever that applies the combined filters.
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[]

Multiple Metadata Filters with `OR` condition

At least one filter must match: `theme` is `Fiction` **or** `year` is greater than 1997.

In [None]:
from llama_index.vector_stores.types import (
    FilterOperator,
    FilterCondition,
)


# Same two conditions as before, but combined with OR instead of AND.
fiction_theme = MetadataFilter(key="theme", value="Fiction")
released_after_1997 = MetadataFilter(
    key="year", value=1997, operator=FilterOperator.GT
)
filters = MetadataFilters(
    filters=[fiction_theme, released_after_1997],
    condition=FilterCondition.OR,
)

# Query through a retriever that applies the combined filters.
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[]

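Use keyword arguments specific to Pinecone

Instead of `MetadataFilters`, a native Pinecone filter dictionary can be passed through `vector_store_kwargs`.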

In [None]:
# Pinecone-native filter dictionary, handed to the store via vector_store_kwargs
# rather than through MetadataFilters.
pinecone_native_filter = {"theme": "Mafia"}
retriever = index.as_retriever(
    vector_store_kwargs={"filter": pinecone_native_filter}
)
retriever.retrieve("What is inception about?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='c73279eb-411a-4672-88c9-cf934ad1bf8c', embedding=[-0.00178836891, -0.0238407217, -0.0128082475, -0.0354147442, -0.00969749317, 0.0257046539, -0.000490778184, 0.000809174, -0.0218256582, -0.0278834421, 0.0238407217, 0.01876528, 0.0284375846, -0.0019048648, 0.00637263851, 0.0153522659, 0.029117668, -0.00807284843, 0.0104090627, -0.000399667391, 0.0102390414, 0.00693307817, -0.0297725648, -0.000678116106, 0.00477633, -0.00108309672, 0.00440165447, -0.0270018522, 0.021548586, -0.0175058655, 0.0120022222, -0.0240674149, -0.00652376842, 0.0020103408, 0.0100942096, -0.003102883, 0.00582164479, -0.0105350045, 0.000989427674, 0.0146092111, 0.0140172858, 0.00744314119, -0.0082617607, -0.0168761574, 0.0058814669, -0.00278488081, 0.0226190891, -0.0117125567, -0.0136142736, 0.0145840226, 0.00707791094, 0.0314853676, -0.0147855291, -0.0302259531, 0.0201254468, 0.00941412523, 0.00496524246, -0.0163472034, 0.00379083841, -0.0177955311, 0.0108057782, 0.00447092252, -0.