# Pinecone Vector Store - Auto Retriever

#### Creating a Pinecone Index

In [17]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [18]:
import pinecone

api_key = os.environ['PINECONE_API_KEY']
pinecone.init(api_key=api_key, environment="eu-west1-gcp")

In [19]:
# dimensions are for text-embedding-ada-002
try:
    pinecone.create_index("quickstart-index", dimension=1536, metric="euclidean", pod_type="p1")
except Exception:
    # most likely index already exists
    pass

In [20]:
pinecone_index = pinecone.Index("quickstart-index")

#### Load documents, build the PineconeVectorStore and GPTVectorStoreIndex

In [21]:
from llama_index import GPTVectorStoreIndex, StorageContext
from llama_index.vector_stores import PineconeVectorStore

In [22]:
from llama_index.data_structs.node import Node

nodes = [
    Node("Michael Jordan is a retired professional basketball player, widely regarded as one of the greatest basketball players of all time.", extra_info={
        "category": "Sports",
        "country": "United States",
    }),
    Node("Angelina Jolie is an American actress, filmmaker, and humanitarian. She has received numerous awards for her acting and is known for her philanthropic work.", extra_info={
        "category": "Entertainment",
        "country": "United States",
    }),
    Node("Elon Musk is a business magnate, industrial designer, and engineer. He is the founder, CEO, and lead designer of SpaceX, Tesla, Inc., Neuralink, and The Boring Company.", extra_info={
        "category": "Business",
        "country": "United States",
    }),
    Node("Rihanna is a Barbadian singer, actress, and businesswoman. She has achieved significant success in the music industry and is known for her versatile musical style.", extra_info={
        "category": "Music",
        "country": "Barbados",
    }),
    Node("Cristiano Ronaldo is a Portuguese professional footballer who is considered one of the greatest football players of all time. He has won numerous awards and set multiple records during his career.", extra_info={
        "category": "Sports",
        "country": "Portugal",
    })
]

In [23]:
vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace='test')
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [24]:
index = GPTVectorStoreIndex(nodes, storage_context=storage_context)

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 211 tokens
> [build_index_from_nodes] Total embedding token usage: 211 tokens
> [build_index_from_nodes] Total embedding token usage: 211 tokens


In [25]:
from llama_index.indices.vector_store.retrievers import VectorIndexAutoRetriever
from llama_index.vector_stores.types import MetadataInfo, VectorStoreInfo


vector_store_info = VectorStoreInfo(
    content_info='brief biography of celebrities',
    metadata_info=[
        MetadataInfo(
            name='category', 
            type='str', 
            description='Category of the celebrity, one of [Sports, Entertainment, Business, Music]'),
        MetadataInfo(name='country', type='str', description='Country of the celebrity, one of [United States, Barbados, Portugal]'),
    ]
)
retriever = VectorIndexAutoRetriever(index, vector_store_info=vector_store_info)

In [26]:
retriever.retrieve('Tell me about two celebrities from United States')

INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto query: celebrities
Auto query: celebrities
Auto query: celebrities
INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto filter: {'country': 'United States'}
Auto filter: {'country': 'United States'}
Auto filter: {'country': 'United States'}
INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto top_k: 2
Auto top_k: 2
Auto top_k: 2
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 3 tokens
> [retrieve] Total embedding token usage: 3 tokens
> [retrieve] Total embedding token usage: 3 tokens


[NodeWithScore(node=Node(text='category: Entertainment\ncountry: United States\n\nAngelina Jolie is an American actress, filmmaker, and humanitarian. She has received numerous awards for her acting and is known for her philanthropic work.', doc_id='6821b1fe-e1dc-400c-ad2c-83f7fa683321', embedding=None, doc_hash='4086bd15d984c4f3ee3d4f911f0a347735406351d1936b6060b411707d3e82cc', extra_info={'category': 'Entertainment', 'country': 'United States'}, node_info={}, relationships={}), score=0.80265522),
 NodeWithScore(node=Node(text='category: Sports\ncountry: United States\n\nMichael Jordan is a retired professional basketball player, widely regarded as one of the greatest basketball players of all time.', doc_id='4cf176e5-363f-479b-8979-c3e07cfaead8', embedding=None, doc_hash='9aaec18f659138a23ca519f8d6d1f3997d34aae993b8c07443b165c13163b886', extra_info={'category': 'Sports', 'country': 'United States'}, node_info={}, relationships={}), score=0.766244411)]

In [27]:
retriever.retrieve('Tell me about Sports celebrities from United States')

INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto query: Sports celebrities
Auto query: Sports celebrities
Auto query: Sports celebrities
INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto filter: {'category': 'Sports', 'country': 'United States'}
Auto filter: {'category': 'Sports', 'country': 'United States'}
Auto filter: {'category': 'Sports', 'country': 'United States'}
INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto top_k: 2
Auto top_k: 2
Auto top_k: 2
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 2 tokens
> [retrieve] Total embedding token usage: 2 tokens
> [retrieve] Total embedding token usage: 2 tokens


[NodeWithScore(node=Node(text='category: Sports\ncountry: United States\n\nMichael Jordan is a retired professional basketball player, widely regarded as one of the greatest basketball players of all time.', doc_id='4cf176e5-363f-479b-8979-c3e07cfaead8', embedding=None, doc_hash='9aaec18f659138a23ca519f8d6d1f3997d34aae993b8c07443b165c13163b886', extra_info={'category': 'Sports', 'country': 'United States'}, node_info={}, relationships={}), score=0.797632515)]