# Pinecone Vector Store

In [6]:
import os
from pinecone import Pinecone, ServerlessSpec, PodSpec

pc = Pinecone(
    api_key=os.environ.get("PINECONE_API_KEY")
)

In [7]:
pc.list_indexes()

{'indexes': []}

In [11]:
if "quickstart-index" not in pc.list_indexes().names():
    pc.create_index(
        name='quickstart-index', 
        dimension=1536, 
        metric='euclidean',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )

In [12]:
pc.list_indexes()

{'indexes': [{'dimension': 1536,
              'host': 'quickstart-index-es1tgmv.svc.aped-4627-b74a.pinecone.io',
              'metric': 'euclidean',
              'name': 'quickstart-index',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [14]:
pinecone_index = pc.Index("quickstart-index")

In [18]:
# pinecone_index.delete(delete_all=True)

### Creating dummy data

In [19]:
books = [
    {
        "title": "To Kill a Mockingbird",
        "author": "Harper Lee",
        "content": (
            "To Kill a Mockingbird is a novel by Harper Lee published in"
            " 1960..."
        ),
        "year": 1960,
    },
    {
        "title": "1984",
        "author": "George Orwell",
        "content": (
            "1984 is a dystopian novel by George Orwell published in 1949..."
        ),
        "year": 1949,
    },
    {
        "title": "The Great Gatsby",
        "author": "F. Scott Fitzgerald",
        "content": (
            "The Great Gatsby is a novel by F. Scott Fitzgerald published in"
            " 1925..."
        ),
        "year": 1925,
    },
    {
        "title": "Pride and Prejudice",
        "author": "Jane Austen",
        "content": (
            "Pride and Prejudice is a novel by Jane Austen published in"
            " 1813..."
        ),
        "year": 1813,
    },
]

In [20]:
import uuid
from llama_index.embeddings.openai import OpenAIEmbedding

embed_model = OpenAIEmbedding()

In [21]:
entries = []
for book in books:
    vector = embed_model.get_text_embedding(book["content"])
    entries.append(
        {"id": str(uuid.uuid4()), "values": vector, "metadata": book}
    )
pinecone_index.upsert(entries)

{'upserted_count': 4}

### query existing store

In [22]:
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core import VectorStoreIndex
from llama_index.core.response.pprint_utils import pprint_source_node

In [23]:
vector_store = PineconeVectorStore(
    pinecone_index=pinecone_index, text_key="content"
)
retriever = VectorStoreIndex.from_vector_store(vector_store).as_retriever(
    similarity_top_k=1
)

In [24]:
nodes = retriever.retrieve("What is that book about a bird again?")

In [25]:
pprint_source_node(nodes[0])

Node ID: dd3ed042-07b4-4d24-b230-76dbaa6aaf44
Similarity: 0.407682419
Text: To Kill a Mockingbird is a novel by Harper Lee published in
1960...


In [26]:
nodes[0].node.metadata

{'author': 'Harper Lee', 'title': 'To Kill a Mockingbird', 'year': 1960.0}

### delete all data

In [27]:
pinecone_index.delete(delete_all=True)

{}