# Pinecone

In [1]:
from pinecone import Pinecone

  from tqdm.autonotebook import tqdm


In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
import os

pc = Pinecone(
    api_key=os.getenv("PINECONE_API_KEY")
)



In [4]:
pc.list_indexes()

[
    {
        "name": "idat-index",
        "dimension": 1536,
        "metric": "cosine",
        "host": "idat-index-b9eee40.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "deletion_protection": "disabled"
    }
]

In [5]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [6]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=pc.Index("idat-index"), embedding=embeddings)

In [7]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['48c6301d-cb83-4d55-a7a5-92c4ee322787',
 'f866eac8-c37d-4680-9016-4841334f6b66',
 '9447b24f-232a-4807-a879-61ee7730bf52',
 '6127502b-6385-429c-bf7d-fe8e5b0cfb87',
 'f6f7dab2-2f9e-477f-ac33-de3a37578a17',
 '93d0e08d-5e79-470e-82d4-01805f60519d',
 '09a33963-847a-44fd-8afc-d38416e4fcad',
 '2b132660-c0fd-4e9f-8348-b541f6a30429',
 '341ff914-2c4d-4546-9188-b50cac32e783',
 '92ba57ac-e09c-4064-a98b-f535920d6118']

In [8]:
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]
* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]
