<a href="https://colab.research.google.com/github/itachi2303/Quater2_projects/blob/main/langchain_rag_project_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the LangChain integrations for Pinecone and Google Generative AI.
# NOTE(review): versions are unpinned and -U upgrades to the latest release — consider pinning for reproducible runs.
%pip install -qU langchain-pinecone langchain-google-genai

In [None]:
from pinecone import Pinecone, ServerlessSpec
from google.colab import userdata
import os

# Fetch the Pinecone key from Colab's user secrets, mirror it into the process
# environment, then build the client from that same value.
pinecone_api_key = userdata.get('PINECONE_API_KEY')
os.environ['PINECONE_API_KEY'] = pinecone_api_key
pc = Pinecone(api_key=pinecone_api_key)

In [None]:
# Ask the Pinecone client which indexes exist and display the result.
existing_indexes = pc.list_indexes()
existing_indexes

[]

In [None]:
import time

index_name = "study-rag-project"

# Only create the index when it is missing, so the cell can be re-run safely
# instead of failing because the index already exists.
existing_index_names = [index_info["name"] for index_info in pc.list_indexes()]
if index_name not in existing_index_names:
    pc.create_index(
        name=index_name,
        dimension=768,  # must equal the length of the embedding vectors stored in this index
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        metric="cosine",
    )
    # Wait until the freshly created index reports ready before using it.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

# Handle to the index, used by the vector store later in the notebook.
first_index = pc.Index(index_name)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Copy the Google API key from Colab's user secrets into the GOOGLE_API_KEY
# environment variable (presumably where the langchain-google-genai clients look it up — confirm).
os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')


In [None]:
# Embedding model that is later handed to the vector store.
# NOTE(review): its vector length is assumed to match the index dimension (768) — confirm.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [None]:
# Smoke test: embed a short string and preview the first 10 components of the vector.
vector = embedding_model.embed_query("hello how are u")
vector[:10]

[0.0134428096935153,
 -0.010091329924762249,
 -0.02226116508245468,
 -0.008058574050664902,
 0.04874265193939209,
 -0.010381502099335194,
 0.018190965056419373,
 -0.011156506836414337,
 0.005312008783221245,
 0.00734741473570466]

In [None]:
# List the indexes again to confirm the new index is present and check its status.
existing_indexes = pc.list_indexes()
existing_indexes

[
    {
        "name": "study-rag-project",
        "dimension": 768,
        "metric": "cosine",
        "host": "study-rag-project-k522enj.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "deletion_protection": "disabled"
    }
]

In [None]:
from langchain_pinecone import PineconeVectorStore
# Wrap the Pinecone index in a LangChain vector store that uses `embedding_model` for embeddings.
vectorstore = PineconeVectorStore(index=first_index, embedding=embedding_model)

In [None]:
from uuid import uuid4

from langchain_core.documents import Document

# Sample corpus as (text, source) pairs. Each pair becomes one Document whose
# metadata stores the label under the "source" key.
_sample_rows = [
    ("I had chocalate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

# Unpack into the individual names that the following cells refer to.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = (
    Document(page_content=text, metadata={"source": source})
    for text, source in _sample_rows
)




In [None]:
# All sample documents, in numeric order, ready to be added to the vector store.
documents = [
    document_1, document_2, document_3, document_4, document_5,
    document_6, document_7, document_8, document_9, document_10,
]

In [None]:
# One fresh random UUID string per document, in the same order as `documents`.
uuids = [str(uuid4()) for _ in documents]

In [None]:
# Add the documents to the vector store under the generated ids; the displayed
# result is the list of ids.
vectorstore.add_documents(documents=documents, ids=uuids)

['7eb3a9e9-260c-4975-b3f4-d75cdfd69d2b',
 'cebb2aaf-90d8-4f01-85df-d5d8d76e724d',
 '0b998fa4-053c-4469-8fe4-653e7ba9c67d',
 '85dec559-0943-4e25-9f86-8291642656ba',
 '054d8778-dfbb-4a68-ab20-3bbf98c1d50f',
 '25441e88-de30-417b-94cb-53ae11b73a81',
 '3e0496e6-5a07-4235-9b6f-9c5d00af08dc',
 'f1c55e98-d3fc-42f5-94a2-10c07b7f45e5',
 '940bebdc-3bf6-451f-8025-2e5e4a994cf3',
 '10bef52b-7f76-491b-96e5-649ebd6827ef']

### Deleting an item from the vector store

In [None]:
# Example (left disabled): uncomment to delete the entry whose id is the last one in `uuids`.
# vectorstore.delete(ids=[uuids[-1]])

In [None]:
# Similarity search limited to 2 results, with a metadata filter on source == "tweet".
# NOTE(review): despite its name, `search_query` holds the returned documents, not the query text.
search_query = vectorstore.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
# Print each hit's text followed by its metadata.
for res in search_query:
    print(f"* {res.page_content} [{res.metadata}]")

* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]
* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]


In [None]:
# Similarity search that also returns a score for every hit.
results = vectorstore.similarity_search_with_score(
    "Will it be hot tomorrow?",
)
for res, score in results:
    # `.3f` rounds the score to three decimals; a bare `3f` would only set a
    # minimum field width and leave the default six decimals.
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.667716] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]
* [SIM=0.577374] I have a bad feeling I am going to get deleted :( [{'source': 'tweet'}]
* [SIM=0.537337] I had chocalate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet'}]
* [SIM=0.533720] The stock market is down 500 points today due to fears of a recession. [{'source': 'news'}]


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that `ask` uses to generate answers.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", temperature=0.7)

In [None]:
def ask(query: str, k: int = 3):
  """Answer ``query`` with the LLM, using the ``k`` most similar stored documents as context.

  Args:
    query: Question or statement to answer.
    k: Number of documents to retrieve from ``vectorstore`` (defaults to 3).

  Returns:
    Whatever ``llm.invoke`` returns for the assembled prompt; the notebook
    reads the reply text from its ``.content`` attribute.
  """
  retrieved_docs = vectorstore.similarity_search(query, k=k)

  # Send only each document's text and source to the model. Interpolating the
  # raw list would put object reprs and ids into the prompt.
  references = "\n".join(
    f"- {doc.page_content} (source: {(doc.metadata or {}).get('source', 'unknown')})"
    for doc in retrieved_docs
  )
  if not references:
    references = "(no references found)"

  prompt = (
    "Answer the query below. Use the references as supporting context and "
    "say so if they are not relevant.\n\n"
    f"Query: {query}\n\n"
    f"References:\n{references}"
  )
  return llm.invoke(prompt)


In [None]:
# Run the retrieval-backed query and print the reply text (the response's `.content`).
res = ask("LangChain provides abstractions to make working with LLMs easy")
print(res.content)

Okay, I understand. You've provided a statement about LangChain making LLM interactions easier, and then given me three documents, each representing a tweet. 

Here's a breakdown of what we can infer and how it relates to your initial statement:

*   **Initial Statement:** "LangChain provides abstractions to make working with LLMs easy." This is a general claim about the purpose of LangChain.

*   **Document Analysis:**
    *   **Document 1 (ID: f1c55e98...)**: This tweet promotes **LangGraph** as the "best framework" for stateful and agentic applications. While LangGraph is related to LangChain, it's not directly about making working with LLMs *easy* in the most basic sense. It speaks to the power and sophistication of a specific part of the LangChain ecosystem. 
    *   **Document 2 (ID: 0b998fa4...)**: This tweet expresses excitement about building a project with **LangChain**. This tweet supports the idea that people are using LangChain and finding value in it.
    *   **Document 3