In [26]:
from dotenv import load_dotenv
from pprint import pprint

from typing import TypedDict, Annotated
from langgraph.graph import StateGraph, START, END, MessagesState
from langgraph.prebuilt import tools_condition, ToolNode
from langgraph.checkpoint.memory import MemorySaver

from langchain.globals import *
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, AnyMessage

from langchain_openai import ChatOpenAI

from langchain_unstructured import UnstructuredLoader

load_dotenv()

class MessagesState(MessagesState):
    pass

llm = ChatOpenAI(model="gpt-4o")

In [41]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("./data/AI-Powered_Search.pdf")
pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [42]:
import getpass
import os
import time

from pinecone import Pinecone, ServerlessSpec

if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

pc = Pinecone(api_key=pinecone_api_key)

In [43]:
import time

index_name = "ai-powered-search"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [44]:
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [45]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [46]:
from uuid import uuid4
from langchain_core.documents import Document

uuids = [str(uuid4()) for _ in range(len(pages))]
vector_store.add_documents(documents=pages, ids=uuids)

['0499ba80-f446-42cc-8bcd-2d4709962fcc',
 'b7cba623-e4d1-42ac-aa89-d5a6d3866bf4',
 'ddbcd137-42d0-4be3-b573-5484b92d7418',
 'c63a48b7-efdc-4081-8b9f-7f046f8ff460',
 '332aee2b-11f4-40ad-b52f-c10fcba8ac33',
 '74a2a9b5-19c2-4948-8d5e-5a563e141e64',
 'e4e88f56-932c-4c21-b606-ee0b68441c93',
 'db827d39-2a34-4eb1-9bbd-c663060fc784',
 'f41e3736-6592-40cb-a882-b5f7b5a0fa85',
 'cfc1cf9c-b2b2-47dc-a23a-68ecc4eea749',
 '84a76dfb-f506-43d1-82e7-4e864f86514f',
 'd9413a53-f364-4cee-9c67-500d9b89b8ee',
 '14ef25f2-4d89-4905-b80a-49e8e493fcc8',
 'da915954-ba89-4fa0-99ac-4dab0a35db05',
 '53b0cf61-b06c-4188-b4f3-a6e662d87496',
 '8ee48e51-fa57-43a7-9c8e-1fd5b56eba94',
 '73dcb8d1-86bb-43c5-9e98-072683fcd4ca',
 '0eb816b8-af09-4fba-a54a-392b6dec8280',
 'efe8f4f9-25c6-454a-a82e-883cd955443d',
 'c0692e9f-d63d-4291-88e7-c9627f76914b',
 '6332a69d-7740-46ea-b25b-5a39273f49e7',
 '1c2cf797-5bcd-4231-a164-301fc6c12d10',
 '7ce32b36-5029-4ec1-b2a4-37ae302be38b',
 'c99eeb3f-566c-41fd-92fe-e29f109f6e46',
 'c8fdfa0e-285c-

In [47]:
results = vector_store.similarity_search_with_score(
    "what are embeddngs?", k=2
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.438082] 372.3 Distributional semantics and embeddings
In recent years, the distributional hypoth esis has been applied to create semantic
understandings of terms and term sequences through what are known as embeddings.
An embedding is a set of coordinates in a vector space into which we map (or “embed”)
a concept. More concretely, that set of coordinates is a numerical vector (a list of num-
bers) that is intended to represent the semantic meaning of your data (text, image,
audio, behavior, or other data modalities). Text-based embeddings can represent term
sequences of any length, but when representi ng individual words or phrases, we call
those embeddings word embeddings. 
 The term sequence is often encoded into a reduced-dimension embedding that
can be compared with the vectors for all of the other embeddings within the corpus to
find the most semantically related documents.
 To understand this process, it may be useful to think of how a search engine works
out of the box.

In [49]:
from langchain_core.prompts.chat import SystemMessagePromptTemplate

prompt = SystemMessagePromptTemplate.from_template("You are an assistant for question-answering tasks. Use only the context provided to answer the question.  Do not make up your own answer. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nContext: {context} \nQuestion: {question}")
prompt

SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use only the context provided to answer the question.  Do not make up your own answer. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nContext: {context} \nQuestion: {question}"), additional_kwargs={})

In [59]:
question = "what is a tranformer LLM?"
retrieved_docs = vector_store.similarity_search(question, k=3)
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
messages = prompt.format_messages(question=question, context=docs_content)
response = llm.invoke(messages)
print("Messages:")
print(messages)
print("Content:")
print(response.content)

Messages:
[SystemMessage(content='You are an assistant for question-answering tasks. Use only the context provided to answer the question.  Do not make up your own answer. If you don\'t know the answer, just say that you don\'t know. Use three sentences maximum and keep the answer concise.\nContext: 35113.4 Applying Transformers to search\nmodel for encoding. The encoding process then outputs a tensor, which is an array of\nvectors (one vector for each token). \n13.3.2 Openly available pret rained Transformer models\nWhile Transformers enable st ate-of-the-art language mode ls to be built, having the\nknowledge and resources to build them from scratch can present a large hurdle for many.\nOne very important aspect of  working with Transformers is the large community and\nopen toolsets that make it possible for any engineer to quickly get up and running with\nthe technology. All it takes is some knowledge of Python and an internet connection. \n The models that are trained by this proce