# Dipendenze

In [1]:
# Install or upgrade the third-party packages this notebook imports, quietly.
# NOTE(review): versions are unpinned, so a later run may pick up different releases — consider pinning.
%pip install --quiet --upgrade python-dotenv pypdf langchain-openai langchain langchain-core langchain-text-splitters langchain-community langgraph

Note: you may need to restart the kernel to use updated packages.


### LangSmith Setup

In [2]:
import getpass
import os

# Flag LangSmith tracing as enabled through the process environment.
os.environ.update(LANGSMITH_TRACING="true")

In [3]:
#Env File

from dotenv import load_dotenv

# Read the variables defined in intro.env into the environment; the trailing
# semicolon keeps the call's return value out of the cell output.
load_dotenv(dotenv_path="intro.env");

### OAI LLM Setup

In [4]:
from langchain_openai import AzureChatOpenAI

# Chat model served by an Azure OpenAI deployment. Endpoint, deployment name and
# API version are all taken from environment variables (None when unset).
llm = AzureChatOpenAI(
    openai_api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
    azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
)

### Embeddings setup

In [5]:
from langchain_openai import AzureOpenAIEmbeddings

# Embedding model used to vectorise document chunks and queries. It is configured
# from its own set of environment variables, separate from the chat model's.
embeddings = AzureOpenAIEmbeddings(
    openai_api_version=os.environ.get("AZURE_OPENAI_EMBEDDINGS_API_VERSION"),
    azure_deployment=os.environ.get("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_EMBEDDINGS_ENDPOINT"),
)

### VectorStorage

In [6]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory vector index backed by the embedding model configured above.
vector_store = InMemoryVectorStore(embedding=embeddings)

# Progetto

### 1 - Indexing

In [7]:
# Directory handed to the PDF directory loader; relative to the notebook's working directory.
DIR_PATH = "./data"

In [8]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
import pypdf

# Load the PDFs found under DIR_PATH into a list of documents.
loader = PyPDFDirectoryLoader(path=DIR_PATH)
docs = loader.load()

In [9]:
# Sanity check: number of characters in the first loaded document's text.
first_doc = docs[0]
print(len(first_doc.page_content))

1375


In [10]:
#Chunking

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded documents into overlapping character windows so that each
# piece can be embedded and retrieved independently.
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,  # record each chunk's offset within its source document
    chunk_overlap=100,  # characters shared between neighbouring chunks
    chunk_size=500,  # maximum characters per chunk
)
all_splits = text_splitter.split_documents(docs)

# Report how many chunks the split produced.
print(f"Split into {len(all_splits)} sub-documents.")

Split into 36 sub-documents.


In [11]:
# Embed every chunk and store it; the call returns the ids assigned to the stored chunks.
# NOTE(review): no explicit ids are passed, so re-running this cell presumably
# stores the same chunks again under new ids — confirm before re-executing.
ids = vector_store.add_documents(documents=all_splits)

# Preview at most three ids. Slicing (instead of indexing 0..2) avoids an
# IndexError when fewer than three chunks were produced.
for doc_id in ids[:3]:
    print(doc_id)

5b83bdb2-bf46-4919-9750-a27299a5a0fd
14e57836-d14e-4cc7-b7cb-02d4f905dd97
f839f586-7e95-4d63-9e96-dceb5a7382d8


### 2 - Retrieval & Generation

In [37]:
# Conversation memory, empty at start: one list for the model's answers and
# one for the user's questions. Both are fed back into the graph on each turn.
answer_history, user_history = [], []

In [45]:
from langchain import hub
from langchain_core.prompts import PromptTemplate

# Fetch the "rlm/rag-prompt" template from the LangChain hub.
# NOTE(review): `prompt` is reassigned from the local `template` string further
# down before any visible use, so this pull looks redundant — confirm and consider removing.
prompt = hub.pull("rlm/rag-prompt")

In [46]:
# Prompt text for the answer-generation step. The four placeholders
# ({context}, {question}, {user_history}, {answer_history}) must all be
# supplied when the prompt is invoked; a missing one raises a KeyError.
template = """
Use the following pieces of context, User History and Answer History to answer the Question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

User History: {user_history}

Answer History: {answer_history}

Helpful Answer:
"""

In [47]:
# Build the prompt object from the custom template; each {placeholder} in the
# text becomes one of its input variables.
prompt = PromptTemplate.from_template(template=template)

In [48]:
# Show the prompt's repr to confirm which input variables it expects.
prompt

PromptTemplate(input_variables=['answer_history', 'context', 'question', 'user_history'], input_types={}, partial_variables={}, template='\nUse the following pieces of context, User History and Answer History to answer the Question.\nIf you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\nUse three sentences maximum and keep the answer as concise as possible.\nAlways say "thanks for asking!" at the end of the answer.\n\n{context}\n\nQuestion: {question}\n\nUser History: {user_history}\n\nAnswer History: {answer_history}\n\nHelpful Answer:\n')

In [39]:
from langchain_core.documents import Document
from typing_extensions import List, TypedDict

class State(TypedDict):
    """State schema shared by the graph's `retrieve` and `generate` steps."""

    # The user's current question; also used as the similarity-search query.
    question: str
    # Documents returned by `retrieve`; `generate` joins their page_content.
    context: List[Document]
    # Model reply text for the current question.
    answer: str
    # Replies from earlier turns; `generate` returns it with the new reply appended.
    answer_history: List[str]
    # Questions from earlier turns; `generate` returns it with the new question appended.
    user_history: List[str]

In [49]:
def retrieve(state: State):
    """Search the vector store for chunks similar to the current question.

    Returns a partial state update: ``{"context": <matching documents>}``.
    """
    return {"context": vector_store.similarity_search(state["question"])}


def generate(state: State):
    """Answer the current question from the retrieved context and both histories.

    Reads ``question`` and ``context`` from ``state`` (both required), plus the
    optional ``user_history`` / ``answer_history`` lists, which are treated as
    empty when absent.

    Returns a partial state update with:
      - ``answer``: the model's reply text,
      - ``answer_history``: previous answers followed by this reply,
      - ``user_history``: previous questions followed by this question.

    The incoming history lists are never modified; new lists are returned.
    """
    # Read the histories once, defensively, so the prompt input and the saved
    # history agree and a first call without history keys does not raise KeyError.
    previous_answers = state.get("answer_history") or []
    previous_questions = state.get("user_history") or []

    docs_content = "\n\n".join(doc.page_content for doc in state["context"])

    messages = prompt.invoke({
        "question": state["question"],
        "context": docs_content,
        "user_history": previous_questions,
        "answer_history": previous_answers,
    })

    response = llm.invoke(messages)

    # Build fresh lists instead of appending in place, so the caller's lists
    # (and the input state) are left untouched.
    return {
        "answer": response.content,
        "answer_history": [*previous_answers, response.content],
        "user_history": [*previous_questions, state["question"]],
    }

In [41]:
from langgraph.graph import START, StateGraph

# Wire the two steps into a linear graph: START -> retrieve -> generate.
# NOTE(review): the builder receives the current `retrieve`/`generate` function
# objects, so re-run this cell after redefining either one — otherwise the
# previously compiled graph keeps calling the old version (a likely cause of a
# "missing variables" KeyError from the prompt).
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [50]:
# Interactive chat loop: each turn runs the retrieve -> generate graph and
# carries both histories forward so the next turn can see them.
while True:
    # Strip surrounding whitespace so inputs such as " exit " still end the session.
    user_input = input("Please insert your message! (digita 'exit' per uscire) ").strip()
    if user_input.lower() == "exit":
        break
    if not user_input:
        # Nothing was asked: skip the retrieval + LLM round-trip for blank input.
        continue

    # Invoke the graph with the histories accumulated so far.
    result = graph.invoke({
        "question": user_input,
        "answer_history": answer_history,
        "user_history": user_history
    })

    print(f'Answer: {result["answer"]}\n')

    # Carry the updated histories into the next iteration.
    answer_history = result.get("answer_history", [])
    user_history = result.get("user_history", [])


KeyError: "Input to PromptTemplate is missing variables {'user_history', 'answer_history'}.  Expected: ['answer_history', 'context', 'question', 'user_history'] Received: ['question', 'context']\nNote: if you intended {user_history} to be part of the string and not a variable, please escape it with double curly braces like: '{{user_history}}'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT "