In [20]:
from langchain_community.document_loaders import PyPDFLoader, CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from typing_extensions import List, TypedDict
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
import getpass
import os

In [21]:
# One loader per input file: a CSV file of cards and two PDF documents.
cards_loader = CSVLoader(file_path="./data/cards.csv")
doc1_loader = PyPDFLoader(file_path="./data/doc1.pdf")
doc2_loader = PyPDFLoader(file_path="./data/doc2.pdf")

In [22]:
# Load the three sources in the same order the loaders were declared.
cards, doc1, doc2 = [
    loader.load() for loader in (cards_loader, doc1_loader, doc2_loader)
]

In [23]:
# Splitter for the card documents: chunk_size 200, no overlap between chunks.
csv_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    chunk_size=200,
    chunk_overlap=0,
)
# Splitter for the PDF documents: chunk_size 500 with an overlap of 50.
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    chunk_size=500,
    chunk_overlap=50,
)

In [24]:
# Cards go through the CSV splitter; both PDFs share the text splitter.
card_splits = csv_splitter.split_documents(documents=cards)
doc1_splits, doc2_splits = [
    text_splitter.split_documents(documents=pdf_pages) for pdf_pages in (doc1, doc2)
]

In [25]:
# Embedding model served through Ollama; used by the vector store below.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
# Chroma store built with only an embedding function.
# NOTE(review): no persist directory or collection name is passed, so the store
# presumably lives in memory under a default collection — confirm.
vector_store = Chroma(embedding_function=embeddings)

In [26]:
# Sanity check: number of chunks produced from the cards CSV.
print(len(card_splits))

25


In [27]:
# Add each batch of chunks to the vector store and report progress after
# every batch (cards first, then doc1, then doc2).
for label, splits in (
    ("Cards", card_splits),
    ("doc1", doc1_splits),
    ("doc2", doc2_splits),
):
    _ = vector_store.add_documents(documents=splits)
    print(f"{label} done")

Cards done
doc1 done
doc2 done


In [28]:
# Russian-language prompt template with two placeholders, {question} and
# {context}. It tells the model to answer the client's question using the
# supplied context and to say so when it does not know or cannot find the answer.
prompt = PromptTemplate.from_template(
    """
    Ты помощник-ассистент, который отвечает на вопросы клиента. Используя следующий контекст для информации и ответь на вопрос клиента.
    Если ты не знаешь или не нашел ответа, так и скажи.
    Вопрос: {question} 
    Контекст: {context} 
    Ответ:
    """
)

In [29]:
# Chat model used by the generate step; only the model name is configured here.
llm = ChatOllama(model="llama3.1:8b")

In [30]:
# Define state for application
class State(TypedDict):
    """State dictionary passed between the graph steps."""
    question: str  # user question; read by retrieve and generate
    context: List[Document]  # documents written by retrieve, read by generate
    answer: str  # model answer text written by generate


# Define application steps
# Retrieval settings, kept next to the step so they are easy to find and tune.
# NOTE(review): the filter limits the search to chunks whose "source" metadata
# is doc1.pdf, although card and doc2 chunks are also added to the store —
# confirm this restriction is intended.
RETRIEVAL_FILTER = {"source": "./data/doc1.pdf"}
RETRIEVAL_K = 10


def retrieve(state: State):
    """Look up the chunks most similar to the question in the vector store.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": retrieved_docs}`` with the
        documents returned by the similarity search.
    """
    retrieved_docs = vector_store.similarity_search(
        state["question"], k=RETRIEVAL_K, filter=RETRIEVAL_FILTER
    )
    # No debug print here: the documents are returned in the state, so callers
    # can inspect them via result["context"].
    return {"context": retrieved_docs}


def generate(state: State):
    """Ask the LLM to answer the question using the retrieved context.

    The page contents of ``state["context"]`` are joined with blank lines,
    substituted into the prompt together with ``state["question"]``, and the
    text of the model's reply is returned as ``{"answer": ...}``.
    """
    context_text = "\n\n".join([chunk.page_content for chunk in state["context"]])
    prompt_value = prompt.invoke(
        {"question": state["question"], "context": context_text}
    )
    reply = llm.invoke(prompt_value)
    return {"answer": reply.content}


# Build the graph: START -> retrieve -> generate, then compile it.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
# Run the compiled graph on a sample question (asked in Russian); the returned
# state is read below through its "context" and "answer" keys.
result = graph.invoke({"question": "Сколько мне будет стоит смски с оповещением об операциях"})

In [None]:
# Show the retrieved context first, then the generated answer.
print("Context: {}\n\n".format(result["context"]))
print("Answer: {}".format(result["answer"]))