In [16]:
! pip install --quiet langchain langchain_cohere langchain-openai tiktoken langchainhub chromadb langgraph

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.1/611.1 kB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m65.6 MB/s[0m eta [36m0:00:00

In [17]:
### LLMs
import getpass
import os

# Keep the secret out of the notebook: reuse a key that is already exported in
# the environment and only prompt (without echoing) when none is set.
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API key: ")

In [47]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_cohere import CohereEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader

# Load the source file
loader = TextLoader("llm-wikipedia.txt")
documents = loader.load()

# Split the loaded documents into chunks of at most 512 characters, no overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents)

# Embed the chunks with Cohere's embedding model
embd = CohereEmbeddings(model="embed-english-v2.0")

# Add to vectorstore.
# A dedicated collection name keeps this index separate from the one built by
# the multi-file cell. NOTE(review): without it both cells write to Chroma's
# default collection, which presumably is shared inside one kernel session and
# would then hold these chunks twice - confirm against the Chroma docs.
# Re-running this cell may still append the same chunks again.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embd,
    collection_name="llm-wikipedia",
)

# Build the retriever
retriever = vectorstore.as_retriever()


In [52]:
## Variant for indexing more than one file
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_cohere import CohereEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader

# List every file that should be indexed
file_paths = [
    "llm-wikipedia.txt",
    "llm-history.txt",
]

# Load each file and collect all resulting documents in one list
documents = []
for file_path in file_paths:
    loader = TextLoader(file_path)
    documents.extend(loader.load())

# Split the loaded documents into chunks of at most 512 characters, no overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents)

embd = CohereEmbeddings(model="embed-english-v2.0")

# Add to vectorstore.
# A dedicated collection name keeps this index separate from the one built by
# the single-file cell. NOTE(review): without it both cells write to Chroma's
# default collection, which presumably is shared inside one kernel session, so
# the llm-wikipedia.txt chunks would be indexed twice - confirm against the
# Chroma docs. Re-running this cell may still append the same chunks again.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embd,
    collection_name="llm-corpus",
)

# Build the retriever
retriever = vectorstore.as_retriever()


In [25]:
# Display the first element of `docs` as a quick sanity check
docs[0]

Document(metadata={'source': 'llm-wikipedia.txt'}, page_content='A large language model (LLM) is a type of machine learning model designed for natural language processing tasks such as language generation. LLMs are language models with many parameters, and are trained with self-supervised learning on a vast amount of text.')

In [49]:
### Router
### Builds the question router: the LLM is offered a single "vectorstore" tool
### and decides per question whether to call it. A tool call means the question
### should be answered from the indexed documents; no tool call means it is
### outside the corpus.

from langchain_cohere import ChatCohere  # chat model used for routing and answering
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


# Data model
# NOTE(review): the class name is what shows up as the tool name in the
# recorded tool calls, so it is kept. It also rebinds the notebook-level name
# `vectorstore` (until now the Chroma store); `retriever` was created before
# this point, so retrieval keeps working.
class vectorstore(BaseModel):
    """
    A vectorstore containing documents about large language models (LLMs) and their history. Use the vectorstore for questions on these topics.
    """

    query: str = Field(description="The query to use when searching the vectorstore.")


# Preamble
# The topics listed here must match what was actually indexed (the
# llm-wikipedia / llm-history files), otherwise in-corpus questions get
# routed away from the retriever.
preamble = """You are an expert at routing a user question to a vectorstore.
The vectorstore contains documents about large language models (LLMs) and their history.
Use the vectorstore for questions on these topics."""

# LLM with tool use and preamble
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_router = llm.bind_tools(
    tools=[vectorstore], preamble=preamble  # expose the tool and the routing instructions to the model
)

# Prompt
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{question}"),
    ]
)

# The router chain: format the question, then let the tool-enabled model decide.
question_router = route_prompt | structured_llm_router

# Smoke test. `tool_calls` is only present in response_metadata when the model
# decided to call the tool, so read it with .get() instead of indexing.
for demo_question in ("When the start?", "What are the types of agent memory?"):
    response = question_router.invoke({"question": demo_question})
    print(response.response_metadata.get("tool_calls"))

response = question_router.invoke({"question": "Hi how are you?"})
# Small talk should not trigger the tool, so this is expected to print False.
print("tool_calls" in response.response_metadata)

[{'id': 'vectorstore_72rfjg2059g7', 'type': 'function', 'function': {'name': 'vectorstore', 'arguments': '{"query":"start LLM"}'}}]
[{'id': 'vectorstore_2v0c9n3wxm3z', 'type': 'function', 'function': {'name': 'vectorstore', 'arguments': '{"query":"agent memory"}'}}]
False


In [67]:
### Retrieval Grader


# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# Instructions given to the grading model
preamble = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

# The human turn carries the document under review and the question
grade_prompt = ChatPromptTemplate.from_messages(
    [("human", "Retrieved document: \n\n {document} \n\n User question: {question}")]
)

# Model asked to reply in the shape of the GradeDocuments JSON schema; the
# result is read below as a dict.
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments.schema(), preamble=preamble)
retrieval_grader = grade_prompt | structured_llm_grader

# Try the grader on the second retrieved document for a sample question
question = "types of agent memory"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
response = retrieval_grader.invoke({"question": question, "document": doc_txt})

# Anything other than an exact 'yes' is reported as 'no'
raw_binary_score = response['binary_score']
if raw_binary_score == 'yes':
    binary_score = 'yes'
else:
    binary_score = 'no'

print(f"Retrieval score: {binary_score}")


Retrieval score: no


In [50]:
### Generate

from langchain_core.messages import HumanMessage  # message type that carries the user's text to the chat model
from langchain_core.output_parsers import StrOutputParser

# Preamble
# Tells the model to answer from the retrieved context, to stay concise, and
# to admit when it does not know the answer.

preamble = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. Don't say emotional answer. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise."""

# LLM
# Same `model=` keyword as every other ChatCohere in this notebook;
# .bind(preamble=...) attaches the instructions above to each call.
llm = ChatCohere(model="command-r", temperature=0).bind(preamble=preamble)


# Prompt
def prompt(x):
    """Build the chat prompt for one RAG request.

    Args:
        x (dict): Must contain "question" (str) and "documents" (the context).

    Returns:
        ChatPromptTemplate: A single human message with the question.
    """
    return ChatPromptTemplate.from_messages(
        [
            HumanMessage(
                f"Question: {x['question']} \nAnswer: ",
                # The context travels alongside the message instead of being
                # pasted into its text. NOTE(review): presumably langchain_cohere
                # forwards these as grounding documents - confirm.
                additional_kwargs={"documents": x["documents"]},
            )
        ]
    )


# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run
question = "What is the LLM?"  # Example question, replace with your desired question
# Retrieve context for *this* question; reusing `docs` from an earlier cell
# would ground the answer in whatever that cell happened to leave behind.
docs = retriever.invoke(question)
generation = rag_chain.invoke({"documents": docs, "question": question})
print(generation)

GPT-1 is usually considered the first LLM, introduced in 2018.


In [33]:
### LLM fallback
### Fallback chain that answers from Cohere's command-r model alone, without retrieved context

from langchain_core.output_parsers import StrOutputParser

# Preamble
preamble = """You are an assistant for question-answering tasks. Answer the question based upon your knowledge. Use three sentences maximum and keep the answer concise."""

# LLM
# Same `model=` keyword as every other ChatCohere in this notebook.
llm = ChatCohere(model="command-r", temperature=0).bind(preamble=preamble)


# Prompt
# NOTE: this rebinds the notebook-level names `prompt` and `llm`; a chain built
# in an earlier cell keeps the objects it was constructed with.
def prompt(x):
    """Build the chat prompt for a context-free answer.

    Args:
        x (dict): Must contain "question" (str).

    Returns:
        ChatPromptTemplate: A single human message with the question.
    """
    return ChatPromptTemplate.from_messages(
        [HumanMessage(f"Question: {x['question']} \nAnswer: ")]
    )


# Chain
llm_chain = prompt | llm | StrOutputParser()

# Run
question = "What are the types of agent memory?"
generation = llm_chain.invoke({"question": question})
print(generation)

Agent memory can be categorized into several types, including **episodic memory** (stores specific events or experiences), **semantic memory** (stores general knowledge and facts), and **procedural memory** (stores how to perform tasks or skills). Additionally, **working memory** (temporarily holds and manipulates information) and **long-term memory** (stores information for extended periods) are also crucial components. These types enable agents to learn, adapt, and make informed decisions.


In [34]:
### Hallucination Grader

# Data model for hallucination grading
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""
    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

# Instructions for the grader; sent as the system message of the prompt below
preamble = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", preamble),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

# Model asked to reply in the shape of the GradeHallucinations JSON schema
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_grader = llm.with_structured_output(
    schema=GradeHallucinations.schema()
)

# Prompt -> structured model
hallucination_grader = hallucination_prompt | structured_llm_grader

# Example run; `docs` and `generation` must already exist from earlier cells
response = hallucination_grader.invoke({"documents": docs, "generation": generation})

# Anything other than an exact 'yes' is reported as 'no'
raw_binary_score = response['binary_score']
if raw_binary_score == 'yes':
    binary_score = 'yes'
else:
    binary_score = 'no'

print(f"Hallucination score: {binary_score}")


Hallucination score: no


In [54]:
### Answer Grader


# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# Preamble
# Quotes around 'Yes' are balanced and the first sentence is terminated, so
# the instruction reads the same as in the later answer-grader cell.
preamble = """You are a grader assessing whether an answer addresses / resolves a question.
Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question."""

# LLM with function call
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeAnswer.schema(), preamble=preamble)

# Prompt
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# `answer_grader` is the chain the graph's grading edge calls later on.
answer_grader = answer_prompt | structured_llm_grader
# Example run; relies on `question` and `generation` from earlier cells.
answer_grader.invoke({"question": question, "generation": generation})

{'binary_score': '0'}

In [70]:
### Answer Grader

# Data model for grading answers
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""
    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )

# Preamble
preamble = """You are a grader assessing whether an answer addresses / resolves a question.
Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question."""

# LLM with function call
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeAnswer.schema(), preamble=preamble)

# Prompt
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# Bind the chain to `answer_grader`, the name the graph's grading edge looks
# up, rather than to `response`, which reads like a model reply.
answer_grader = answer_prompt | structured_llm_grader

# Example run; relies on `question` and `generation` from earlier cells.
result = answer_grader.invoke({"question": question, "generation": generation})

# Read the raw score from the structured (dict) result
raw_binary_score = result['binary_score']

# Answer score: anything other than an exact 'yes' is reported as 'no'
binary_score = 'yes' if raw_binary_score == 'yes' else 'no'

# Printing the result
print(f"Answer Grading score: {binary_score}")


Answer Grading score: no


In [57]:
from typing import List

from typing_extensions import TypedDict


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: the user question being answered
        generation: the answer text produced by the LLM
        documents: the retrieved documents used as context
    """

    question: str
    generation: str
    # NOTE(review): the nodes store what the retriever returns here and read
    # `.page_content` from each item, so at runtime the elements are not plain
    # strings; the annotation is left unchanged.
    documents: List[str]

In [90]:
from langchain.schema import Document

def retrieve(state):
    """
    Look up documents for the current question with the notebook's retriever.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        dict: State update with the retrieved "documents" and the unchanged "question".
    """
    print("---RETRIEVE---")
    user_question = state["question"]

    # Only the pre-built retriever is queried; no web search is involved.
    return {
        "documents": retriever.invoke(user_question),
        "question": user_question,
    }


def llm_fallback(state):
    """
    Answer the question with the plain LLM chain, without any retrieved context.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        dict: State update with the unchanged "question" and the new "generation".
    """
    print("---LLM Fallback---")
    user_question = state["question"]
    answer = llm_chain.invoke({"question": user_question})
    return dict(question=user_question, generation=answer)


def generate(state):
    """
    Produce an answer grounded in the documents currently held in the state.

    Args:
        state (dict): The current graph state; must contain "question" and "documents".

    Returns:
        dict: State update with "documents" (always a list), "question" and the new "generation".
    """
    print("---GENERATE---")
    user_question = state["question"]
    context_docs = state["documents"]

    # A lone document is wrapped so the chain always receives a list.
    context_docs = context_docs if isinstance(context_docs, list) else [context_docs]

    # RAG generation: answer the question from the supplied context
    answer = rag_chain.invoke({"documents": context_docs, "question": user_question})
    return {
        "documents": context_docs,
        "question": user_question,
        "generation": answer,
    }


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state; must contain "question" and "documents".

    Returns:
        dict: State update whose "documents" holds only the documents graded
        relevant, plus the unchanged "question".
    """
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each document for relevance
    filtered_docs = []
    for d in documents:
        score = retrieval_grader.invoke(
            {"question": question, "document": d.page_content}
        )
        # The grader is built from a JSON schema, so its result is a dict
        # (read elsewhere in the notebook as response['binary_score']).
        # Attribute access is kept as a fallback in case the grader is rebuilt
        # to return a model object.
        if isinstance(score, dict):
            raw_grade = score.get("binary_score")
        else:
            raw_grade = getattr(score, "binary_score", None)
        # Tolerate casing/whitespace differences; anything but "yes" is a reject.
        grade = str(raw_grade).strip().lower()
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
    return {"documents": filtered_docs, "question": question}


### Edges ###

def route_question(state):
    """
    Route the question either to retrieval (RAG) or to the plain LLM fallback.

    The decision is delegated to `question_router`: when the model asks to call
    the vectorstore tool the question goes to retrieval, otherwise it is
    answered by the LLM alone.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        str: Next node to call, "retrieve" or "llm_fallback".
    """
    print("---ROUTE QUESTION---")
    question = state["question"]

    response = question_router.invoke({"question": question})
    # `tool_calls` is only present (and non-empty) in response_metadata when
    # the model chose to use the vectorstore tool.
    metadata = getattr(response, "response_metadata", None) or {}
    if not metadata.get("tool_calls"):
        print("---ROUTE QUESTION TO LLM---")
        return "llm_fallback"

    print("---ROUTE QUESTION TO RAG---")
    return "retrieve"


def decide_to_generate(state):
    """
    Report whether any graded documents survived and pick the next node.

    Generation is chosen in both cases; the only difference is the message
    that gets logged.

    Args:
        state (dict): The current graph state; must contain "documents".

    Returns:
        str: Always "generate".
    """
    print("---ASSESS GRADED DOCUMENTS---")
    relevant_docs = state["documents"]

    if relevant_docs:
        # At least one document was graded relevant
        print("---DECISION: GENERATE---")
    else:
        # Every document was filtered out; generation is attempted anyway
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, GENERATE---")
    return "generate"


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the documents and answers the question.

    Args:
        state (dict): The current graph state; must contain "question",
            "documents" and "generation".

    Returns:
        str: "useful" when the answer is grounded and addresses the question,
        "not useful" when it is grounded but off-target, and "not supported"
        when it is not grounded in the documents.
    """

    def _is_yes(result):
        # Both graders are built from JSON schemas, so they return dicts
        # (read elsewhere in the notebook as result['binary_score']).
        # Attribute access is kept as a fallback for model-object results.
        if isinstance(result, dict):
            raw = result.get("binary_score")
        else:
            raw = getattr(result, "binary_score", None)
        return str(raw).strip().lower() == "yes"

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )

    # Check hallucination
    if not _is_yes(score):
        # Plain print(): `pprint` is imported as a module in this notebook and
        # is not callable.
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check question-answering
    print("---GRADE GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question, "generation": generation})
    if _is_yes(score):
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"


In [88]:
import pprint
from langgraph.graph import END, StateGraph, START

# State machine over GraphState
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # Retrieve from documents
workflow.add_node("grade_documents", grade_documents)  # Grade documents
workflow.add_node("generate", generate)  # RAG (Retrieve and Generate)
workflow.add_node("llm_fallback", llm_fallback)  # LLM fallback if generation fails

# Build graph
workflow.add_conditional_edges(
    START,
    route_question,
    {
        "retrieve": "retrieve",  # Answer from the indexed documents
        "llm_fallback": "llm_fallback",  # Answer from the LLM alone
    },
)
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "generate": "generate",  # Proceed to generation once documents are graded
    },
)
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        # Re-generate if the answer is not grounded in the documents.
        # NOTE(review): this retry has no counter of its own; presumably only
        # LangGraph's recursion limit stops it - confirm.
        "not supported": "generate",
        # Retrieving again for the same question would repeat the same
        # retrieve -> grade -> generate path, so an off-target answer is
        # handed to the LLM fallback instead, which ends the run.
        "not useful": "llm_fallback",
        "useful": END,  # End the process if the answer is useful
    },
)
workflow.add_edge("llm_fallback", END)

# Compile the workflow into an app
app = workflow.compile()


In [93]:
# Run the compiled graph on a sample question and log each node as it finishes
inputs = {"question": "Hello, how are you today?"}
for step in app.stream(inputs):
    # Each streamed step maps the node that just ran to the state it returned
    for node_name, value in step.items():
        pprint.pprint(f"Node '{node_name}':")
    pprint.pprint("\n---\n")

# Final generation: `value` still holds the state returned by the last node
pprint.pprint(value["generation"])

---ROUTE QUESTION---
---ROUTE QUESTION TO LLM---
---LLM Fallback---
"Node 'llm_fallback':"
'\n---\n'
("As an AI, I don't have feelings, but I'm functioning well and ready to "
 "assist you. I hope you're doing well! How can I help you today?")
