In [None]:
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_huggingface_api_token"

from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Define the model ID
model_id = "meta-llama/Llama-3.1-8B-Instruct"
# model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Set the pad_token if it is not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Use the eos_token as the pad_token

# Create a text-generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

# Wrap the pipeline in a HuggingFacePipeline object
llm = HuggingFacePipeline(pipeline=pipe)

# Now you can use `llm` within the LangChain framework

chat_model = ChatHuggingFace(llm=llm)


from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-large")


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.08it/s]
Device set to use cpu


In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_core.vectorstores import InMemoryVectorStore
vector_store = InMemoryVectorStore(embeddings)

loader = TextLoader("knowledge.txt")
docs = loader.load()



text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,  # chunk size (characters)
    chunk_overlap=30,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)

# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1,  # Each chunk is 1 line
#     chunk_overlap=0,
#     separators=["\n"],  # Split on newlines only
#     add_start_index=True,
# )

all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")


_ = vector_store.add_documents(documents=all_splits)


Split blog post into 11 sub-documents.


In [3]:
from langchain import hub
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict


# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"], k=1)  # Limit to top 1 results
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = chat_model.invoke(messages)
    return {"answer": response.content}


# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

response = graph.invoke({"question": "test test_exp_xpu_float32 failed, what is the reason of the failure?"})
print(response["answer"])



<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: test test_exp_xpu_float32 failed, what is the reason of the failure? 
Context: According to #1214, exp and log operation has random failure on PVC, investigation is TBD. 
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>

The test test_exp_xpu_float32 failed due to a random failure in the exp and log operation on a PVC (Persistent Volume Claim). The reason for this failure is still under investigation (TBD).
