# Create and run a local RAG pipeline from scratch

Install the required packages.

In [None]:
# Install the LangChain stack plus the splitter, vector-store, embedding and hub packages used below.
# `%pip` (instead of bare `pip`) makes the install explicitly target this kernel's environment.
# langchain-cohere is added because a later cell imports `langchain_cohere`.
%pip install langchain langchain-core langchain-community langchain-experimental langgraph "langserve[all]" langchain-cli langsmith langchain_text_splitters langchain_chroma sentence-transformers langchainhub langchain-cohere



In [None]:
# Install/upgrade the OpenAI integration quietly.
# NOTE(review): `-e .` also installs the current directory as an editable package — confirm that is intended here.
%pip install -e . -qU langchain-openai

In [None]:
import getpass
import os

# Turn on LangSmith tracing for the LangChain calls made in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Ask for the key only when it is not already set, and label the prompt so it is
# clear which credential is being requested (several cells prompt for secrets).
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")

In [None]:
import getpass
import os

# NOTE(review): nothing in this notebook reads COHERE_TRACING_V2; kept as-is — confirm whether it is needed.
os.environ["COHERE_TRACING_V2"] = "true"

# Ask for the key only when it is not already set, and label the prompt so it is
# clear which credential is being requested (several cells prompt for secrets).
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API key: ")

In [None]:
import getpass
import os

# Ask for the key only when it is not already set, and label the prompt so it is
# clear which credential is being requested (several cells prompt for secrets).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

from langchain_openai import ChatOpenAI

# OpenAI chat model option; note that `llm` is rebound by the Cohere cell further down.
llm = ChatOpenAI(model="gpt-4o-mini")

In [None]:
from langchain_text_splitters import (
    Language,
    RecursiveCharacterTextSplitter,
)

Which languages does the splitter support?

In [None]:
# Show the string value of every member of the `Language` enum.
[language.value for language in Language]

In [None]:
# Header file that serves as the retrieval corpus.
file_path = 'PlatformType.h'

# Decode explicitly as UTF-8 so the text read does not depend on the platform's default encoding.
with open(file_path, 'r', encoding='utf-8') as file:
    C_CODE = file.read()

# Language-aware splitter configured for C++ with very small chunks (size 50) and no overlap.
C_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.CPP, chunk_size=50, chunk_overlap=0
)

# Wrap each chunk of the header in a Document; `docs` feeds the vector store later on.
docs = C_splitter.create_documents([C_CODE])
docs

In [None]:
# Display the chunked documents again (the `docs` list built by the splitter cell).
docs

In [None]:
from langchain_cohere import ChatCohere
# Rebinds `llm` to Cohere's "command-r" chat model, replacing the ChatOpenAI instance assigned earlier.
llm = ChatCohere(model="command-r")

In [None]:
# Chroma option. Aliased as `Vectorstore` (capitalised) so the
# `Vectorstore.from_documents(...)` call below works with this backend; the
# previous lowercase alias `vectorstore` was never usable as the class because
# that name is rebound to the store instance.
from langchain_community.vectorstores import (
    Chroma as Vectorstore
)

In [None]:
from langchain_community.vectorstores import (
    FAISS as Vectorstore
)

In [None]:
from langchain_community.embeddings import (
    OpenAIEmbeddings as Embedding
)

In [None]:
from langchain_community.embeddings import (
    HuggingFaceEmbeddings as Embedding
)

In [None]:
# Instantiate whichever embedding class was imported as `Embedding`, with its default settings.
embedding = Embedding()

In [None]:
# CPU build of FAISS, needed by the FAISS vector store option.
%pip install faiss-cpu

In [None]:
# Embed every chunk in `docs` and index it in the selected vector store backend.
vectorstore = Vectorstore.from_documents(
    embedding=embedding,
    documents=docs,
)

# Expose the index through the retriever interface (default search settings).
retriever = vectorstore.as_retriever()

In [None]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Fetch the "rlm/rag-prompt" prompt from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")


In [None]:
# Rebuild the retriever as a similarity search that returns the six closest chunks.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 6},
    search_type="similarity",
)

# Run one sample query against the index.
retrieved_docs = retriever.invoke("what is define for int8_t")

# Number of chunks that came back.
len(retrieved_docs)

In [None]:
# Show the text of the first retrieved chunk.
top_hit = retrieved_docs[0]
print(top_hit.page_content)


In [None]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

# Render the prompt with placeholder values to inspect the messages it produces.
filler_inputs = {"context": "filler context", "question": "filler question"}
example_messages = prompt.invoke(filler_inputs).to_messages()

example_messages

In [None]:
# Show the text of the first rendered prompt message.
first_message = example_messages[0]
print(first_message.content)


In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Documents are separated by a blank line (two newlines); an empty input
    yields an empty string.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# Fan the incoming question out: one branch retrieves chunks and formats them
# as context, the other forwards the question unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Prompt -> model -> plain-string output.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# Print the answer piece by piece as the chain streams it.
for piece in rag_chain.stream("what is define for int8_t"):
    print(piece, end="", flush=True)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the QA step; the trailing "{context}" placeholder is
# where the retrieved documents are inserted.
system_prompt = "".join(
    [
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer the question. ",
        "If you don't know the answer, say that you don't know. ",
        "Use three sentences maximum and keep the answer concise.",
        "\n\n",
        "{context}",
    ]
)

# Chat prompt: the system message carries the instructions and context placeholder,
# the human message carries the user's request.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Combine the model and prompt into a document-stuffing QA step, then put retrieval in front of it.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Ask for a small C++ snippet and print only the generated answer.
user_request = (
    "please code the sum of a and b, with a = 1 and b = 3 in C++, use PLATFORM_TYPES_H format"
)
response = rag_chain.invoke({"input": user_request})
print(response["answer"])

With RAG:

```c++
#include <PLATFORM_TYPES_H>

uint32 sum(uint8 a, uint8 b) {
    return a + b;
}

uint32 result = sum(1, 3);
```

No RAG:
1. please code the sum of a and b, with a = 1 and b = 3 in C++, use PLATFORM_TYPES_H format
```cpp
#include <platform_types.h>

int main() {
    typedef int32_t a_t;
    typedef int32_t b_t;

    a_t a = 1;
    b_t b = 3;

    return a + b;
}
```

Note that this is simply a hypothetical example, as there's no universal PLATFORM_TYPES_H file that exists across all C++ implementations. The code defines two integer types, a_t and b_t, and then adds the values of a and b together, which are 1 and 3, respectively


2. please code the sum of a and b, with a = 1 and b = 3 in C++
Sure! Here's the code:
```cpp
#include <iostream>
using namespace std;

int main() {
    int a = 1;
    int b = 3;
    int sum = a + b;

    cout << sum << endl;

    return 0;
}
```
This code defines two integers, a and b, with values 1 and 3 respectively, and then calculates their sum and prints it to the console.

In [1]:
from langchain_core.messages import HumanMessage, SystemMessage

# A system instruction followed by a single user question.
system_message = SystemMessage(content="You're a helpful assistant")
human_message = HumanMessage(content="What is the purpose of model regularization?")
messages = [system_message, human_message]