# LLM Development - Retrieval Augmented Generation (RAG)

![](images/llmsql2.jpg)

<h2 style="color:Blue">Restart Kernel</h2>

In [None]:
import os
os._exit(00)

<h2 style="color:Blue">General Imports</h2>

In [3]:
import bs4
import os

<h2 style="color:Blue">Langchain Imports</h2>

In [4]:
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, DirectoryLoader, TextLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate

<h2 style="color:Blue">Pre-Loadings</h2>

<h3 style="color:Green">Question and Vars</h3>

Question: Ask your question

Model ID
- Model 1: hf.co/nomic-ai/nomic-embed-text-v1.5-GGUF:Q6_K
- Model 2: mistral-nemo:latest
- Model 3: codestral:latest
- Model 4: gpt-4o-mini
- Model 5: qwen:14b
- 
keep alive: Number of seconds that the model will be kept alive in memory

num_predict: prediction tokens

In [5]:
question = "Que contiene el Megacombo Galáctico?"
rag_model_id = 2
temperature = 0
keep_alive = 250
num_predict = 256

<h3 style="color:Green">Model Selection</h3>

In [6]:
def get_model(id):
    if id == 1:
        return ("hf.co/SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF:Q6_K", "OllamaEmbeddings")
    elif id == 2:
        return ("mistral-nemo:latest", "OllamaEmbeddings")
    elif id == 3:
        return ("codestral:latest", "OllamaEmbeddings")
    elif id == 4:
        return ("gpt-4o-mini", "OpenAIEmbeddings")
    elif id == 5:
        return ("qwen:14b", "OllamaEmbeddings")

In [7]:
rag_model = get_model(rag_model_id)[0]
rag_embedding = get_model(rag_model_id)[1]

print("RAG Model Selected:", rag_model)
print("RAG Embedding Selected:", rag_embedding)

RAG Model Selected: mistral-nemo:latest
RAG Embedding Selected: OllamaEmbeddings


<h3 style="color:Green">Keys</h3>

In [8]:
key = open("key.txt", "r")
os.environ["OPENAI_API_KEY"] = key.read()
key.close()

key = open("keyls.txt", "r")
#os.environ["LANGCHAIN_PROJECT"] = "gpstrackit-dev"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = key.read()
key.close()

<h3 style="color:Green">LLM Instance</h3>

In [9]:
if rag_model_id == 4:
    llm = ChatOpenAI(model=get_model(rag_model_id)[0], temperature = temperature)
else: 
    llm = ChatOllama(model=get_model(rag_model_id)[0], num_predict = num_predict, keep_alive = keep_alive, temperature = temperature)
llm

ChatOllama(model='mistral-nemo:latest', num_predict=256, temperature=0.0, keep_alive=250)

<h3 style="color:Green">Embeddings</h3>

In [10]:
if rag_embedding == 'OllamaEmbeddings':
    embeddings = OllamaEmbeddings(
        model="hf.co/nomic-ai/nomic-embed-text-v1.5-GGUF:Q6_K",
    )
elif rag_embedding == 'OpenAIEmbeddings':
    embeddings = OpenAIEmbeddings()

embeddings

OllamaEmbeddings(model='hf.co/nomic-ai/nomic-embed-text-v1.5-GGUF:Q6_K', base_url=None, client_kwargs={})

<h2 style="color:Blue">Prompt Execs</h2>

<h3 style="color:Green">Loaders</h3>

In [11]:
web_loader = WebBaseLoader(
    web_paths=("https://lavidaesunvideojuego.com/2024/11/07/la-gran-oscuridad-ha-llegado-a-hearthstone/",
              "https://lavidaesunvideojuego.com/2024/09/05/tips-para-tu-primer-ano-en-stardew-valley/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
web_documents = web_loader.load()

text_loader_kwargs={'autodetect_encoding': True}
text_loader = DirectoryLoader("./", glob="docs/*.txt", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
text_documents = text_loader.load()

all_documents = web_documents + text_documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(all_documents)

<h3 style="color:Green">Vector Store</h3>

In [12]:
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)

In [13]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

<h3 style="color:Green">Test Retriever</h3>

In [14]:
retrieved_docs = retriever.invoke(question)
print(len(retrieved_docs))
print(retrieved_docs[0].page_content)

6
Esta nueva expansiÃ³n se presenta en dos versiones, la primera, el Megacombo GalÃ¡ctico, que es como la ediciÃ³n de lujo:

*80 sobres de cartas
*10 sobres de cartas doradas
*1 carta insignia legendaria aleatoria
*1 carta legendaria dorada aleatoria
*6 entradas para la Arena
*El dorso de carta y el diseÃ±o de hÃ©roe Sacerdote de Profeta Velen.

Y la opciÃ³n mÃ¡s casual, el Combo Estelar:

*60 sobres de cartas
*2 cartas legendarias aleatorias
*El dorso de carta de Profeta Velen


<h2 style="color:Blue">Prompts Execs</h2>

<h3 style="color:Green">Prompt Orders</h3>

In [17]:
prompt = hub.pull("rlm/rag-prompt")
question = "quien es Ferhan Sakal?"

<h3 style="color:Green">Exec And Results</h3>

In [18]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm.bind(stop = ["Answer:", "<|im_end|>"])
    | StrOutputParser()
)

for chunk in rag_chain.stream(question):
    print(chunk, end="", flush=True)

Ferhan Sakal es el jefe de excavación y gestión de sitios arqueológicos de los Museos de Qatar. Es experto en arte rupestre y ha afirmado que algunas tallas en Al Jassasiya son únicas y no se pueden encontrar en ningún otro lugar.