## Retrieval Augmented Generation (RAG) with LlamaIndex and Mistral LLM

In [1]:
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_index.core import Settings,SimpleDirectoryReader,ServiceContext,VectorStoreIndex,Document,load_index_from_storage,StorageContext
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

from langchain_community.chat_models import ChatOllama

from langchain_community.embeddings import OllamaEmbeddings

[nltk_data] Downloading package punkt_tab to /home/joaocosentino/.pyen
[nltk_data]     v/versions/user_manual/lib/python3.10/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!
  from .autonotebook import tqdm as notebook_tqdm


### .env and File paths

In [2]:
import nest_asyncio
import os
from dotenv import load_dotenv
nest_asyncio.apply()

load_dotenv('../.env')
llama_cloud = os.getenv('LLMA_CLOUD_API')

short_pdf = "../pdf_files/owner_manual_p283-p300.pdf"
pdf_path = '../pdf_files/owner_manual_full.pdf'

### Models Setup

In [8]:
EMBEDDING_MODEL  = "BAAI/bge-small-en-v1.5"
GENERATION_MODEL = "mistral"

# LLM from Ollama

llm = ChatOllama(model=GENERATION_MODEL,request_timeout=120)

# llm_local = Groq(model="mixtral-8x7b-32768", api_key= groq)
embed_model = FastEmbedEmbedding(model_name=EMBEDDING_MODEL)
#embed_model = OllamaEmbeddings(model="nomic-embed-text",show_progress=True)


Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 33608.21it/s]


### Documents Load

In [5]:
docs = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
documents = Document(text = "\n\n".join([doc.text for doc in docs]))

In [6]:
def build_index(document,llm,embed_model,sentence_window_size=3,save_dir="../vector_store/index"):

    node_parser = SentenceWindowNodeParser(
        window_size=sentence_window_size,
        window_metadata_key='window',
        original_text_metadata_key='original_text'
    )

    Settings.llm = llm
    Settings.embed_model = embed_model
    # Settings.chunk_size = 1024
    Settings.node_parser = node_parser

    if not os.path.exists(save_dir):
        index = VectorStoreIndex.from_documents(
            [document]
        )

        index.storage_context.persist(persist_dir=save_dir)
    else:
      # load the existing index
      index = load_index_from_storage(
          StorageContext.from_defaults(persist_dir=save_dir)
      )

    return index

In [9]:
vector_index = build_index(documents,llm,embed_model)


### Defining Query Engine

In [11]:
def get_query_engine(vector_index,similarity=None,reranker_top=None):

    postproc = MetadataReplacementPostProcessor(target_metadata_key='window')
    reranker = FlagEmbeddingReranker(
        top_n=reranker_top,
        model="BAAI/bge-reranker-large",
    )

    engine = vector_index.as_query_engine(
        similarity_top_k=similarity,
        node_postprocessors=[postproc, reranker]
    )

    return engine

In [12]:
query_engine = get_query_engine(vector_index,6,5)

In [13]:
query = "can you tell me which LLM are you based on?"
response = query_engine.query(query)
print(response)

  lc_message = self._llm.predict_messages(messages=lc_messages, **kwargs)


ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fafc71cdcf0>: Failed to establish a new connection: [Errno 111] Connection refused'))