# REQUIRED LIBRARIES

In [1]:
!pip install -q pypdf
!pip install -q python-dotenv
!pip install -q transformers
!pip install -q llama-index
!pip -q install sentence-transformers
!pip install langchain
!pip install llama_index
!pip install --upgrade llama_index



In [2]:
# Build llama-cpp-python from source with the cuBLAS CMake option switched on, so the
# model can later be offloaded to the GPU (see n_gpu_layers in the LlamaCPP cell).
# --no-cache-dir disables pip's cache (presumably so a cached CPU-only build is not reused).
# NOTE(review): the package version is not pinned and the CMake option name may differ
# between llama-cpp-python releases — confirm -DLLAMA_CUBLAS is still honoured.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install  llama-cpp-python --no-cache-dir



# Google Colab Authentication Step

In [10]:
# Mount Google Drive into the Colab runtime at /gdrive and switch the working
# directory to the mount point. Colab-only: google.colab is not available elsewhere.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
/gdrive


In [13]:
# Sanity check: list the project folder on Drive (it should contain the Data directory).
!ls "/gdrive/My Drive/Chatbot_with_Mistral_7B_LLM_LlamaIndex/"

Data  Mistral7_llamaindex_Rag.ipynb


In [14]:
import os
# Make the project folder on Drive the current working directory.
os.chdir("/gdrive/My Drive/Chatbot_with_Mistral_7B_LLM_LlamaIndex/")

In [15]:
# Confirm the working directory is now the project folder by listing its contents.
!ls

Data  Mistral7_llamaindex_Rag.ipynb


# RAG Chatbot Implementation with Mistral 7B and LlamaIndex

In [16]:
import logging
import sys

# Send INFO-and-above log records to the cell output through exactly one root handler.
# force=True replaces any handlers already attached to the root logger (including ones
# left behind by a previous run of this cell), so basicConfig is never a silent no-op
# and re-running the cell does not stack handlers.
# The original also attached a second stdout StreamHandler, which printed every
# record twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

# VectorStoreIndex: an index that stores vector (embedding) representations of the document chunks.
# SimpleDirectoryReader: reads the documents found in a specified directory.
# ServiceContext: bundles the LLM, embedding model and chunking settings used to build and query the index.
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

In [17]:
# Load every document found in the project's Data folder on Google Drive.
data_reader = SimpleDirectoryReader("/gdrive/My Drive/Chatbot_with_Mistral_7B_LLM_LlamaIndex/Data/")
documents = data_reader.load_data()

In [18]:
import torch

from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

# Create the LLM: a LlamaCPP instance backed by a quantised Mistral-7B-Instruct GGUF file.
llm = LlamaCPP(
    # Model source: model_path is left unset, so the model is fetched from model_url.
    model_path=None,
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',

    # Generation settings: temperature controls how varied the output is,
    # max_new_tokens caps the length of each generated answer.
    temperature=0.1,
    max_new_tokens=256,

    # Size of the context window the model will use.
    context_window=3900,

    # Prompt-formatting helpers imported from llama_index.llms.llama_utils.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,

    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__(); n_gpu_layers=-1 runs all layers on the GPU if possible.
    model_kwargs={"n_gpu_layers": -1},

    verbose=True,
)

AVX = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [19]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding

# Embedding model: thenlper/gte-large (General Text Embeddings, from "Towards General
# Text Embeddings with Multi-stage Contrastive Learning").
# It is loaded through LangChain's HuggingFaceEmbeddings and wrapped in
# LangchainEmbedding so that it can be passed to LlamaIndex as the embed model.
hf_embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-large")
embed_model = LangchainEmbedding(hf_embeddings)


In [20]:
# ServiceContext groups the components used to process the documents:
# the language model, the embedding model and the chunking configuration.
service_context = ServiceContext.from_defaults(
    # The LlamaCPP instance created in the earlier LLM cell.
    llm=llm,
    # The embedding model used to embed the document chunks.
    embed_model=embed_model,
    # Chunk size used when the documents are split for processing.
    chunk_size=256,
)

In [21]:
# Build a vector index over the loaded documents, using the LLM, embedding model and
# chunk size configured in service_context.
# The index holds the vector representations that are used to find content similar to a query.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

In [22]:
# Turn the index into a query engine; it is reused by the interactive loop further down.
query_engine = index.as_query_engine()

# Ask a first question; `response` holds the answer produced for it.
first_question = "who is atlas"
response = query_engine.query(first_question)

In [23]:
# Show the answer to the query above as plain text.
print(response)

 Atlas is a Titan in Greek mythology. He was known for his enormous strength, which was greater than that of his brothers. However, his disposition was less quarrelsome compared to the other Titans. Atlas was not imprisoned with the other Titans who had fought against the gods and instead was made to stand and hold up the sky on his head and hands.


In [None]:
# Interactive chat loop: read a question, run it through the query engine, print the answer.
#
# The original `while True` loop had no exit path other than interrupting the kernel,
# and it forwarded blank lines to the LLM. Here an empty line, "exit" or "quit"
# (case-insensitive) ends the session, and Ctrl-C / end-of-input while waiting for a
# question stop the loop cleanly instead of leaving a traceback in the output.
EXIT_COMMANDS = {"exit", "quit"}

print('Ask a question (empty line, "exit" or "quit" to stop).')
while True:
    try:
        query = input()
    except (EOFError, KeyboardInterrupt):
        break

    command = query.strip().lower()
    if not command or command in EXIT_COMMANDS:
        break

    # The question is passed through unmodified, exactly as typed.
    response = query_engine.query(query)
    print(response)

who is hades?


Llama.generate: prefix-match hit


 Hades is the king of the underworld in Greek mythology. He is the brother of Neptune and Jupiter, and the husband of Persephone. He is often depicted as a bearded figure with a helmet and a trident, and he is associated with death and the afterlife.
What happened between echo and narcissus?


Llama.generate: prefix-match hit


 In Greek mythology, Echo was a nymph who talked too much and was very fond of having the last word. One day she spoke rudely to the great Juno, and as punishment, Juno decreed that Echo should never use her voice again unless to repeat what she had just heard. Echo was ashamed and hid herself in the forest.

Narcissus was a young man who had hair as yellow as gold and eyes as blue as the sky, which was very rare in Greece where most people were dark. He used to hunt in the forest where Echo was hiding. One day Narcissus became separated from his friends and heard something rustle among the leaves. He called out "Who's there?" and Echo answered "Here." Narcissus saw Echo peeking out shyly from a cave or behind a great tree and admired her very much.
