In [None]:
# Install the notebook's dependencies into the running kernel's environment:
# LlamaIndex (plus its Mistral AI LLM and embedding integrations), the
# FastEmbed embedding integration, and Gradio for the chat UI.
# NOTE(review): versions are unpinned, so a fresh install may pull releases
# that differ from the ones this notebook was written against — consider pinning.
%pip install -q llama-index llama-index-llms-mistralai llama-index-embeddings-mistralai
%pip install -q llama-index-core llama-index-embeddings-fastembed gradio

In [None]:
import os

from getpass import getpass

# Never paste a real key into the notebook: it would be saved with the file.
# Keep a key that is already exported in the environment; otherwise prompt
# for it without echoing the input.
if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass("Mistral API key: ")

In [None]:
import os
import requests

# Make sure the download target directory exists (no error if it already does).
os.makedirs("data", exist_ok=True)

# Alternative source (disabled): save a web page as a text file instead of the PDF.
# text_url = "https://en.wikipedia.org/wiki/Artificial_intelligence"
# response = requests.get(text_url)
# url_path = "data/artificial_intelligence.txt"

# with open(url_path, "w", encoding="utf-8") as f:
#     f.write(response.text)
# print("Downloaded and saved as artificial_intelligence.txt")

#!wget 'https://lmu.edu.ng/XownCMS/cv/FALADE_SUNDAY_A.%20CV.pdf' -O './sample3.pdf'


pdf_url = "https://lmu.edu.ng/XownCMS/cv/FALADE_SUNDAY_A.%20CV.pdf"
pdf_path = "data/sample.pdf"

# Bound the wait so an unresponsive server cannot hang the notebook, and fail
# loudly on HTTP errors instead of saving an error page as if it were the PDF.
response = requests.get(pdf_url, timeout=30)
response.raise_for_status()

with open(pdf_path, "wb") as f:
    f.write(response.content)
print("Downloaded and saved into data directory")

In [None]:
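# Uncomment to delete the persisted index directory so that the index is
# rebuilt from the documents on the next run.
# import shutil
# shutil.rmtree('storage')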

In [None]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.llms.mistralai import MistralAI
from llama_index.core import Settings
from llama_index.embeddings.fastembed import FastEmbedEmbedding

In [None]:
# Where the source documents live and where the built index is persisted.
DATA_DIR = "data"
STORAGE_DIR = "storage"

# Chat model used to answer questions; register it as the LlamaIndex default.
# Other model names that can be swapped in: "mistral-small", "mistral-medium".
llm = MistralAI(
    model="mistral-large-latest",
    temperature=0.1,
)
Settings.llm = llm

# Embedding model used for indexing and retrieval; register it as the default too.
embed_model = FastEmbedEmbedding(
    model_name="BAAI/bge-small-en",
)
Settings.embed_model = embed_model

In [None]:
def build_index():
    """Build a vector index from the files in DATA_DIR and persist it.

    Loads the documents found under ``DATA_DIR``, creates a
    ``VectorStoreIndex`` from them, and writes the index's storage context
    to ``STORAGE_DIR``.

    Returns:
        The newly built ``VectorStoreIndex``.
    """
    reader = SimpleDirectoryReader(DATA_DIR)
    vector_index = VectorStoreIndex.from_documents(reader.load_data())
    # Save to disk so later runs can load the index instead of rebuilding it.
    vector_index.storage_context.persist(STORAGE_DIR)
    return vector_index
     

def load_index():
    """Load the previously persisted index from STORAGE_DIR.

    Returns:
        The index reconstructed from the storage context saved under
        ``STORAGE_DIR``.
    """
    return load_index_from_storage(
        StorageContext.from_defaults(persist_dir=STORAGE_DIR)
    )

In [None]:
# Reuse the persisted index when one is present; otherwise build it from the
# documents (which also persists it for the next run).
storage_found = os.path.exists(STORAGE_DIR)
if storage_found:
    print("Loading existing index \n===========================================")
    index = load_index()
else:
    print("Building new index \n===========================================")
    index = build_index()

In [None]:
import gradio as gr

# Query engine over the index; similarity_top_k=3 limits retrieval to the
# three best-matching chunks per question.
query_engine = index.as_query_engine(
    similarity_top_k=3,
)

def rag_chat(message, chat_history):
    """Answer one chat message with the RAG query engine.

    Args:
        message: The text the user submitted.
        chat_history: List of ``(user_message, response)`` pairs. It is
            appended to in place and also returned.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the input
        textbox, and the history is what the chatbot displays.
    """
    # Nothing to ask: skip the retrieval/LLM round-trip and leave the
    # conversation untouched rather than recording an empty turn.
    if not message or not message.strip():
        return "", chat_history

    try:
        response = query_engine.query(message).response
    except Exception as e:
        # Best effort: show the failure in the chat instead of crashing the UI.
        response = f"Error: {e}"

    chat_history.append((message, response))
    return "", chat_history


In [None]:
# Chat UI: a chatbot pane, a question box, and a Clear button.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("## RAG Chatbot\n Ask questions about the document.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your question")
    clear = gr.Button("Clear")

    # Conversation history; rag_chat appends each turn to this list in place.
    state = gr.State([])

    # On submit: answer the question, clear the textbox, refresh the chatbot.
    msg.submit(rag_chat, [msg, state], [msg, chatbot])
    # Reset the stored history together with the visible widgets; otherwise the
    # next question would bring the "cleared" conversation back into view.
    clear.click(lambda: ([], "", []), None, [chatbot, msg, state])

demo.launch()
