# RAG implementation
### STEPS
1. **CLEANING THE DATA**
- Removing `unnecessary docs`, updating docs, removing conflicting information
2. **READING THE DATA** 
- Python, OCR, etc
3. **CHUNKING** 
- `small chunks`: more relevant info, but no context; smaller prompts
- `large chunks`: more context; costly prompts
4. **EMBEDDING** >> **VECTOR DB**
- `different models` for embeddings
- different VDB
- adding `metadata`, `chapter names`, other info to improve retrieval
- storing hypothetical questions that each chunk responds to, and not the chunk itself
5. **FINE-TUNING** 
- To remove `hallucinations` and improve performance. 
- `on the data`: prompt + chunks >> correct answer (including "I don't know") 
- just reating the database to improve the performance (much less effective)
6. **RETRIEVAL** 
- Retrieving more chunks >> `reranking` with another model
- Cosine similarity + `other methods`: keywords
- `Rewriting` the user's question to be more in line with the documents' style
- `Cache FAQs` and their answers. Check first if the Q is similar to a FAQ: \
        1). Q FAQ similarity > thresh => return Answer \
        2). thresh2 < Q FAQ similarity < thresh => add Answer to the prompt as another chunk \
        3). otherwise => use normal strategy with chunks
7. **EVALUATION**
- guardrails ai  
- phoenix rag evaluation
8. **OPTIMIZATION**
- proprietary model (trained, fine-tuned)
- cheaper models for Q rewriting and creation of evaluation data
- quantization of the model (before putting it in production), LoRA (low-rank adaptation of the weights)
- 
9. **PRODUCTION**
- telegram bot + whisper 
- app in selenium 

## Imports

In [None]:
! pip install chromadb==0.4.18
! pip install ipython==8.18.1
! pip install llama_index==0.9.13

# also necessary for used llamaindex functionalities
! pip install pypdf==4.0.1
! pip install spacy==3.7.2
! pip install guardrails-ai==0.3.2
! pip install openpyxl==3.1.2
! pip install openai-whisper==20231117 python-telegram-bot==20.7
! pip install pydub==0.25.1

In [None]:
import os
import openai
import chromadb
import pandas as pd
from functools import partial
from llama_index import (
    VectorStoreIndex, 
    SimpleDirectoryReader, 
    ServiceContext, 
    StorageContext,
    load_index_from_storage,
)
from llama_index.evaluation import (
    DatasetGenerator, 
    RelevancyEvaluator, 
    ResponseEvaluator, 
    FaithfulnessEvaluator, 
    QueryResponseEvaluator,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.schema import TextNode
from llama_index.node_parser import SentenceSplitter
from llama_index.postprocessor import TimeWeightedPostprocessor, SimilarityPostprocessor
from llama_index.vector_stores import ChromaVectorStore
from llama_index.output_parsers import GuardrailsOutputParser
from llama_index.prompts import PromptTemplate
from llama_index.response_synthesizers import (
    get_response_synthesizer,
    BaseSynthesizer,
    TreeSummarize,
)
from embedding_manager import Embeddings
from llm_manager import LLMMain 

import guardrails as gd
from pydantic import BaseModel, Field
from typing import List
from guardrails.validators import QARelevanceLLMEval, TwoWords, ToxicLanguage, ProvenanceV1, SimilarToDocument

from project_dirs import PROJECT_DIR, DATA_DIR, OUTPUT_DIR
from utils import (
    load_config, 
    list_all_filepaths_for_list_of_extentions, 
    list_all_filepaths, 
    get_eval_df
)
from data_loader import DataLoader, DatabaseManager
from query_engine import RAGStringQueryEngine

import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# To avoid RuntimeError: asyncio.run() cannot be called from a running event loop when running in Jupyter
import nest_asyncio
nest_asyncio.apply()

%load_ext autoreload
%autoreload 3

## Parameters, Env vars

In [None]:
cnf = load_config(cnf_dir=PROJECT_DIR, cnf_name="config.yml")

os.environ['OPENAI_API_TYPE'] = cnf['openai_api_type']
# NOTE(review): the API *version* is populated from the 'openai_api_type' key,
# which looks like a copy-paste slip. Left unchanged because the name of the
# version key in config.yml is not visible here -- confirm and fix.
os.environ["OPENAI_API_VERSION"] = cnf['openai_api_type']
# Read the key inside a `with` block so the file handle is closed right away.
with open(os.path.join(PROJECT_DIR, "keys", cnf['openai_key_file']), "r") as openai_key_file:
    os.environ['OPENAI_API_KEY'] = openai_key_file.read().strip("\n")

# Alternative Azure OpenAI configuration (disabled).
# os.environ['AZURE_EMBEDDING_MODEL'] = cnf['azure_embeddign_model']
# os.environ['AZURE_LLM_MODEL'] = cnf['azure_llm_model']
# os.environ['AZURE_DEPLOYMENT_NAME'] = cnf['azure_deployment_name']
# os.environ['AZURE_EMBEDDING_DEPLOYMENT_NAME'] = cnf['azure_deployment_name_embeddigns']
# os.environ['AZURE_OPENAI_API_KEY'] = open(os.path.join(PROJECT_DIR, "keys", cnf['azure_openai_key_file']), "r").read().strip("\n")
# os.environ['AZURE_OPENAI_ENDPOINT'] = cnf['azure_openai_api_endpoint']
# os.environ['AZURE_API_VERSION'] = cnf['azure_openai_api_version']
# os.environ['OPENAI_API_BASE'] = cnf['azure_openai_api_endpoint']

# Vector database location and collection.
db_path = os.path.join(PROJECT_DIR, 'vector_store')
db_collection_name = cnf['db_collection_name']

# Model selection (the duplicated `embedding_mode` assignment was removed).
embedding_mode = cnf['embedding_mode']
llm_mode = cnf['llm_mode']
llm_model_path = cnf['llm_model_path']
local_embeddings_model_name = cnf['local_embeddings_model_name']

# Chunking parameters and the folder holding the source documents.
chunk_size = cnf['chunk_size']
chunk_overlap = cnf['chunk_overlap']
data_path = os.path.join(DATA_DIR, 'main_data')

# Identifier and persistence folder of the llama_index index.
indexid = f'{db_collection_name}_index'
index_path = os.path.join(PROJECT_DIR, 'vs_index')

# Italian QA prompt: answer only from the supplied context, otherwise reply
# 'Informazione non trovata'.
qa_prompt = PromptTemplate(
    "Le informazioni contestuali sono riportate di seguito.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Date le informazioni di contesto e non la conoscenza pregressa, "
    "rispondi alla query.\n"
    "Se l'informazione non è nel contesto rispondere 'Informazione non trovata'.\n"
    "Query: {query_str}\n"
    "Answer: "
)

## LLM, Embeddings

In [None]:
# Main LLM: LLMMain is constructed from the configured mode and model path;
# the model object used by the rest of the notebook is its `llm` attribute.
main_llm = LLMMain(llm_mode, llm_model_path)
llm = main_llm.llm

# Embedding model, taken from the wrapper's `embedding_model` attribute.
# NOTE(review): `local_embeddings_model_name` is read from the config but
# `local_model_name=None` is passed here -- confirm this is intentional.
embedding = Embeddings(embedding_mode, local_model_name=None)
embedding_model = embedding.embedding_model


## Load Data

In [None]:
# Chroma collection backing the vector store for the main documents.
db_manager = DatabaseManager(db_path=db_path, collection_name=db_collection_name)
db_collection = db_manager.get_db()

# Vector store and service context (embedding model + LLM) for the main data.
vector_store = ChromaVectorStore(chroma_collection=db_collection)
service_context = ServiceContext.from_defaults(embed_model=embedding_model, llm=llm)

# Reuse the persisted index when it can be loaded; otherwise build a new one.
try:
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store,
        persist_dir=index_path,
    )
    index = load_index_from_storage(
        service_context=service_context,  # so the loaded index uses the configured models
        storage_context=storage_context,
        index_id=indexid,
        llm=None,
    )
    logger.info("Loaded %s from local path %s", indexid, index_path)

except Exception as e:
    # Any load failure falls back to a rebuild. The cause goes through the
    # logger (instead of a bare print) so it is recorded with the other logs.
    logger.warning("Could not load index %s from %s: %s", indexid, index_path, e)
    logger.info("Creating the vector index")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Load and chunk the data files.
    # NOTE(review): `data` and `chunks` are only defined on this rebuild path,
    # but later evaluation cells read them; they will be undefined when the
    # index was loaded from disk.
    file_paths = list_all_filepaths_for_list_of_extentions(common_dir=data_path)
    data_loader = DataLoader(file_paths=file_paths)
    data = data_loader.read_data()
    chunks = data_loader.chunk_data(
        data,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # File-system metadata is excluded from the text that gets embedded.
    excluded_embed_metadata_keys = [
        'file_type',
        'file_size',
        'creation_date',
        'last_modified_date',
        'last_accessed_date',
    ]
    for chunk in chunks:
        chunk.excluded_embed_metadata_keys = excluded_embed_metadata_keys

    index = VectorStoreIndex(
        chunks, storage_context=storage_context, service_context=service_context
    )
    index.set_index_id(indexid)
    index.storage_context.persist(persist_dir=index_path)
  

In [None]:
# Vector store and index for the Excel FAQ questions.
index_id_faq = 'faq_indexid'
db_collection_faq_name = "faq"

# Load the FAQ spreadsheet up-front: `faq_df` is needed later to look up the
# stored answers, also when the index itself is loaded from disk (previously
# it was only defined on the rebuild path).
excel_files = list_all_filepaths(common_dir=DATA_DIR, folder='', extension='xlsx')
if len(excel_files) == 0:
    raise FileNotFoundError(f"No .xlsx FAQ file found in {DATA_DIR}")
faq_df = pd.read_excel(excel_files[0])
excel_questions = faq_df.Domanda.values

# Chroma collection backing the FAQ vector store.
db_manager_faq = DatabaseManager(db_path=db_path, collection_name=db_collection_faq_name)
db_collection_faq = db_manager_faq.get_db()

vector_store_faq = ChromaVectorStore(chroma_collection=db_collection_faq)
service_context_faq = ServiceContext.from_defaults(embed_model=embedding_model, llm=llm)

# Reuse the persisted FAQ index when it can be loaded; otherwise build it.
# NOTE(review): this index is persisted to the same `index_path` as the main
# index -- confirm that the two persist calls do not overwrite each other.
try:
    storage_context_faq = StorageContext.from_defaults(
        vector_store=vector_store_faq,
        persist_dir=index_path,
    )
    index_faq = load_index_from_storage(
        service_context=service_context_faq,  # so the loaded index uses the configured models
        storage_context=storage_context_faq,
        index_id=index_id_faq,
        llm=None,
    )
    # Report the FAQ index id (the main index id was logged here by mistake).
    logger.info("Loaded %s from local path %s", index_id_faq, index_path)

except Exception as e:
    # Any load failure falls back to a rebuild; keep the cause in the logs.
    logger.warning("Could not load index %s from %s: %s", index_id_faq, index_path, e)
    logger.info("Creating the vector index")
    storage_context_faq = StorageContext.from_defaults(vector_store=vector_store_faq)

    # One node per FAQ question; only the question text is indexed.
    excel_nodes = [TextNode(text=i) for i in excel_questions]

    index_faq = VectorStoreIndex(
        excel_nodes, storage_context=storage_context_faq, service_context=service_context_faq
    )
    index_faq.set_index_id(index_id_faq)
    index_faq.storage_context.persist(persist_dir=index_path)
  

## Definizione di guardrails, query_engine

In [None]:
# Query engine
####################################################################################
# Response synthesizer in "tree_summarize" mode ("compact" kept as an alternative).
# synthesizer = get_response_synthesizer(response_mode="compact")
synthesizer = get_response_synthesizer(response_mode="tree_summarize")

# Post-processors handed to the query engine: a similarity cutoff taken from
# the config, followed by a time-weighted post-processor with top_k=2.
similarity_postprocessor = SimilarityPostprocessor(similarity_cutoff=cnf['similarity_cutoff']) 
rerank_postprocessor = TimeWeightedPostprocessor(time_decay=0.5, time_access_refresh=False, top_k=2)
# Alternative LLM-based reranker (disabled).
# from llama_index.postprocessor import RankGPTRerank
# gpt_preprocessor = RankGPTRerank(top_n=2, llm=llm)

# Retriever over the main index, configured with similarity_top_k=4.
retriever = index.as_retriever(similarity_top_k=4)

# Project query engine; elsewhere in this notebook its `query()` result is
# unpacked as `(raw_answer, source_nodes)`.
query_engine = RAGStringQueryEngine(
    retriever=retriever,
    response_synthesizer=synthesizer,
    llm=llm,
    qa_prompt=qa_prompt,
    # postprocessor=similarity_postprocessor,
    postprocessors=[similarity_postprocessor, rerank_postprocessor],
)

# Configure the FAQ retriever: only the single most similar FAQ question is
# retrieved (similarity_top_k=1).
####################################################################################
retriever_faq = VectorIndexRetriever(
    index=index_faq,
    similarity_top_k=1,
)
# Score above which the stored FAQ answer is returned directly.
faq_similarity_threshold = cnf['faq_similarity_threshold']

# Guardrails
####################################################################################
# Guard built from the QARelevanceLLMEval validator; it is later called with
# the question passed in the `metadata` dict.
qa_relevance_guard = gd.Guard.from_string(
    validators=[
        QARelevanceLLMEval(on_fail="fix", llm_callable="gpt-3.5-turbo"),
    ],
    description="",
)

# ProvenanceV1
def query_function(text: str, k: int, sources) -> List[str]:
    """Return at most ``k`` entries of ``sources``, last entry first.

    ``text`` is accepted but not used. The original comment states that
    ``sources`` arrive in ascending order, so reversing them puts the last
    entries first before truncating to ``k``.
    """
    reversed_sources = sources[::-1]
    return reversed_sources[:k]

# Guard built from the ProvenanceV1 validator with validation_method="full"
# ("sentence" is the alternative noted inline). When the guard is used, a
# `query_function` is supplied through the `metadata` dict.
provenance_v1_guard = gd.Guard.from_string(validators=[
    ProvenanceV1(llm_callable="gpt-3.5-turbo", on_fail="fix", validation_method="full",) #validation_method="sentence"
])

def reformulate_question(llm, question):
    """Ask ``llm`` to rewrite ``question`` and return the completion as a string.

    The (Italian) instruction asks the model to drop irrelevant information
    and keep only clear, concise questions. ``llm`` must expose a
    ``complete(prompt)`` method; its result is converted with ``str()``.
    """
    prompt = f"Riscrivere la domanda per rimuovere tutte le informazioni irrilevanti e lasciare solo domande chiare e concise. Domanda:{question}"
    completion = llm.complete(prompt)
    return str(completion)

## Telegram bot

In [None]:
# INSTALL FFMPEG FIRST!!
# sudo apt install ffmpeg 
# OR / AND
# ! conda install --y ffmpeg

from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes, CallbackContext
from telegram import Update
from pydub import AudioSegment
import os
from project_dirs import DATA_DIR
import whisper
# Whisper speech-to-text model ("base" size) used for voice messages.
whisper_model = whisper.load_model("base")

# Point pydub at the conda-installed ffmpeg / ffprobe binaries.
from pydub import AudioSegment
AudioSegment.converter = "/opt/conda/bin/ffmpeg"
AudioSegment.ffmpeg = "/opt/conda/bin/ffmpeg"
AudioSegment.ffprobe = "/opt/conda/bin/ffprobe"

# NOTE(review): the entries passed to append_to_path are the binaries
# themselves, not their directory -- confirm this is what the helper expects.
from utils import append_to_path
for path in [
    "/opt/conda/bin/ffmpeg",
    "/opt/conda/bin/ffprobe"
]:
    append_to_path(path)

from typing import Final
# Telegram bot token, read from PROJECT_DIR/keys; the `with` block closes the
# file handle instead of leaving it to the garbage collector.
with open(os.path.join(PROJECT_DIR, "keys", cnf['tg_token_file']), "r") as token_file:
    TOKEN: Final = token_file.read().strip("\n")
# BOT_USERNAME: Final = '@qa_rag_bot'


In [None]:
# COMMANDS
async def start_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Reply to /start with a short (Italian) introduction of the bot.

    The message is built from adjacent string literals so that the source
    indentation and line breaks do not end up in the text sent to the user;
    the quote around the document title is now closed.
    """
    await update.message.reply_text(
        "Ciao! Sono un bot che risponde alle domande sul documento "
        "'BANDO CONneSSi CONtributi per lo Sviluppo di Strategie digitali "
        "per i mercati globali. Anno 2024'. Fammi una domanda."
    )
    
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Reply to /help with a short (Italian) usage hint.

    The message is built from adjacent string literals so that the source
    indentation and line breaks do not end up in the text sent to the user;
    the quote around the document title is now closed.
    """
    await update.message.reply_text(
        "Scrivi una domanda sul documento "
        "'BANDO CONneSSi CONtributi per lo Sviluppo di Strategie digitali "
        "per i mercati globali. Anno 2024'."
    )
    
async def custom_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Placeholder command handler: replies with a single full stop."""
    placeholder_reply = "."
    await update.message.reply_text(placeholder_reply)

# RESPONSES
def process_question(
    question: str,
    retriever_faq,
    faq_similarity_threshold,
    faq_df,
    query_engine,
    qa_relevance_guard,
    provenance_v1_guard,
) -> str:
    """Answer a user question, preferring a stored FAQ answer over the RAG pipeline.

    Steps:
      1. Rewrite the question with the module-level ``llm``.
      2. Retrieve the most similar FAQ question; if its score is above
         ``faq_similarity_threshold`` and a stored answer exists, return it.
      3. Otherwise query ``query_engine`` and accept the answer only if both
         guards return a truthy validated output.

    Args:
        question: Raw user question.
        retriever_faq: Retriever over the FAQ questions; ``retrieve()`` returns
            nodes exposing ``score`` and ``text``.
        faq_similarity_threshold: Minimum score for reusing a FAQ answer.
        faq_df: DataFrame with ``Domanda`` (question) and ``Risposta`` (answer) columns.
        query_engine: Engine whose ``query()`` returns ``(raw_answer, source_nodes)``.
        qa_relevance_guard: Guard checking the answer against the question.
        provenance_v1_guard: Guard checking the answer against the source texts.

    Returns:
        The FAQ answer, the validated RAG answer, or
        ``'Informazione non trovata'`` when nothing passes validation.
    """
    not_found = 'Informazione non trovata'

    # Work on a rewritten question from here on.
    question = reformulate_question(llm=llm, question=question)

    # Check similarity to the sample questions (FAQ).
    # An empty result or a missing score no longer raises; it simply falls
    # through to the normal RAG path.
    faq_nodes = retriever_faq.retrieve(question)
    best_match = faq_nodes[0] if faq_nodes else None
    if (
        best_match is not None
        and best_match.score is not None
        and best_match.score > faq_similarity_threshold
    ):
        node_text = best_match.text
        stored_answers = faq_df.loc[faq_df.Domanda == node_text, "Risposta"].values
        # Guard against an index entry with no matching spreadsheet row.
        if len(stored_answers) > 0:
            logging.info("Returning stored answer to the question %s", node_text)
            return f"Rispondo alla domanda simile. {node_text}: {stored_answers[0]}"

    # Query with reranking.
    raw_answer, source_nodes = query_engine.query(question)
    if len(source_nodes) == 0:
        return not_found

    # Guardrails, step 1: is the answer relevant to the question?
    _, relevance_output, *_ = qa_relevance_guard.parse(
        llm_output=raw_answer, metadata={'question': question}
    )
    if not relevance_output:
        return not_found

    # Guardrails, step 2: is the (raw) answer supported by the retrieved sources?
    _, validated_output, *_ = provenance_v1_guard.parse(
        llm_output=raw_answer,
        metadata={'query_function': partial(query_function, sources=[i.text for i in source_nodes])},
    )
    return validated_output if validated_output else not_found

async def handle_message(
    update: Update,
    context: ContextTypes.DEFAULT_TYPE,
):
    """Handle a text message: run it through ``process_question`` and reply.

    The FAQ retriever, threshold, FAQ table, query engine and guards are the
    module-level objects defined earlier in the notebook.
    """
    message_type: str = update.message.chat.type
    text: str = update.message.text

    # The `f` prefix was missing, so the placeholders were printed literally.
    print(f"User {update.message.chat.id} in {message_type}: {text}")

    # NOTE(review): process_question is a synchronous call inside an async
    # handler -- confirm blocking here is acceptable for this bot.
    response: str = process_question(
        text,
        retriever_faq,
        faq_similarity_threshold,
        faq_df,
        query_engine,
        qa_relevance_guard,
        provenance_v1_guard,
    )
    print('Bot:', response)
    await update.message.reply_text(response)

def convert_ogg_to_mp3(ogg_filepath, file_id):
    """Convert the OGG file at ``ogg_filepath`` to MP3 and return the new path.

    The MP3 is written to ``DATA_DIR`` as ``<file_id>.mp3``.
    """
    target_path = os.path.join(DATA_DIR, f"{file_id}.mp3")
    AudioSegment.from_file(ogg_filepath, format="ogg").export(target_path, format="mp3")
    return target_path

def convert_speech_to_text(audio_filepath, model):
    """Transcribe ``audio_filepath`` with ``model`` and return the recognised text.

    ``model`` must expose ``transcribe(path)`` returning a mapping with a
    ``"text"`` entry.
    """
    return model.transcribe(audio_filepath)["text"]
    
async def handle_voice_message(
    update: Update,
    context: CallbackContext,
):
    """Handle a voice message: download it, transcribe it, answer, and clean up.

    The audio is downloaded as ``<file_id>.ogg`` into the working directory,
    converted to MP3, transcribed with the module-level ``whisper_model`` and
    answered through ``process_question``. Temporary audio files are removed
    in a ``finally`` block, so they no longer pile up when any step fails.
    """
    file_id = update.message.voice.file_id
    ogg_filepath = f"{file_id}.ogg"
    mp3_filepath = None
    try:
        new_file = await context.bot.get_file(file_id)
        await new_file.download_to_drive(ogg_filepath)

        mp3_filepath = convert_ogg_to_mp3(ogg_filepath, file_id)
        extracted_text = convert_speech_to_text(mp3_filepath, whisper_model)

        response: str = process_question(
            extracted_text,
            retriever_faq,
            faq_similarity_threshold,
            faq_df,
            query_engine,
            qa_relevance_guard,
            provenance_v1_guard,
        )
        print('Bot:', response)
        await update.message.reply_text(response)
    finally:
        # Remove whichever temporary files were actually created.
        for temp_path in (ogg_filepath, mp3_filepath):
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
    
# ERRORS    
async def error(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Error handler: print the update together with the error stored on ``context``."""
    failure = context.error
    print(f"Update {update} caused error {failure}")



### Run bot

In [None]:
# whisper_model = whisper.load_model("base")
print('Starting bot')
app = Application.builder().token(TOKEN).build()

# Commands
app.add_handler(CommandHandler('start', start_command))
# /help was wired to start_command, so the help text was never sent.
app.add_handler(CommandHandler('help', help_command))
# app.add_handler(CommandHandler('custom', start_command))

# Messages: text goes straight to the QA pipeline, voice is transcribed first.
app.add_handler(MessageHandler(filters.TEXT, handle_message))
app.add_handler(MessageHandler(filters.VOICE, handle_voice_message))

# Errors
app.add_error_handler(error)

# Blocks while the bot polls for updates (poll_interval=5).
app.run_polling(poll_interval=5)

In [None]:
# Bot name: QA_rag_bot
# Bot username (note left incomplete): @qa_

## Evaluate RAG with ResponseEvaluator
https://github.com/nguyenkien1402/llamaindex-practices/blob/main/evaluation-pipeline-rag/rag_evaluation_pipeline.ipynb

In [None]:
from llama_index.evaluation import RetrieverEvaluator
from llama_index.evaluation import generate_question_context_pairs
from llama_index.llms import OpenAI

import nest_asyncio
nest_asyncio.apply()

# Evaluate
# NOTE(review): `retriever_evaluator` and `qa_dataset` are only created in the
# following cell, so this line fails with NameError when the notebook is run
# top to bottom -- confirm whether this call is a leftover.
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

In [None]:
# Generate a synthetic question/context dataset from the document chunks
# (2 questions per chunk) with the main LLM.
# NOTE(review): `chunks` is only defined when the main index was rebuilt in
# this session, not when it was loaded from disk.
nodes = chunks
qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=2
)

# Evaluator computing MRR and hit rate for the main retriever.
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)

# Evaluate (top-level await: requires a notebook / running event loop)
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

In [None]:
# Same evaluation with a retriever configured with similarity_top_k=2.
retriever1 = index.as_retriever(similarity_top_k=2)
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever1
)

# Evaluate
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)
# NOTE(review): display_results is defined in a later cell; run that cell
# first, otherwise this call raises NameError.
display_results("Embedding Retriever", eval_results)

In [None]:
def display_results(name, eval_results):
    """Summarise retriever evaluation results as a one-row DataFrame.

    Each item of ``eval_results`` must expose ``metric_vals_dict`` with
    ``"hit_rate"`` and ``"mrr"`` entries. The returned frame has the columns
    ``Retriever Name``, ``Hit Rate`` and ``MRR``, where the two metrics are
    the means over all results.
    """
    per_query_metrics = pd.DataFrame(
        [result.metric_vals_dict for result in eval_results]
    )
    summary = {
        "Retriever Name": [name],
        "Hit Rate": [per_query_metrics["hit_rate"].mean()],
        "MRR": [per_query_metrics["mrr"].mean()],
    }
    return pd.DataFrame(summary)

In [None]:
# Show the summary table for the most recent `eval_results`.
display_results("OpenAI Embedding Retriever", eval_results)

## Answers evaluation

In [None]:
# Evaluation queries: the questions ("Domanda" column) of the FAQ table.
queries = faq_df.Domanda.values

In [None]:
# Judge models for answer evaluation, both with temperature 0.
# gpt-3.5-turbo
gpt35 = OpenAI(temperature=0, model="gpt-3.5-turbo")
service_context_gpt35 = ServiceContext.from_defaults(llm=gpt35)

# gpt-4
gpt4 = OpenAI(temperature=0, model="gpt-4")
service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)

In [None]:
# vector_index35 = VectorStoreIndex(chunks, service_context = service_context_gpt35)
# query_engine35 = vector_index35.as_query_engine()

In [None]:
# Response wrapper used to hand (answer, source nodes) to the evaluators.
from llama_index.response.schema import Response

# Faithfulness evaluator backed by the gpt-4 service context.
from llama_index.evaluation import FaithfulnessEvaluator
faithfulness_gpt4 = FaithfulnessEvaluator(service_context=service_context_gpt4)

# Relevancy evaluator backed by the gpt-4 service context.
from llama_index.evaluation import RelevancyEvaluator
relevancy_gpt4 = RelevancyEvaluator(service_context=service_context_gpt4)

In [None]:
# Testing: evaluate a single FAQ question (index 10) end to end.
eval_query = queries[10]
print(eval_query)

# The project query engine returns (raw_answer, source_nodes); wrap them in a
# Response so the llama_index evaluators can consume them.
raw_answer, source_nodes = query_engine.query(eval_query)
answer = Response(response=raw_answer, source_nodes=source_nodes)

eval_result = faithfulness_gpt4.evaluate_response(response=answer)
print(f"Faithfulness Evaluation: {eval_result.passing}")

eval_result = relevancy_gpt4.evaluate_response(
    query=eval_query, response=answer
)
print(f"Relevancy Evaluation: {eval_result.passing}")

In [None]:
# Batch analysis: evaluate the first 10 FAQ questions and compute the share of
# answers that pass the faithfulness and relevancy checks.
faithfulness_results, relevancy_results = [], []
for eval_query in queries[:10]:
    # Get the answer and wrap it in a Response for the evaluators.
    raw_answer, source_nodes = query_engine.query(eval_query)
    answer = Response(response=raw_answer, source_nodes=source_nodes)

    f_result = faithfulness_gpt4.evaluate_response(response=answer)
    r_result = relevancy_gpt4.evaluate_response(query=eval_query, response=answer)

    faithfulness_results.append(f_result)
    relevancy_results.append(r_result)

# Fraction of passing results per evaluator.
faithfulness_score = sum(item.passing for item in faithfulness_results) / len(faithfulness_results)
relevance_score = sum(item.passing for item in relevancy_results) / len(relevancy_results)


In [None]:
# Report the pass rates computed by the batch analysis.
print(f"faithfulness_score: {faithfulness_score}")
print(f"relevance_score: {relevance_score}")

In [None]:
# Evaluate every FAQ question with ResponseEvaluator (using the main service
# context) and collect one result per question via get_eval_df.
questions = faq_df.Domanda.values
evaluator = ResponseEvaluator(service_context=service_context)

eval_dfs = []
for ix, question in enumerate(questions):
    # The project query engine returns (raw_answer, source_nodes).
    raw_answer, source_nodes = query_engine.query(question)
    response = Response(response=raw_answer, source_nodes=source_nodes)
    # response = query_engine.query(question)
    logging.info("############  EVALUATING RESULT: %s  ############", ix)
    eval_result = evaluator.evaluate_response(response=response)
    eval_dfs.append(get_eval_df(question, response, eval_result))

In [None]:
# `eval_df` was never defined -- only the list `eval_dfs` exists -- so the
# per-question frames are combined first.
# NOTE(review): assumes get_eval_df returns a DataFrame with an
# 'Evaluation Result' column -- confirm against utils.get_eval_df.
eval_df = pd.concat(eval_dfs, ignore_index=True)
correct = eval_df[eval_df['Evaluation Result']=='YES'].shape[0]
perc_correct = correct/eval_df.shape[0]*100
print(f"Correct: {perc_correct :.2f} %, {eval_df.shape[0]} questions")

# Other

### Testing

In [None]:
# Rebuild the project query engine with the same components as in the
# "guardrails, query_engine" section.
query_engine = RAGStringQueryEngine(
    retriever=retriever,
    response_synthesizer=synthesizer,
    llm=llm,
    qa_prompt=qa_prompt,
    # postprocessor=similarity_postprocessor,
    postprocessors=[similarity_postprocessor, rerank_postprocessor],
)

In [None]:
# Manual test of the RAG + guardrails path for one question (the same steps
# as the non-FAQ branch of process_question, without question rewriting).
question = "dimmi per favore qual'è l'obiettivo del bando connessi?"

# Default reply when no answer passes validation.
answer = 'Informazione non trovata'
# Query with reranking
######################
raw_answer, source_nodes = query_engine.query(question)
if len(source_nodes) != 0:
    # Apply guardrails ai
    ######################
    # Step 1: relevance of the answer to the question.
    raw_llm_output, validated_output, *rest = qa_relevance_guard.parse(
    llm_output=raw_answer, metadata={'question':question}
    )
    if validated_output:
        # Step 2: provenance of the raw answer against the source node texts.
        raw_llm_output, validated_output, *rest = provenance_v1_guard.parse(
            llm_output=raw_answer, 
            metadata={'query_function':partial(query_function, sources=[i.text for i in source_nodes])}
        )
        if validated_output:
            answer = validated_output
            
answer

### Guardrails ai examples

In [None]:
# QARelevanceLLMEval example: validate the last evaluated response against the
# first FAQ question.
qa_relevance_guard = gd.Guard.from_string(
    validators=[
        QARelevanceLLMEval(on_fail="fix", llm_callable="gpt-3.5-turbo"),
    ],
    description="",
)

# `response` and `questions` come from the evaluation cells above.
raw_response = response.response
question = questions[0]
raw_llm_output, validated_output, *rest = qa_relevance_guard.parse(
    llm_output=raw_response, metadata={'question':question}
)

# Print the output
print(f"### raw_response ###:\n{raw_response}")
print(f"### question ###:\n{question}")
print(f"### validated_output ###:\n{validated_output}")

### Check similarity with FAQ with Guardrails

In [None]:
### Check with SimilarToDocument if the question is in the excel list of questions #####
excel_files = list_all_filepaths(common_dir=DATA_DIR, folder='',extension='xlsx')
faq_df = pd.read_excel(excel_files[0])
excel_questions = faq_df.Domanda.values

real_question = "Qual è l'obiettivo principale del Bando Wikipedia"
response = None
# Initialised so the final print works even if the loop body never runs.
raw_output = None

# Compare the question against the first two FAQ questions, one guard each.
# `tqdm` was used here without ever being imported (NameError); a plain loop
# is enough for two items.
for excel_q in excel_questions[:2]:
    guard = gd.Guard.from_string(
        validators=[
            SimilarToDocument(model='text-embedding-ada-002', document=excel_q, threshold=0.9, on_fail="filter"),
        ],
        description="testmeout",
    )

    raw_output, validated_output, reask, validation_passed, error = guard.parse(
        llm_output=real_question, num_reasks=1
    )
    # A truthy validated output means the question matched this FAQ entry:
    # take its stored answer and stop.
    if validated_output:
        response = faq_df.loc[faq_df.Domanda==excel_q, "Risposta"].values[0]
        break

# Print the output
print(f"### raw_output ###:\n{raw_output}")
print(f"### real_question ###:\n{real_question}")
print(f"### response ###:\n{response}")

### Query with reranking: Version O

In [None]:
# Baseline variant: llama_index's built-in query engine with a time-weighted
# post-processor (top_k=1) applied to 3 retrieved nodes.
# NOTE: this rebinds the global `query_engine`; `response` below is the
# engine's response object, not a (raw_answer, source_nodes) tuple.
rerank_postprocessor = TimeWeightedPostprocessor(time_decay=0.5, time_access_refresh=False, top_k=1)

query_engine = index.as_query_engine(
    streaming=False, 
    response_mode="tree_summarize",
    verbose=True,
    similarity_top_k=3,
    node_postprocessors=[rerank_postprocessor], 
    text_qa_template=None,
)
# Query and Print Response
question = excel_questions[0]
response = query_engine.query(question)
print(f"QUESTION: {question}")
print(response.response)

# len(response.source_nodes)
# response.source_nodes[1].node.get_content()

# Create validation questions for the document

In [None]:
import pandas as pd
from llama_index.evaluation import (
    DatasetGenerator, 
    RelevancyEvaluator, 
    ResponseEvaluator, 
    FaithfulnessEvaluator, 
    QueryResponseEvaluator,
)
from llama_index import (
    Response,
    load_index_from_storage,
    SummaryIndex
)
from llama_index.prompts import Prompt
from llama_index.node_parser import SimpleNodeParser
# from llama_index.llms import AzureOpenAI

In [None]:
# create client and a new collection
# Node parser (chunk_size=1024, chunk_overlap=20) plus a vector store and
# service context for generating validation questions.
# NOTE: this rebinds the global `vector_store` and `service_context`.
node_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=20)

vector_store = ChromaVectorStore(chroma_collection=db_collection)
service_context = ServiceContext.from_defaults(llm=llm,
                                               embed_model=embedding_model,
                                               node_parser=node_parser)

In [None]:
# Build a question generator over the loaded documents; the prompts ask for
# summary questions in Italian limited to the supplied context.
# NOTE(review): `data` is only defined when the main index was rebuilt in this
# session, not when it was loaded from disk.
documents = data
data_generator = DatasetGenerator.from_documents(
                      documents,
                      text_question_template=Prompt(
                      "A sample from the documents written in Italian is below.\n"
                      "---------------------\n"
                      "{context_str}\n"
                      "---------------------\n"
                      "Using the documentation sample, carefully follow the instructions below:\n"
                      "{query_str}"
                      ),
                      question_gen_query=(
                          "You are a search pipeline evaluator. Using the papers provided, "
                          "you must create a list of summary questions in Italian. "
                          "Limit the queries to the information supplied in the context.\n"
                          "Question: "
                      ),
                      service_context=service_context)

In [None]:
# To avoid RuntimeError: asyncio.run() cannot be called from a running event loop when running in Jupyter
import nest_asyncio
nest_asyncio.apply()

In [None]:
# Generate validation questions (num=10 is passed to the generator).
generated_questions  = data_generator.generate_questions_from_nodes(num=10)
print(f"Generated {len(generated_questions)} questions.")

# Save the questions into a txt file (one per line) for reuse later on.
out_file_path = os.path.join(OUTPUT_DIR, "validation_questions.txt")
with open(out_file_path, "w") as f:
    for question in generated_questions:
        f.write(f"{question.strip()}\n")

In [None]:
# Reload the saved validation questions, one per line, dropping trailing
# whitespace (including the newline).
out_file_path = os.path.join(OUTPUT_DIR, "validation_questions.txt")
with open(out_file_path, 'r') as f:
    generated_questions = f.readlines()
    generated_questions = [line.rstrip() for line in generated_questions]

In [None]:
# Display the loaded questions as the cell output.
generated_questions