In [1]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
import dotenv
import time
dotenv.load_dotenv()
import os
import sys
sys.path.append('../..')
from backend.llms.llm_rag import LLMRag
from google import genai
from google.genai.types import HttpOptions
from ragas import EvaluationDataset
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import ContextPrecision,  LLMContextRecall, AnswerRelevancy,  Faithfulness,  BleuScore, RougeScore, FactualCorrectness
import datetime
import json

os.environ["GOOGLE_CLOUD_PROJECT"] = 'clasificationfromdescription'
os.environ["GOOGLE_CLOUD_LOCATION"] = 'us-central1'
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = 'True'
import pandas as pd

In [2]:
#evaluator
llm_eval = ChatOpenAI(model="gpt-4o", api_key=os.getenv('OPENAI_API_KEY'))
evaluator_llm = LangchainLLMWrapper(llm_eval)

In [3]:
from backend.llms.llm_rag import LLMProtocol

class RagResponse():
    def __init__(self, text = None):
        self.text = text
class LLMGemini2(LLMProtocol):
    def __init__(self, model):
        self.model = model
        self.client = genai.Client(http_options=HttpOptions(api_version="v1"))
 
    def process_request(self, message):
       
        response = self.client.models.generate_content(
            model=self.model,
            contents=message,
        )
        response_rag  = RagResponse()
        response_rag.text = response.text
        response_rag.raw_response = response
        return response_rag
    def log(self ):
        pass

In [4]:


# Example response:
# Okay, let's break down how AI works. It's a broad field, so I'll focus on the ...
#
# Here's a simplified overview:
# ...

In [5]:

backend_list = ["gemini-1.0-pro-001","gemini-1.5-flash-001","gemini-1.5-pro-001", "gemini-2.0-flash-001", "gemini-2.0-pro-exp-02-05"]
sumar_results = []
for backend_model in backend_list:
    backend_llm = LLMGemini2(backend_model)
    print(f"====================Model {backend_model} ==================")
    llmrag = LLMRag(backend=backend_llm)
    ##############Read data
    intrebari_df = pd.read_csv("intrebari si raspunsuri validate.csv")#[0:1]
    intrebari_df = intrebari_df.rename(columns={'Întrebare':'user_input', 'Răspuns':'reference'})
    print("------------intrebari Initiale-----------------")
    print(intrebari_df.head())
    ######################## 
    print()
    i=0
    answers_llmrag = []
    for query in intrebari_df["user_input"].values:
        print(f'query{i}')
        i+=1
        full_response = llmrag.process_request(query)
        time.sleep(10)
        relevant_docs = full_response.context
        if full_response.text:   
            response = full_response.text.removeprefix("Folosind informatiile din curs, ")
        else:
            response = 'None'    
        answers_llmrag.append({
                "user_input":query,
                "retrieved_contexts":relevant_docs,
                "response":response
        })

    answers_df = pd.DataFrame(answers_llmrag)
    print(f"raspunsuri {backend_model}")
    print(answers_df.head())
    intrebari_df["retrieved_contexts"] = answers_df['retrieved_contexts']
    intrebari_df["response"] = answers_df["response"]
    dataset = intrebari_df.to_dict(orient = 'records')
    evaluation_dataset = EvaluationDataset.from_list(dataset)
    print(f"evaluation dataset")
    print(evaluation_dataset.to_pandas())
    ###########################Evalute
    
    metrics = [
            ContextPrecision(),
            LLMContextRecall(),
            AnswerRelevancy(),
            Faithfulness(),
            BleuScore(),
            RougeScore(),
            FactualCorrectness()]

    result = evaluate(
        dataset=evaluation_dataset,
        metrics=metrics,
        llm=evaluator_llm)
    

    print(f"sumar result = {result}")
    
    current_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    sumar_results.append([f"ragas_results_{backend_model}_{llmrag.config['chunker_name']}_{current_time_str}.csv", f"{result}"])
    result_df = result.to_pandas()

    intrebari_df = intrebari_df.add_prefix('initial_')
    rezultate_df = pd.concat([intrebari_df, result_df], axis = 1)
    print(f"ragas_results_{backend_model}_{llmrag.config['chunker_name']}.csv")

    rezultate_df.to_csv(f"ragas_results_{backend_model}_{llmrag.config['chunker_name']}_{current_time_str}.csv")

with open("sumar_results.json", "w") as f:
    json.dump(sumar_results, f, indent=2)  # indent=2 makes the JSON human-readable


config {'chunker_name': 'pages', 'similarity_threshold': 0.2, 'top_k': 10, 'no_internal_source_prompt': 'Your only answer should be exactly like this: Nu am gasit informatii in curs despre intrebarea ta', 'internal_source_rules': 'Esti un asistent care raspunde in limba Romana la intrebarea din  "ORIGINAL USER PROMPT" folosind context din "CONTEXT FROM INTERNAL SOURCES". Raspunsul tau trebuie cat mai concis si mai scurt (poate sa fie chiar doar o instructiune SQL) si sa includa(daca e posibil) text din CONTEXT.  Vei raspunde \'Folosind informatiile din curs, \' si apoi adauga raspunsul.'} 
 -------------------------------
Folder exists, no new chroma db
------------intrebari Initiale-----------------
     Generator Dificultate Capitole Principale Capitole Asociate  \
0  O3mini-high         LOW                  C1               NaN   
1  O3mini-high         LOW                  C2               NaN   
2  O3mini-high         LOW                  C3               NaN   
3  O3mini-high    

Evaluating:   0%|          | 0/553 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[20]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')
ERROR:ragas.executor:Exception raised in Job[6]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')
ERROR:ragas.executor:Exception raised in Job[34]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')
ERROR:ragas.executor:Exception raised in Job[55]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')
ERROR:ragas.executor:Exception raised in Job[83]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be

sumar result = {'context_precision': 0.2883, 'context_recall': 0.3650, 'answer_relevancy': 0.2605, 'faithfulness': 0.7517, 'bleu_score': 0.0235, 'rouge_score': 0.2202, 'factual_correctness': 0.2903}
ragas_results_gemini-1.0-pro-001_pages.csv
config {'chunker_name': 'pages', 'similarity_threshold': 0.2, 'top_k': 10, 'no_internal_source_prompt': 'Your only answer should be exactly like this: Nu am gasit informatii in curs despre intrebarea ta', 'internal_source_rules': 'Esti un asistent care raspunde in limba Romana la intrebarea din  "ORIGINAL USER PROMPT" folosind context din "CONTEXT FROM INTERNAL SOURCES". Raspunsul tau trebuie cat mai concis si mai scurt (poate sa fie chiar doar o instructiune SQL) si sa includa(daca e posibil) text din CONTEXT.  Vei raspunde \'Folosind informatiile din curs, \' si apoi adauga raspunsul.'} 
 -------------------------------
Folder exists, no new chroma db
------------intrebari Initiale-----------------
     Generator Dificultate Capitole Principale C

Evaluating:   0%|          | 0/553 [00:00<?, ?it/s]

sumar result = {'context_precision': 0.2854, 'context_recall': 0.3713, 'answer_relevancy': 0.4884, 'faithfulness': 0.6034, 'bleu_score': 0.0673, 'rouge_score': 0.1780, 'factual_correctness': 0.5041}
ragas_results_gemini-1.5-flash-001_pages.csv
config {'chunker_name': 'pages', 'similarity_threshold': 0.2, 'top_k': 10, 'no_internal_source_prompt': 'Your only answer should be exactly like this: Nu am gasit informatii in curs despre intrebarea ta', 'internal_source_rules': 'Esti un asistent care raspunde in limba Romana la intrebarea din  "ORIGINAL USER PROMPT" folosind context din "CONTEXT FROM INTERNAL SOURCES". Raspunsul tau trebuie cat mai concis si mai scurt (poate sa fie chiar doar o instructiune SQL) si sa includa(daca e posibil) text din CONTEXT.  Vei raspunde \'Folosind informatiile din curs, \' si apoi adauga raspunsul.'} 
 -------------------------------
Folder exists, no new chroma db
------------intrebari Initiale-----------------
     Generator Dificultate Capitole Principale

Evaluating:   0%|          | 0/553 [00:00<?, ?it/s]

sumar result = {'context_precision': 0.2818, 'context_recall': 0.3713, 'answer_relevancy': 0.6834, 'faithfulness': 0.5350, 'bleu_score': 0.0473, 'rouge_score': 0.1474, 'factual_correctness': 0.4343}
ragas_results_gemini-1.5-pro-001_pages.csv
config {'chunker_name': 'pages', 'similarity_threshold': 0.2, 'top_k': 10, 'no_internal_source_prompt': 'Your only answer should be exactly like this: Nu am gasit informatii in curs despre intrebarea ta', 'internal_source_rules': 'Esti un asistent care raspunde in limba Romana la intrebarea din  "ORIGINAL USER PROMPT" folosind context din "CONTEXT FROM INTERNAL SOURCES". Raspunsul tau trebuie cat mai concis si mai scurt (poate sa fie chiar doar o instructiune SQL) si sa includa(daca e posibil) text din CONTEXT.  Vei raspunde \'Folosind informatiile din curs, \' si apoi adauga raspunsul.'} 
 -------------------------------
Folder exists, no new chroma db
------------intrebari Initiale-----------------
     Generator Dificultate Capitole Principale C

Evaluating:   0%|          | 0/553 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[41]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')
ERROR:ragas.executor:Exception raised in Job[363]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')
ERROR:ragas.executor:Exception raised in Job[503]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')
ERROR:ragas.executor:Exception raised in Job[531]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')


sumar result = {'context_precision': 0.2862, 'context_recall': 0.3713, 'answer_relevancy': 0.4300, 'faithfulness': 0.8917, 'bleu_score': 0.0284, 'rouge_score': 0.1242, 'factual_correctness': 0.3165}
ragas_results_gemini-2.0-flash-001_pages.csv
config {'chunker_name': 'pages', 'similarity_threshold': 0.2, 'top_k': 10, 'no_internal_source_prompt': 'Your only answer should be exactly like this: Nu am gasit informatii in curs despre intrebarea ta', 'internal_source_rules': 'Esti un asistent care raspunde in limba Romana la intrebarea din  "ORIGINAL USER PROMPT" folosind context din "CONTEXT FROM INTERNAL SOURCES". Raspunsul tau trebuie cat mai concis si mai scurt (poate sa fie chiar doar o instructiune SQL) si sa includa(daca e posibil) text din CONTEXT.  Vei raspunde \'Folosind informatiile din curs, \' si apoi adauga raspunsul.'} 
 -------------------------------
Folder exists, no new chroma db
------------intrebari Initiale-----------------
     Generator Dificultate Capitole Principale

Evaluating:   0%|          | 0/553 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[426]: TypeError(ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'')


sumar result = {'context_precision': 0.3029, 'context_recall': 0.3713, 'answer_relevancy': 0.4600, 'faithfulness': 0.8713, 'bleu_score': 0.0235, 'rouge_score': 0.1074, 'factual_correctness': 0.3605}
ragas_results_gemini-2.0-pro-exp-02-05_pages.csv
