In [1]:
import pandas as pd
from advanced_rag import AdvancedRAG, EmbeddingModelType, ChainType
from basic_rag import answer_query as basic_answer_query
# from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util

In [2]:
def get_score(pred: str, gold: str, semantic_model) -> float:
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        pred: Predicted answer text.
        gold: Reference (gold) answer text.
        semantic_model: Object exposing ``encode(text, convert_to_tensor=True)``;
            the notebook passes a ``SentenceTransformer`` instance.

    Returns:
        The value of ``util.cos_sim`` for the two embeddings, unwrapped with
        ``.item()``.
    """
    # Encode the prediction first, then the gold answer.
    pred_vec, gold_vec = (
        semantic_model.encode(text, convert_to_tensor=True) for text in (pred, gold)
    )
    return util.cos_sim(pred_vec, gold_vec).item()

In [3]:
# Build the RAG wrapper whose get_direct_llm_answer method is used below as an LLM judge.
# Per the messages it prints on construction, this loads sentence-transformer
# embeddings, an existing FAISS vector database and a local Llama 3.1 model via
# Ollama, and expects the Ollama server (`ollama serve`) to be running already.
self_llm = AdvancedRAG()

Make sure the Ollama server is running (use 'ollama serve' in another terminal)
.
The server must be active once per session before starting this script.

Loading sentence-transformer embeddings...
Loading existing FAISS vector database...
Loading local Llama 3.1 model via Ollama...


In [4]:
# Instruction sent to the LLM judge. The {gold_answer} and {predicted_answer}
# placeholders are filled with str.replace by the scoring cells (not str.format),
# so they must keep exactly this spelling.
# The wording is spelled out cleanly ("additional", "of ... are in", digit 0 for
# the lowest score) so the scoring scale given to the model is unambiguous.
# NOTE: scores recorded from an earlier wording of the prompt need a re-run.
llm_prompt = (
    "Please score the level of correctness of the predicted answer based on the gold answer. "
    "You should give a high score to an answer that contains at least the elements contained in the gold answer. "
    "If a few additional elements are present in the predicted answer, you can still give a high score for the answer. "
    "If elements of the predicted answer are contradicting the gold answer, the final score should be low. "
    "1 is a perfect score (all elements of the gold answer are in the predicted answer). "
    "0 is a very low score (None of the elements of the gold answer are in the predicted answer).\n"
    " Gold answer : {gold_answer}. Predicted answer : {predicted_answer}. "
    "Please only give your score with no comments or explanations."
)
# Reference answer that every candidate answer is scored against.
gold_answer = "Big Brother is said to be the leader and ruler of Oceania. He has black hair and a black mustache and looks calm and powerful."

In [5]:
# Candidate answer: a long, thematic description of Big Brother.
pred_answer = (
    "Big Brother in 1984 symbolizes the Party’s absolute power and control over the people, "
    "serving as a constant reminder that citizens are always being watched. "
    "He represents both a figure of authority and an instrument of fear, "
    "ensuring obedience through surveillance and propaganda. "
    "His image is typically described as a stern, mustachioed face, "
    "often gazing directly forward, "
    "giving the impression that he is watching each person individually."
)

In [6]:
# Embedding baseline: cosine similarity between the current pred_answer and
# gold_answer, computed with a freshly loaded all-MiniLM-L6-v2 model.
sim_score = get_score(
    pred_answer,
    gold_answer,
    SentenceTransformer('all-MiniLM-L6-v2'),
)
sim_score

0.6392040252685547

In [7]:
# LLM-as-judge score for the current pred_answer against gold_answer.
# The filled-in prompt is identical on every iteration, so it is built once;
# the same request is then sent five times (presumably to check how stable the
# model's score is across calls).
# NOTE(review): this reuses the name sim_score and therefore overwrites the
# embedding similarity assigned by the get_score cell.
judge_prompt = llm_prompt.replace("{gold_answer}", gold_answer).replace("{predicted_answer}", pred_answer)
for _ in range(5):
    # float() raises ValueError if the reply is anything other than a bare number.
    sim_score = float(self_llm.get_direct_llm_answer(judge_prompt))
    print(sim_score)

0.8
0.8
0.8
0.8
0.8


In [8]:
# Candidate answer: a shortened version of the thematic description.
pred_answer = (
    "Big Brother in 1984 symbolizes the Party’s absolute power and control over the people. "
    "His image is typically described as a stern, mustachioed face."
)

In [9]:
# Embedding baseline: cosine similarity between the current pred_answer and
# gold_answer, computed with a freshly loaded all-MiniLM-L6-v2 model.
sim_score = get_score(
    pred_answer,
    gold_answer,
    SentenceTransformer('all-MiniLM-L6-v2'),
)
sim_score

0.6651268005371094

In [10]:
# LLM-as-judge score for the current pred_answer against gold_answer.
# The filled-in prompt is identical on every iteration, so it is built once;
# the same request is then sent five times (presumably to check how stable the
# model's score is across calls).
# NOTE(review): this reuses the name sim_score and therefore overwrites the
# embedding similarity assigned by the get_score cell.
judge_prompt = llm_prompt.replace("{gold_answer}", gold_answer).replace("{predicted_answer}", pred_answer)
for _ in range(5):
    # float() raises ValueError if the reply is anything other than a bare number.
    sim_score = float(self_llm.get_direct_llm_answer(judge_prompt))
    print(sim_score)

0.8
0.8
0.8
0.5
0.8


In [24]:
# Candidate answer: shares only the name "Big Brother" with the gold answer.
pred_answer = (
    "Big Brother is my neighbors' oldest son. "
    "He is tall, blond and looks funny"
)

In [12]:
# Embedding baseline: cosine similarity between the current pred_answer and
# gold_answer, computed with a freshly loaded all-MiniLM-L6-v2 model.
sim_score = get_score(
    pred_answer,
    gold_answer,
    SentenceTransformer('all-MiniLM-L6-v2'),
)
sim_score

0.49509742856025696

In [25]:
# LLM-as-judge score for the current pred_answer against gold_answer.
# The filled-in prompt is identical on every iteration, so it is built once;
# the same request is then sent five times (presumably to check how stable the
# model's score is across calls).
# NOTE(review): this reuses the name sim_score and therefore overwrites the
# embedding similarity assigned by the get_score cell.
judge_prompt = llm_prompt.replace("{gold_answer}", gold_answer).replace("{predicted_answer}", pred_answer)
for _ in range(5):
    # float() raises ValueError if the reply is anything other than a bare number.
    sim_score = float(self_llm.get_direct_llm_answer(judge_prompt))
    print(sim_score)

0.0
0.0
0.0
0.0
0.0


In [22]:
# Candidate answer: same first sentence as the gold answer, but the physical
# description (hair, mustache, demeanor) is changed.
pred_answer = (
    "Big Brother is said to be the leader and ruler of Oceania. "
    "He has blond hair and a red mustache and looks angry and stressed."
)

In [15]:
# Embedding baseline: cosine similarity between the current pred_answer and
# gold_answer, computed with a freshly loaded all-MiniLM-L6-v2 model.
sim_score = get_score(
    pred_answer,
    gold_answer,
    SentenceTransformer('all-MiniLM-L6-v2'),
)
sim_score

0.951839029788971

In [23]:
# LLM-as-judge score for the current pred_answer against gold_answer.
# The filled-in prompt is identical on every iteration, so it is built once;
# the same request is then sent five times (presumably to check how stable the
# model's score is across calls).
# NOTE(review): this reuses the name sim_score and therefore overwrites the
# embedding similarity assigned by the get_score cell.
judge_prompt = llm_prompt.replace("{gold_answer}", gold_answer).replace("{predicted_answer}", pred_answer)
for _ in range(5):
    # float() raises ValueError if the reply is anything other than a bare number.
    sim_score = float(self_llm.get_direct_llm_answer(judge_prompt))
    print(sim_score)

0.5
0.5
0.5
0.5
0.5


In [26]:
# Candidate answer: a partial answer that keeps only some elements of the gold answer.
pred_answer = (
    "Big Brother is the leader and ruler of Oceania. "
    "He has black hair."
)

In [18]:
# Embedding baseline: cosine similarity between the current pred_answer and
# gold_answer, computed with a freshly loaded all-MiniLM-L6-v2 model.
sim_score = get_score(
    pred_answer,
    gold_answer,
    SentenceTransformer('all-MiniLM-L6-v2'),
)
sim_score

0.9408539533615112

In [27]:
# LLM-as-judge score for the current pred_answer against gold_answer.
# The filled-in prompt is identical on every iteration, so it is built once;
# the same request is then sent five times (presumably to check how stable the
# model's score is across calls).
# NOTE(review): this reuses the name sim_score and therefore overwrites the
# embedding similarity assigned by the get_score cell.
judge_prompt = llm_prompt.replace("{gold_answer}", gold_answer).replace("{predicted_answer}", pred_answer)
for _ in range(5):
    # float() raises ValueError if the reply is anything other than a bare number.
    sim_score = float(self_llm.get_direct_llm_answer(judge_prompt))
    print(sim_score)

0.5
0.5
0.5
0.5
0.5
