In [45]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Helper function to calculate Precision@K, Recall@K, F1@K, and MRR@K
def evaluate_retrieval_at_k(real_contexts, retrieved_contexts, k):
    """Compute Precision@K, Recall@K, F1@K and MRR@K for a single query.

    Args:
        real_contexts: Ground-truth (relevant) contexts. Items must be hashable.
        retrieved_contexts: Retrieved contexts in ranked order (best first).
        k: Cut-off rank; only the first ``k`` retrieved items are scored.

    Returns:
        A tuple ``(precision_k, recall_k, f1_k, mrr_k)``.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Get top-k retrieved contexts
    top_k_retrieved = retrieved_contexts[:k]

    # De-duplicate the ground truth once; the set is reused for the hit count,
    # the recall denominator and the MRR membership test.
    relevant = set(real_contexts)

    # True positives: distinct relevant contexts present in the top-k
    tp = len(relevant.intersection(top_k_retrieved))

    # Precision@K uses k as the denominator even when fewer than k items were retrieved
    precision_k = tp / k

    # Recall@K: tp counts distinct items, so the denominator must too.
    # An empty ground truth yields 0.0 instead of a ZeroDivisionError.
    recall_k = tp / len(relevant) if relevant else 0.0

    # F1@K: harmonic mean, defined as 0 when both components are 0
    if precision_k + recall_k > 0:
        f1_k = 2 * (precision_k * recall_k) / (precision_k + recall_k)
    else:
        f1_k = 0.0

    # MRR@K: reciprocal rank of the first relevant item in the top-k, else 0
    mrr_k = 0.0
    for rank, retrieved in enumerate(top_k_retrieved, start=1):
        if retrieved in relevant:
            mrr_k = 1 / rank
            break

    return precision_k, recall_k, f1_k, mrr_k




In [10]:
# Load the evaluation dataset for this experiment into `df`.
# NOTE(review): the path is absolute and machine-specific; it must be edited
# before this cell can run elsewhere.
file_path = '/Users/hadiibrahim/Dev/prima-power-hmi-assistant/ragas_dataset_rag_exp_llm_chain.csv'  # Replace with your file path
df = pd.read_csv(file_path)

In [49]:
import pandas as pd
import ast

# Function to convert string to array of strings
def convert_to_array_of_strings(context_str):
    """Convert a cell value into a list of strings.

    Args:
        context_str: Usually a string. If it looks like a Python list literal
            (starts with ``[`` and ends with ``]``) it is parsed with
            ``ast.literal_eval``; any other string is wrapped as a single
            element. Lists/tuples are accepted as already parsed, and
            ``None``/NaN are treated as "no contexts".

    Returns:
        A list of strings (possibly empty).
    """
    # Already parsed (e.g. the cell that applies this function was re-run):
    # just normalise the elements to strings.
    if isinstance(context_str, (list, tuple)):
        return [str(elem) for elem in context_str]

    # Missing values: pandas represents empty CSV cells as float NaN,
    # and NaN is the only float that is not equal to itself.
    if context_str is None or (isinstance(context_str, float) and context_str != context_str):
        return []

    # Any other non-string scalar becomes a single-element list
    if not isinstance(context_str, str):
        return [str(context_str)]

    # Check if the string is formatted like an array (starts with [ and ends with ])
    if context_str.startswith('[') and context_str.endswith(']'):
        try:
            # Safely evaluate the string to a Python literal
            parsed = ast.literal_eval(context_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval may raise any of these on malformed input;
            # fall back to treating the raw string as one context.
            return [context_str]

        # Ensure all elements are strings
        if isinstance(parsed, list):
            return [str(elem) for elem in parsed]

    # Not an array-like string (or it parsed to something other than a list):
    # treat the entire string as a single element
    return [context_str]

# Apply the conversion to the 'matched_contexts' column.
# NOTE(review): the column is overwritten in place, so re-running this cell
# feeds already-parsed lists back into convert_to_array_of_strings.
df['matched_contexts'] = df['matched_contexts'].apply(convert_to_array_of_strings)

In [52]:
# Apply the same conversion to the 'contexts' column (overwritten in place).
df['contexts'] = df['contexts'].apply(convert_to_array_of_strings)


In [33]:
import re
import pandas as pd

# Function to extract 'page_content' from the string and convert to a list of strings
def extract_page_content(retrieved_context_str):
    """Extract every ``page_content`` value from a stringified list of documents.

    Args:
        retrieved_context_str: Text containing ``page_content="..."`` or
            ``page_content='...'`` fields. Non-string values (e.g. NaN from an
            empty CSV cell) yield an empty list.

    Returns:
        A list with the raw text found inside each ``page_content`` field, in
        order of appearance. Backslash escapes are returned as written and
        are not decoded.
    """
    if not isinstance(retrieved_context_str, str):
        return []

    # A quoted value is a run of "anything but the quote or a backslash" or
    # "a backslash plus one character". A plain non-greedy `.*?` would stop at
    # an escaped quote and truncate the content.
    double_quoted = r'page_content="((?:[^"\\]|\\.)*)"'
    single_quoted = r"page_content='((?:[^'\\]|\\.)*)'"
    pattern = f"{double_quoted}|{single_quoted}"

    # DOTALL lets an escape sequence span a raw newline as well
    matches = re.findall(pattern, retrieved_context_str, flags=re.DOTALL)

    # Exactly one of the two groups is populated per match; the other is ''
    return [double or single for double, single in matches]


# Apply the extraction to the 'retrieved_contexts' column.
# NOTE(review): the column is overwritten in place, so re-running this cell
# passes lists (not strings) to extract_page_content.
df['retrieved_contexts'] = df['retrieved_contexts'].apply(extract_page_content)

In [55]:


# Specify the value of K for top-K metrics
K = 5  # You can adjust K based on your evaluation needs

# Initialize variables for storing evaluation results
precision_k_list, recall_k_list, f1_k_list, mrr_k_list = [], [], [], []

# Score every row of df: 'matched_contexts' is used as the ground truth and
# 'contexts' as the ranked retrieval output. No parsing happens in this loop.
for index, row in df.iterrows():
    real_contexts = row['matched_contexts']  # Ground-truth contexts; presumably already a list from an earlier cell - verify
    retrieved_contexts = row['contexts']  # Retrieved contexts; presumably already a list from an earlier cell - verify

    # Calculate retrieval metrics at K
    precision_k, recall_k, f1_k, mrr_k = evaluate_retrieval_at_k(real_contexts, retrieved_contexts, K)
    
    # Append the metrics to the lists
    precision_k_list.append(precision_k)
    recall_k_list.append(recall_k)
    f1_k_list.append(f1_k)
    mrr_k_list.append(mrr_k)

# Save the evaluation metrics to a new dataframe (one row per evaluated row of df)
retrieval_evaluation_results = pd.DataFrame({
    f'Precision@{K}': precision_k_list,
    f'Recall@{K}': recall_k_list,
    f'F1@{K}': f1_k_list,
    f'MRR@{K}': mrr_k_list
})


In [56]:
# Average each metric column over all evaluated rows
retrieval_evaluation_results.mean()

Precision@5    0.129400
Recall@5       0.631500
F1@5           0.214190
MRR@5          0.508417
dtype: float64

In [12]:
import pickle
# Load the pickled document objects into `docs`.
# NOTE(review): pickle.load can execute arbitrary code; only use it on files
# from a trusted source. The path is also absolute and machine-specific.
pickle_file = "/Users/hadiibrahim/Dev/prima-power-hmi-assistant/src/docs.pkl"
with open(pickle_file, "rb") as file:
    docs = pickle.load(file)

In [13]:
# Keep only the text of each loaded document.
# NOTE(review): `docs` is rebound to the extracted values, so re-running this
# cell would look up `.page_content` on those values instead of on the
# original document objects.
docs = [doc.page_content for doc in docs]

In [2]:
from langchain_community.embeddings import (
    HuggingFaceBgeEmbeddings,
    HuggingFaceEmbeddings,
)
# Embedding model used by the cells below; configured for device "cpu" with
# normalize_embeddings=True.
embeddings = HuggingFaceBgeEmbeddings(
                model_name="BAAI/bge-large-en-v1.5",  # or sentence-transformers/all-MiniLM-L6-v2
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True},
            )

  from tqdm.autonotebook import tqdm, trange


In [9]:
# Compute embeddings for docs (one embedding per entry, in the same order as `docs`)
doc_embeddings = embeddings.embed_documents(docs)

In [29]:
# Embed each question with embed_query rather than embed_documents: for BGE
# models the query-side call prepends the retrieval instruction, which the
# document-side call does not. This also matches how the alarm ID is embedded
# in get_alarm_id_embedding. The result is still one list of floats per question.
queries_embeddings = [embeddings.embed_query(question) for question in df['question'].tolist()]

In [15]:
import pandas as pd
# NOTE(review): rebinds `df` to a different dataset than the one loaded earlier
# in the notebook; the path is absolute and machine-specific.
df = pd.read_csv('/Users/hadiibrahim/Dev/prima-power-hmi-assistant/src/data/final_selection.csv')

In [17]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Function to find the most similar document for each context array or string
def match_contexts(context_input):
    """Return, for each context, the most similar entry of the global ``docs``.

    Relies on the notebook-level names ``embeddings``, ``doc_embeddings`` and
    ``docs``; ``doc_embeddings`` must be aligned index-for-index with ``docs``.

    Args:
        context_input: A single string or a sequence of strings.

    Returns:
        A list with one matched document per input context (empty when no
        contexts are given).
    """
    # A bare string is treated as a single context
    if isinstance(context_input, str):
        context_list = [context_input]
    else:
        context_list = context_input

    # Nothing to match: return early instead of passing an empty batch to
    # the embedder and cosine_similarity.
    if len(context_list) == 0:
        return []

    # Compute embeddings for the current list of contexts
    context_embeddings = embeddings.embed_documents(context_list)

    # Similarity matrix: one row per context, one column per document
    similarities = cosine_similarity(context_embeddings, doc_embeddings)

    # Column index of the best-scoring document for each context
    matched_docs_indices = np.argmax(similarities, axis=1)

    # Get the matched docs based on the indices
    return [docs[idx] for idx in matched_docs_indices]


In [19]:

# For each question, store the most similar document(s) returned by match_contexts
df['matched_questions'] = df['question'].apply(match_contexts)

In [31]:
import torch
import torch.nn.functional as F
import re
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

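# Assuming you already have embeddings for queries and contexts:
# queries_embeddings: one embedding per query, in the same order as the queries
#   (any value torch.tensor() accepts, e.g. a list of floats)
# contexts_embeddings: one embedding per context, in the same format
# contexts: the actual context strings, in the same order as contexts_embeddings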

# Function to check if the query mentions an alarm and to extract the alarm ID
def is_alarm_query(query):
    """Detect an alarm-related query and pull out the first number in it.

    Args:
        query: The query text.

    Returns:
        ``(True, number)`` when the query contains the word "alarm"
        (case-insensitive) and at least one number, where ``number`` is the
        first numeric token, optionally of the form ``"324/322"``.
        ``(False, None)`` otherwise.
    """
    if "alarm" in query.lower():
        # The first regex hit is the same token that findall(...)[0] would give
        first_number = re.search(r"\b\d+(?:/\d+)?\b", query)
        if first_number is not None:
            return True, first_number.group(0)

    return False, None

# Function to get embedding for Alarm ID using the specified embedding model
def get_alarm_id_embedding(alarm_id, embeddings):
    """Embed an alarm ID and return it as a tensor with a leading batch axis.

    Args:
        alarm_id: The alarm ID text to embed.
        embeddings: Object exposing ``embed_query(text)``.

    Returns:
        A tensor built from the embedding, with a new dimension of size 1
        inserted at position 0.
    """
    vector = torch.tensor(embeddings.embed_query(alarm_id))
    # Insert the batch dimension in front
    return torch.unsqueeze(vector, 0)

# Function to get the most similar context for each query
def get_most_similar_contexts(queries, queries_embeddings, contexts, contexts_embeddings, embeddings, alarm_weight=2.0):
    """Find the most similar context for each query by cosine similarity.

    For queries that ``is_alarm_query`` flags, the query embedding is blended
    with the embedding of the extracted alarm ID:
    ``(query + alarm_weight * alarm_id) / (1 + alarm_weight)``.

    Args:
        queries: Query strings.
        queries_embeddings: One embedding per query, indexable in query order.
        contexts: Context strings, aligned index-for-index with
            ``contexts_embeddings``.
        contexts_embeddings: One embedding per context.
        embeddings: Embedding model exposing ``embed_query``; used for alarm IDs.
        alarm_weight: Weight of the alarm-ID embedding in the blend.

    Returns:
        A list of ``(query, best_context, similarity)`` tuples, one per query.

    Raises:
        ValueError: If there is at least one query but no context embeddings.
    """
    # Build the context matrix once. Converting every context embedding to a
    # tensor inside the query loop repeats the same work for every query.
    context_tensors = [torch.tensor(context_embedding) for context_embedding in contexts_embeddings]
    context_matrix = torch.stack(context_tensors) if context_tensors else None

    most_similar_contexts = []

    for i, query in enumerate(queries):
        if context_matrix is None:
            raise ValueError("contexts_embeddings is empty; no context to compare against")

        query_embedding = torch.tensor(queries_embeddings[i]).unsqueeze(0)  # Add batch dimension

        # Check if it's an alarm query and extract the Alarm ID
        is_alarm, alarm_id = is_alarm_query(query)

        if is_alarm:
            # Give more weight to the Alarm ID by blending its embedding in
            alarm_id_embedding = get_alarm_id_embedding(alarm_id, embeddings)
            combined_query_embedding = (query_embedding + alarm_weight * alarm_id_embedding) / (1 + alarm_weight)
        else:
            combined_query_embedding = query_embedding

        # The (1, D) query broadcasts against the (C, D) matrix, giving one
        # similarity per context in a single call.
        similarities = F.cosine_similarity(combined_query_embedding, context_matrix, dim=1)

        # argmax returns the first maximal index, like list.index(max(...))
        most_similar_index = int(torch.argmax(similarities))
        most_similar_contexts.append(
            (query, contexts[most_similar_index], similarities[most_similar_index].item())
        )

    return most_similar_contexts






In [33]:
# Find the most similar context for each query.
# Uses `queries_embeddings`, `docs` and `doc_embeddings` computed in other cells;
# alarm_weight is left at its default.
most_similar_contexts = get_most_similar_contexts(
    queries=df['question'].tolist(), 
    queries_embeddings=queries_embeddings, 
    contexts=docs, 
    contexts_embeddings=doc_embeddings, 
    embeddings=embeddings
)


In [1]:
import pandas as pd
# Load the ensemble-experiment dataset into `df` (absolute, machine-specific path).
df = pd.read_csv('/Users/hadiibrahim/Dev/prima-power-hmi-assistant/ragas_dataset_rag_exp_ensemble.csv')

In [2]:
# Display the loaded frame
df

Unnamed: 0,question,answer,contexts,ground_truth,original_contexts
0,What does the left side of the interface displ...,The left side of the interface displays a mess...,['The main part of the interface has a blue ba...,"The left side under MDA displays ""Next stop La...",The interface displays operational information...
1,What does the highlighted state of the Edit bu...,The highlighted state of the Edit button likel...,['The Edit button appears to be highlighted in...,The highlighted state of the Edit button indic...,The text discusses the appearance and function...
2,Which application is used to manage Tulus lice...,The License Management application is used to ...,['The arrows between the screenshots suggest t...,The License Management application is used to ...,Tulus licenses and all related options are man...
3,What buttons are available for managing entrie...,The buttons available for managing entries in ...,['1. At the top right corner of the interface ...,The buttons available for managing entries in ...,The interface has three buttons at the top rig...
4,What is the purpose of MDA mode in the interface?,The purpose of MDA mode in the interface is to...,['3. TULUS Mda\nThe MDA Manual Data Automatic...,MDA mode allows the running of utility program...,MDA mode allows the running of utility program...
...,...,...,...,...,...
290,What purpose does the comment text box serve?,The comment text box serves as a space for use...,"['Alarm ID is: 6104, Source type is: PLC alarm...",The comment text box allows the user to enter ...,A text box is provided where the user can ente...
291,What type of values are contained in the table...,"The table in Panel number 5, titled ""Thickness...",['5. Panel number 5 is titled Thicknesses and ...,The table contains specific numerical values f...,Panel number 5 is titled Thicknesses and featu...
292,In what mode can the axes movement take place ...,Axes movement can only take place in manual mode.,['8. TULUS Control panel\nCertain basic funct...,The axes movement can only take place in manua...,The Tulus Control Panel allows users to manage...
293,What are some of the parameters displayed in t...,The parameters displayed in the lower part of ...,"['Alarm ID is: 772, Source type is: Servo alar...","Position, Enable, Model, Diameter, Serial code...",The interface is divided into several sections...


In [4]:
import pandas as pd
# Load the naive-experiment dataset into `df` (absolute, machine-specific path).
df = pd.read_csv('/Users/hadiibrahim/Dev/prima-power-hmi-assistant/ragas_dataset_rag_exp_naive.csv')
# Show the rows from position 758 onward
df[758:]

Unnamed: 0,question,answer,contexts,ground_truth,original_contexts,retrieved_contexts
758,What is the purpose of the toolbar in the appl...,The purpose of the toolbar in the application ...,['On the left side there is a vertical toolbar...,The toolbar provides quick access to different...,The application features a toolbar with tabs l...,[Document(metadata={'source': 'TULUS manual so...
759,How is the appearance of the interface described?,The interface has a sleek and modern appearanc...,['Its a simple stylized representation that co...,The interface has a sleek and modern appearanc...,The interface has a sleek and modern appearanc...,[Document(metadata={'source': 'TULUS manual so...
760,"What does the tagline ""THE SOFTWARE"" suggest a...","The tagline ""THE SOFTWARE"" suggests that Tulus...",['Creating the ProblemZip file. space\n\n \n\n...,It suggests that Tulus is the name of the soft...,The interface includes several sections with d...,[Document(metadata={'source': 'TULUS manual so...
761,What material properties can be inputted in th...,"Based on the provided context, the following m...",['1. At the top right corner of the interface ...,The material properties that can be inputted i...,The text describes a section in the software i...,[Document(metadata={'source': 'TULUS manual so...
762,"What does the field labeled ""Backup file to be...","The field labeled ""Backup file to be made"" spe...",['The Save button appears to be highlighted su...,The field specifies the file path where the ba...,There is a field labeled Backup file to be mad...,[Document(metadata={'source': 'TULUS manual so...
...,...,...,...,...,...,...
995,"What does Earth-icon on Licenses page signify,...",The Earth-icon on the Licenses page is not exp...,['Yellow trial license. The buttons on the Lic...,The Earth-icon on the Licenses page is a symbo...,['Yellow trial license. The buttons on the Lic...,[Document(metadata={'source': 'TULUS manual so...
996,"Where in Tulus interface, apart from help view...","The system settings, licenses, and backups in ...",['6. TULUS System management and settings\nTh...,The tools needed for general management of the...,['6. TULUS System management and settings\nTh...,[Document(metadata={'source': 'TULUS manual so...
997,How do you set up and oversee various backups ...,To set up and oversee various backups in a dat...,['The image appears to be a screenshot of a gr...,To set up and oversee various backups in a dat...,['Each box has an arrow pointing downward. Bel...,[Document(metadata={'source': 'TULUS manual so...
998,"What app manages Tulus license activation, usi...",The License Management application manages Tul...,['An activated license is valid only for the s...,The License Management application is used to ...,['The arrows between the screenshots suggest t...,[Document(metadata={'source': 'TULUS manual so...
