# Libraries

In [1]:
import os
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
import hashlib
from pinecone import Pinecone
from langchain_openai import OpenAI
from langchain_core.messages import HumanMessage, AIMessage
from sklearn.metrics.pairwise import cosine_similarity
import firebase_admin
import google.cloud
from firebase_admin import credentials, firestore
from prompt_templates import prompt_templates
from google.cloud.firestore_v1.base_query import FieldFilter
from sentence_transformers import CrossEncoder
from collections import Counter
from fuzzywuzzy import fuzz
from dotenv import load_dotenv

  from tqdm.autonotebook import tqdm


# Tools

In [2]:
load_dotenv()

# Firestore Initialization
# credential_path = r'C:\Users\user\OneDrive\Desktop\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json'
credential_path = r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path

if not firebase_admin._apps:
    # cred = credentials.Certificate(r'C:\Users\user\OneDrive\Desktop\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json')
    cred = credentials.Certificate(r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json')
    firebase_admin.initialize_app(cred)

try:
    db = firestore.Client()
    print("*Firestore connected successfully!")
except Exception as e:
    print(f"Failed to connect to Firestore: {e}")

# API Keys Initialization
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY_EVALUATION')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

if not OPENAI_API_KEY:
    print("OpenAI API Key not found!")
if not PINECONE_API_KEY:
    print("Pinecone API Key not found!")

# Pinecone Initialization
try:
    pc = Pinecone(api_key=PINECONE_API_KEY)
    print("*Pinecone connected successfully!")
except Exception as e:
    print(f"Failed to connect to Pinecone: {e}")


# OpenAI Initialization
try:
    client=OpenAI(api_key=OPENAI_API_KEY)
    LLM = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")
    EMBEDDINGS = OpenAIEmbeddings(model='text-embedding-3-small')
    print("*OpenAI connected successfully!")
except Exception as e:
    print(f"Failed to connect to OpenAI: {e}")

# CrossEncoder Initialization
try:
    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
    print("*CrossEncoder connected successfully!")
except Exception as e:
    print(f"Failed to connect to CrossEncoder: {e}")

*Firestore connected successfully!
*Pinecone connected successfully!
*OpenAI connected successfully!
*CrossEncoder connected successfully!


# Queries

In [3]:
query = "Did they decide to use the Switchboard model as the starting point for their speaker adaptation?"
user_id = "WuhmTzwTwmerjkSSK4XT8FyJS263"
session_id = "session1"
organization = "SCS"

# Embeddings

In [3]:
# Get Embeddings
def get_embeddings(text):
    """
    This function returns a list of the embeddings for a given query
    """
    text_embeddings = EMBEDDINGS.embed_query(text)
    # print("Generating Embeddings: Done!")
    return text_embeddings

# query_embeddings = get_embeddings(text=query)
# print(query_embeddings)
# type(query_embeddings)

In [4]:
def resolve_namespace(query, query_embeddings, summaries):
    """
    Resolves the namespace by selecting the most similar one using fuzzy matching (fuzzywuzzy).
    """
    def ambiguous_fuzzy(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """   
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)

        score_diff = ranked_candidates[0][1] - ranked_candidates[1][1]
        print("Score difference:", score_diff)

        if score_diff > 0.2:
            print("Cosine similarity is clear")
            return ranked_candidates[0][0]
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Cross Encoder:", reranked_candidates)

        score_diff = reranked_candidates[0][1] - reranked_candidates[1][1]
        print("Score difference:", score_diff)

        if score_diff < 0.9:
            print("Ambiguous in Cross Encoder")
            return ""

        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]

    def get_most_similar_namespace(query, query_embeddings, summaries):
        """
        Rank namespaces by fuzzy matching (using fuzzywuzzy's token_set_ratio).
        """
        top_two = {}

        similarities = {
            title: (fuzz.token_set_ratio(query.lower(), f"{title}".lower()) + fuzz.token_set_ratio(query.lower(), f"{summary}".lower()))/2
            for title, summary in summaries.items()
        }

        print("Computed fuzzy similarities:", similarities)

        # Rank namespaces based on similarity score
        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        print("Ranked namespaces:", ranked_namespaces)

        # Check for ambiguity
        if len(ranked_namespaces) > 1:
            diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]
            if diff < 15:
                print("Ambiguous fuzzy match.")
                top_two[ranked_namespaces[0][0]] = summaries.get(ranked_namespaces[0][0])
                top_two[ranked_namespaces[1][0]] = summaries.get(ranked_namespaces[1][0])
                print("Top two:", top_two)
                return ambiguous_fuzzy(query_embeddings, top_two)

        return ranked_namespaces[0][0] if ranked_namespaces else ""

    namespace = get_most_similar_namespace(query, query_embeddings, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

# meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization)
# print(meeting_title)
# type(meeting_title)

# Pinecone

In [13]:
# Get Relevant Documents
def query_pinecone_index(query_embeddings, meeting_title, index, top_k=3, include_metadata=True):
    """
    Query a Pinecone index.
    """
    # Query Pinecone using the build filter conditions
    query_response = index.query(
        vector=query_embeddings,
        top_k=top_k,
        include_metadata=include_metadata,
        namespace=meeting_title )

    print("Querying Pinecone Index: Done!")
    return [match['metadata']['text'] for match in query_response['matches']], [match['metadata']['date'] for match in query_response['matches']], [match['metadata']['title'] for match in query_response['matches']]

# index = pc.Index(organization.lower())
# text_answers, dates, titles = query_pinecone_index(query_embeddings=query_embeddings, meeting_title=meeting_title, index=index)
# print(f"{text_answers}\n{dates[0]}\n{titles[0]}")
# type(text_answers)
# type(dates)
# type(titles)

# Chat History

In [6]:
def initialize_chat_history(user_id, session_id):
    """
    Initializes a chat history object.
    """
    chat_history = []
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    doc_snapshot = doc_ref.get()
    try:
        if doc_snapshot.exists:
            messages = doc_snapshot.get('messages')
            if messages is None:
                print(f"No 'messages' field found in document for user_id={user_id}, session_id={session_id}")
                return chat_history
            messages = doc_snapshot.get('messages')

            for message in messages:
                chat_history.append(message)
            print(f"Chat History Initialized: {chat_history}")
        else:
            print(f"No document found for user_id={user_id}, session_id={session_id}")
    except Exception as e:
        print(f"Error initializing chat history: {str(e)}")
    
    return chat_history

def update_chat_history(user_id, session_id, chat_history):
    """
    Updates the chat history object.
    """
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    try:
        doc_ref.update({
            'messages': chat_history
        })
    except Exception as e:
        print(f"Error updating chat history: {str(e)}")

def process_chat_history(chat_history):
    """
    Changes the chat history list into a HumanMessages and AIMessages Schema
    """
    process_chat_history = []
    for idx, message in enumerate(chat_history):
        if idx % 2 == 0:
            process_chat_history.append(HumanMessage(message))
        else:
            process_chat_history.append(AIMessage(message))

        
    return process_chat_history

# chat_history = initialize_chat_history(user_id=user_id, session_id=session_id)

# Decomposition

In [14]:
def decomposition_query_process(question, text_answers, text_date, text_title, chat_history):
    """Implements decomposition query"""

    def output_parser(output):
        """
        Helps parses the LLM output, prints it, and returns it.
        """
        print("\n" + output.content + "\n")

        return output.content

    def decompose_question(question, chat_history):
        """
        Decomposes a complex question into smaller questions.
        """
        prompt = prompt_templates.decomposition_template().format(question=question, chat_history=chat_history)
        response = LLM.invoke(prompt)
        subquestions = response.content.split("\n")
        print("Decomposing Question: Done!")

        return subquestions
    
    def generate_qa_pairs(subquestions, context):
        """Generates QA pairs by answering each subquestion."""
        qa_pairs = []
        for subquestion in subquestions:
            context = context
            rag_prompt = prompt_templates.qa_template().format(context=context, subquestion=subquestion)
            answer = LLM.invoke(rag_prompt)
            qa_pairs.append((subquestion, answer))
        print("Generating QA Pairs: Done!")

        return qa_pairs
    
    def build_final_answer(question, context, qa_pairs, text_title, text_date, chat_history):
        """Builds a final answer by integrating the context and QA pairs."""
        qa_pairs_str = "\n".join([f"Q: {q}\nA: {a}" for q, a in qa_pairs])
        # final_prompt = prompt_templates.final_rag_template().format(context=context, qa_pairs=qa_pairs_str, question=question)
        final_prompt = prompt_templates.final_rag_template_with_memory().format(context=context, qa_pairs=qa_pairs_str, question=question, chat_history=chat_history, text_date=text_date, text_title=text_title)
        final_response = LLM.invoke(final_prompt)
        print("Building Final Answer: Done!")

        return final_response
    
    subquestions = decompose_question(question, chat_history)
    qa_pairs = generate_qa_pairs(subquestions, text_answers)
    print(qa_pairs)
    final_answer = build_final_answer(question, text_answers, qa_pairs, text_title[0], text_date[0], chat_history)

    return output_parser(final_answer)

# response = decomposition_query_process(question=query, text_answers=text_answers, chat_history=chat_history)
# print(response)
# type(response)

# RAGAS

In [9]:
print(f"Query: {query}")
print(f"Relevant Document: {text_answers}")
print(f"Answer: {response}")

Query: Did they decide to use the Switchboard model as the starting point for their speaker adaptation?


NameError: name 'text_answers' is not defined

## Dataset

In [17]:
sample_queries = [
    "Can you summarize the main points of the discussion on remote control design?",
    "Besides the design, were there any other aspects of the remote control that were discussed?",
    "What did the Industrial Designer recommend doing regarding the size of the LCD display?",
    "What were the tasks or next steps agreed upon by the participants at the end of the meeting?",
    "Did the participants agree on a specific type of material to use for the remote control's casing?"
]

expected_responses = [
    "Here are the main points of the discussion on remote control design:\n\n* The remote control should be user-friendly and accessible to a wide range of users, including older adults and children.\n* The remote should have a simple and intuitive design, with clear and easily recognizable buttons.\n* The remote could have a combination of physical buttons and an LCD display with menus for additional functions.\n* The remote could have a flip-top design to save space and provide a larger screen for the LCD display.",
    "Yes, other aspects of the remote control were discussed, including:\n\n* The importance of making the remote control affordable to produce, with a production cost of around twelve pounds fifty.\n* The target market for the remote control is international, with a profit aim of fifty million Euros in the first year.\n* The remote control should be able to integrate multiple devices, such as TVs, amplifiers, and DVD players, into one device.",
    "The Industrial Designer did not recommend anything regarding the size of the LCD display.",
    "The meeting transcript does not contain the answer to this question.",
    "The participants did not agree on a specific type of material to use for the remote control's casing. They discussed using plastic, but they did not make a final decision."
]

In [19]:
dataset = []
meeting_title = "Initial Concept Meeting for Universal Remote Design: Goals, User Insights, and Creative Exercises"
index = pc.Index("scs")

for query,reference in zip(sample_queries,expected_responses):

    text_answers = query_pinecone_index(query_embeddings=get_embeddings(query), meeting_title=meeting_title, index=index)
    response = decomposition_query_process(question=query, text_answers=text_answers[0], text_date=text_answers[1], text_title=text_answers[2], chat_history=[])
    dataset.append(
        {
            "user_input":query,
            "retrieved_contexts":text_answers[0],
            "response":response,
            "reference":reference
        }
    )

Querying Pinecone Index: Done!
Decomposing Question: Done!
Generating QA Pairs: Done!
[('1. What are the key design principles for remote controls?', AIMessage(content='The key design principles for remote controls, as discussed in the context, include originality and trendiness to appeal to a wide market, ensuring user-friendliness for a diverse audience ranging from children to the elderly, and even pets. Additionally, the functional design is crucial, focusing on integrating multiple functions into a single device to avoid the clutter of multiple remotes. Reliability is also important, as indicated by the need for a remote that maintains its programming without frequent reconfiguration. Overall, the design should prioritize ease of use and practicality while being visually appealing.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 111, 'prompt_tokens': 386, 'total_tokens': 497, 'completion_tokens_details': {'accepted_prediction_tokens':

In [19]:
print(dataset)

[{'user_input': 'Did they decide to use the Switchboard model as the starting point for their speaker adaptation?', 'retrieved_contexts': ["microphones, you know, to the noise. And that should really improve things, um, further. And then you use those adapted models, which are not speaker adapted but sort of acous you know, channel adapted. | Grad E: Channel adapted. | PhD F: use that as the starting models for your speaker adaptation. | Professor B: Yeah. But the thing is, uh I mean, w when you it depends whether you're ju were just using this as a a starter task for you know, to get things going for conversational or if we're", "using. It was that's that was isolated digits. | Grad E: Maybe it's the Bell Gram. Bell Digits. Alright. | Professor B: Um. | PhD F: By the way, I think we can improve these numbers if we care to compr improve them by, um, not starting with the Switchboard models but by taking the Switchboard models and doing supervised adaptation on a small amount of digit d

## Test

In [20]:
from ragas import EvaluationDataset
evaluation_dataset = EvaluationDataset.from_list(dataset)

In [21]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper


evaluator_llm = LangchainLLMWrapper(LLM)
evaluator_embeddings = LangchainEmbeddingsWrapper(EMBEDDINGS)
from ragas.metrics import Faithfulness, FactualCorrectness, ContextPrecision, ResponseRelevancy, LLMContextRecall

result = evaluate(dataset=evaluation_dataset,metrics=[Faithfulness(), FactualCorrectness(), ContextPrecision(), ResponseRelevancy(), LLMContextRecall()],llm=evaluator_llm, embeddings=evaluator_embeddings)
result

Evaluating: 100%|██████████| 25/25 [00:29<00:00,  1.18s/it]


{'faithfulness': 0.7750, 'factual_correctness': 0.2680, 'context_precision': 1.0000, 'answer_relevancy': 0.5076, 'context_recall': 0.3000}

In [22]:
os.environ["RAGAS_APP_TOKEN"] = "apt.4f8e-299f727bff54-c9b1-b0b4-f480bd0c-ea62a"

result.upload()

Evaluation results uploaded! View at https://app.ragas.io/dashboard/alignment/evaluation/6da27426-581f-4bf3-9395-642a4aa83ffc


'https://app.ragas.io/dashboard/alignment/evaluation/6da27426-581f-4bf3-9395-642a4aa83ffc'

In [12]:
from ragas import SingleTurnSample
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import ResponseRelevancy

sample = SingleTurnSample(
    user_input=query,
    response=response,
    retrieved_contexts=text_answers,
)

evaluator_llm = LangchainLLMWrapper(LLM)
evaluator_embeddings = LangchainEmbeddingsWrapper(EMBEDDINGS)
scorer = ResponseRelevancy(llm=evaluator_llm, embeddings=evaluator_embeddings)
await scorer.single_turn_ascore(sample)

0.841988900417566