# Libraries

In [1]:
import os
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
import hashlib
from pinecone import Pinecone
from langchain_openai import OpenAI
from langchain_core.messages import HumanMessage, AIMessage
from sklearn.metrics.pairwise import cosine_similarity
import firebase_admin
import google.cloud
from firebase_admin import credentials, firestore
from prompt_templates import prompt_templates
from google.cloud.firestore_v1.base_query import FieldFilter
from sentence_transformers import CrossEncoder
from collections import Counter
from fuzzywuzzy import fuzz
from dotenv import load_dotenv

  from tqdm.autonotebook import tqdm


# Tools

In [15]:
load_dotenv()

# Firestore Initialization
# credential_path = r'C:\Users\user\OneDrive\Desktop\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json'
credential_path = r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path

if not firebase_admin._apps:
    # cred = credentials.Certificate(r'C:\Users\user\OneDrive\Desktop\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json')
    cred = credentials.Certificate(r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json')
    firebase_admin.initialize_app(cred)

try:
    db = firestore.Client()
    print("*Firestore connected successfully!")
except Exception as e:
    print(f"Failed to connect to Firestore: {e}")

# API Keys Initialization
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY_EVALUATION')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

if not OPENAI_API_KEY:
    print("OpenAI API Key not found!")
if not PINECONE_API_KEY:
    print("Pinecone API Key not found!")

# Pinecone Initialization
try:
    pc = Pinecone(api_key=PINECONE_API_KEY)
    print("*Pinecone connected successfully!")
except Exception as e:
    print(f"Failed to connect to Pinecone: {e}")


# OpenAI Initialization
try:
    client=OpenAI(api_key=OPENAI_API_KEY)
    LLM = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")
    EMBEDDINGS = OpenAIEmbeddings(model='text-embedding-3-small')
    print("*OpenAI connected successfully!")
except Exception as e:
    print(f"Failed to connect to OpenAI: {e}")

# CrossEncoder Initialization
try:
    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
    print("*CrossEncoder connected successfully!")
except Exception as e:
    print(f"Failed to connect to CrossEncoder: {e}")

*Firestore connected successfully!
*Pinecone connected successfully!
*OpenAI connected successfully!


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


*CrossEncoder connected successfully!


# Queries

In [9]:
query = "Did they decide to use the Switchboard model as the starting point for their speaker adaptation?"
user_id = "WuhmTzwTwmerjkSSK4XT8FyJS263"
session_id = "session1"
organization = "SCS"

# Embeddings

In [8]:
# Get Embeddings
def get_embeddings(text):
    """
    This function returns a list of the embeddings for a given query
    """
    text_embeddings = EMBEDDINGS.embed_query(text)
    # print("Generating Embeddings: Done!")
    return text_embeddings

query_embeddings = get_embeddings(text=query)
print(query_embeddings)
type(query_embeddings)

[-0.04102969914674759, 0.0661308690905571, 0.07804980129003525, -0.012358343228697777, -0.047263797372579575, 0.0021060677245259285, 0.0016125921392813325, 0.015475391410291195, 0.031802136451005936, -0.020473655313253403, 0.06140723451972008, -0.04578079655766487, -0.044654812663793564, -0.051026225090026855, -0.018963191658258438, -0.051026225090026855, -0.02459310181438923, -0.02301398105919361, -0.0035564564168453217, 0.0037349658086895943, 0.021022913977503777, 0.030786005780100822, -0.02017156220972538, 0.027380594983696938, -0.05272892862558365, 0.01830407977104187, -0.02933046780526638, -0.037871453911066055, 0.045396313071250916, 0.04204582795500755, 0.00832128431648016, -0.039986103773117065, -0.013731492683291435, 0.021242618560791016, -0.027984781190752983, 0.04921366646885872, -0.009577715769410133, -0.00639544241130352, -0.03375200927257538, -0.014390603639185429, 0.005080651957541704, -0.014170899987220764, -0.021915461868047714, 0.0019086773972958326, 0.0210915729403495

list

In [None]:
def resolve_namespace(query, query_embeddings, summaries):
    """
    Resolves the namespace by selecting the most similar one using fuzzy matching (fuzzywuzzy).
    """
    def ambiguous_fuzzy(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """   
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)

        score_diff = ranked_candidates[0][1] - ranked_candidates[1][1]
        print("Score difference:", score_diff)

        if score_diff > 0.2:
            print("Cosine similarity is clear")
            return ranked_candidates[0][0]
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Cross Encoder:", reranked_candidates)

        score_diff = reranked_candidates[0][1] - reranked_candidates[1][1]
        print("Score difference:", score_diff)

        if score_diff < 0.9:
            print("Ambiguous in Cross Encoder")
            return ""

        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]

    def get_most_similar_namespace(query, query_embeddings, summaries):
        """
        Rank namespaces by fuzzy matching (using fuzzywuzzy's token_set_ratio).
        """
        top_two = {}

        similarities = {
            title: (fuzz.token_set_ratio(query.lower(), f"{title}".lower()) + fuzz.token_set_ratio(query.lower(), f"{summary}".lower()))/2
            for title, summary in summaries.items()
        }

        print("Computed fuzzy similarities:", similarities)

        # Rank namespaces based on similarity score
        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        print("Ranked namespaces:", ranked_namespaces)

        # Check for ambiguity
        if len(ranked_namespaces) > 1:
            diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]
            if diff < 15:
                print("Ambiguous fuzzy match.")
                top_two[ranked_namespaces[0][0]] = summaries.get(ranked_namespaces[0][0])
                top_two[ranked_namespaces[1][0]] = summaries.get(ranked_namespaces[1][0])
                print("Top two:", top_two)
                return ambiguous_fuzzy(query_embeddings, top_two)

        return ranked_namespaces[0][0] if ranked_namespaces else ""

    namespace = get_most_similar_namespace(query, query_embeddings, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization)
print(meeting_title)
type(meeting_title)

Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discussed the final preparations for the 

str

# Pinecone

In [10]:
# Get Relevant Documents
def query_pinecone_index(query_embeddings, meeting_title, index, top_k=5, include_metadata=True):
    """
    Query a Pinecone index.
    """
    # Build filter conditions directly for Pinecone
    filter_conditions = {}

    # Include date and meeting title if specified
    if meeting_title.lower() != 'unknown':
        filter_conditions['title'] = meeting_title

    # Query Pinecone using the build filter conditions
    query_response = index.query(
        vector=query_embeddings,
        filter=filter_conditions,
        top_k=top_k,
        include_metadata=include_metadata,
        namespace=meeting_title )

    print("Querying Pinecone Index: Done!")
    return [match['metadata']['text'] for match in query_response['matches']], [match['metadata']['date'] for match in query_response['matches']], [match['metadata']['title'] for match in query_response['matches']]

index = pc.Index(organization.lower())
text_answers, dates, titles = query_pinecone_index(query_embeddings=query_embeddings, meeting_title=meeting_title, index=index)
print(f"{text_answers}\n{dates[0]}\n{titles[0]}")
type(text_answers)
type(dates)
type(titles)

Querying Pinecone Index: Done!
["Czech: Hello my name is Czech.\nGian: Hello my name is Gian.\nShaundyl: Hello my name is Shaundyl.\nCzech (Team Lead): Alright, everyone, thanks for joining today’s meeting. We have about 10 minutes to go over the final details before the product launch. Let's start with the progress update. Bob, how are we doing on the development front?", 'Gian: We’ve set up a dedicated support channel for the product and briefed the customer support team on the common issues we’re anticipating. We’ll also monitor social media for any unexpected feedback.\nCzech: Sounds like we’re in good shape. Thanks, everyone. Let’s aim to regroup tomorrow for a final status check. Anything else before we wrap up?\nShaundyl: Nothing from my side. I’ll update you if any blockers come up.\nGian: I’m all set. Let’s get this done!', 'Czech: Great to hear that. Gian, how are we looking on the project timeline? Any changes or concerns?\nGian (Project Manager): We’re on track, but barely.

list

# Chat History

In [37]:
def initialize_chat_history(user_id, session_id):
    """
    Initializes a chat history object.
    """
    chat_history = []
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    doc_snapshot = doc_ref.get()
    try:
        if doc_snapshot.exists:
            messages = doc_snapshot.get('messages')
            if messages is None:
                print(f"No 'messages' field found in document for user_id={user_id}, session_id={session_id}")
                return chat_history
            messages = doc_snapshot.get('messages')

            for message in messages:
                chat_history.append(message)
            print(f"Chat History Initialized: {chat_history}")
        else:
            print(f"No document found for user_id={user_id}, session_id={session_id}")
    except Exception as e:
        print(f"Error initializing chat history: {str(e)}")
    
    return chat_history

def update_chat_history(user_id, session_id, chat_history):
    """
    Updates the chat history object.
    """
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    try:
        doc_ref.update({
            'messages': chat_history
        })
    except Exception as e:
        print(f"Error updating chat history: {str(e)}")

def process_chat_history(chat_history):
    """
    Changes the chat history list into a HumanMessages and AIMessages Schema
    """
    process_chat_history = []
    for idx, message in enumerate(chat_history):
        if idx % 2 == 0:
            process_chat_history.append(HumanMessage(message))
        else:
            process_chat_history.append(AIMessage(message))

        
    return process_chat_history

chat_history = initialize_chat_history(user_id=user_id, session_id=session_id)

Chat History Initialized: []


# Decomposition

In [39]:
def decomposition_query_process(question, text_answers, chat_history):
    """Implements decomposition query"""

    def output_parser(output):
        """
        Helps parses the LLM output, prints it, and returns it.
        """
        print("\n" + output.content + "\n")

        return output.content

    def decompose_question(question):
        """
        Decomposes a complex question into smaller questions.
        """
        prompt = prompt_templates.decomposition_template().format(question=question)
        response = LLM.invoke(prompt)
        subquestions = response.content.split("\n")
        print("Decomposing Question: Done!")

        return subquestions
    
    def generate_qa_pairs(subquestions, context):
        """Generates QA pairs by answering each subquestion."""
        qa_pairs = []
        for subquestion in subquestions:
            context = context
            rag_prompt = prompt_templates.qa_template().format(context=context, subquestion=subquestion)
            answer = LLM.invoke(rag_prompt)
            qa_pairs.append((subquestion, answer))
        print("Generating QA Pairs: Done!")

        return qa_pairs
    
    def build_final_answer(question, context, qa_pairs):
        """Builds a final answer by integrating the context and QA pairs."""
        qa_pairs_str = "\n".join([f"Q: {q}\nA: {a}" for q, a in qa_pairs])
        # final_prompt = prompt_templates.final_rag_template().format(context=context, qa_pairs=qa_pairs_str, question=question)
        final_prompt = prompt_templates.final_rag_template_with_memory().format(context=context, qa_pairs=qa_pairs_str, question=question, chat_history=chat_history)
        final_response = LLM.invoke(final_prompt)
        print("Building Final Answer: Done!")

        return final_response
    
    subquestions = decompose_question(question)
    qa_pairs = generate_qa_pairs(subquestions, text_answers)
    print(qa_pairs)
    final_answer = build_final_answer(question, text_answers, qa_pairs)

    return output_parser(final_answer)

response = decomposition_query_process(question=query, text_answers=text_answers, chat_history=chat_history)
print(response)
type(response)

Decomposing Question: Done!
Generating QA Pairs: Done!
[('1. What is the main objective of the project meeting?', AIMessage(content='The main objective of the project meeting is to go over the final details before the product launch, ensuring that all aspects of the project, including development, support, and marketing, are on track and any potential issues are addressed promptly to meet the launch date successfully.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 53, 'prompt_tokens': 441, 'total_tokens': 494, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_7c63087da1', 'finish_reason': 'stop', 'logprobs': None}, id='run-c9bb51b4-0a8f-41c0-b450-1a728ee2d26d-0', usage_metadata={'input_tokens': 441, 'output_tokens': 53, 

str

# RAGAS

In [None]:
print(f"Query: {query}")
print(f"Relevant Document: {text_answers}")
print(f"Answer: {response}")

## Dataset

In [40]:
sample_queries = [
    "Did they decide to use the Switchboard model as the starting point for their speaker adaptation?",
    "What benefit did the lapel microphone provide that made it perform better than expected?",
    "What did PhD A recommend to do when discussing forced alignment?",
    "Can you list the main tasks that were agreed upon during the meeting?",
    "What did Professor B recommend to do during the discussion of digits?"
]

expected_responses = [
    "Yes, they decided to use the Switchboard model as the starting point for their speaker adaptation, but they would also adapt the models to the room acoustics.",
    "Lapel microphone had a very high-quality microphone. It picks up other people's speakers other people's talking is an indication of that it the fact it is a good microphone. Some advantages to it in terms of breath noises and clothes rustling if no one else is talking.",
    "PhD A suggested using a proximity constraint to help the forced alignment algorithm determine which words most likely correspond to each speaker. They also proposed to allow for reject models that could match fragments and noise. Additionally, PhD A suggested marking the beginning and end of foreground speech by hand to improve the accuracy of the alignment.",
    "The participants decided to conduct a test to determine if the SRI system would show improvement if trained on TI digits, and to compare the results to the system trained on Switchboard data. They also agreed to investigate methods for improving the SRI system's performance in noisy conditions and to explore techniques for enhancing the accuracy of forced alignment.",
    "Professor B recommended running a test to determine whether the SRI system would show improvement if trained on TI digits, and to compare the results to the system trained on Switchboard data."
]

In [None]:
dataset = []
meeting_title = "Discussion on Digits Experiment Results and Scheduling for Forced Alignment Study"
index = pc.Index("scs")

for query,reference in zip(sample_queries,expected_responses):

    text_answers = query_pinecone_index(query_embeddings=query_embeddings, meeting_title=meeting_title, index=index)
    response = decomposition_query_process(question=query, text_answers=text_answers, chat_history=[])
    dataset.append(
        {
            "user_input":query,
            "retrieved_contexts":text_answers,
            "response":response,
            "reference":reference
        }
    )

Querying Pinecone Index: Done!
Decomposing Question: Done!
Generating QA Pairs: Done!
[('1. How many people attended the project meeting?', AIMessage(content='Four people attended the project meeting: Czech, Gian, Shaundyl, and Bob.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 439, 'total_tokens': 458, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_d93b1f03d6', 'finish_reason': 'stop', 'logprobs': None}, id='run-6754218e-593b-4b68-a32d-f3ffa8caf35c-0', usage_metadata={'input_tokens': 439, 'output_tokens': 19, 'total_tokens': 458, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})), ('2. What were the names of the individuals who attended 

In [17]:
print(dataset)

[{'user_input': 'Who were the attendees of the project meeting', 'retrieved_contexts': ["Czech: Hello my name is Czech.\nGian: Hello my name is Gian.\nShaundyl: Hello my name is Shaundyl.\nCzech (Team Lead): Alright, everyone, thanks for joining today’s meeting. We have about 10 minutes to go over the final details before the product launch. Let's start with the progress update. Bob, how are we doing on the development front?", 'Gian: We’ve set up a dedicated support channel for the product and briefed the customer support team on the common issues we’re anticipating. We’ll also monitor social media for any unexpected feedback.\nCzech: Sounds like we’re in good shape. Thanks, everyone. Let’s aim to regroup tomorrow for a final status check. Anything else before we wrap up?\nShaundyl: Nothing from my side. I’ll update you if any blockers come up.\nGian: I’m all set. Let’s get this done!', 'Czech: Great to hear that. Gian, how are we looking on the project timeline? Any changes or concer

## Test

In [18]:
from ragas import EvaluationDataset
evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper


evaluator_llm = LangchainLLMWrapper(LLM)
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

result = evaluate(dataset=evaluation_dataset,metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],llm=evaluator_llm)
result

Evaluating: 100%|██████████| 25/25 [01:12<00:00,  2.91s/it]


{'context_recall': 1.0000, 'faithfulness': 0.8933, 'factual_correctness': 0.2980, 'answer_relevancy': 0.8298, 'context_precision': 0.7000}

In [20]:
os.environ["RAGAS_APP_TOKEN"] = "apt.4f8e-299f727bff54-c9b1-b0b4-f480bd0c-ea62a"

result.upload()

Evaluation results uploaded! View at https://app.ragas.io/dashboard/alignment/evaluation/43151c65-2b42-45b1-94a3-dc122c07a39d


'https://app.ragas.io/dashboard/alignment/evaluation/43151c65-2b42-45b1-94a3-dc122c07a39d'

In [12]:
from ragas import SingleTurnSample
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import ResponseRelevancy

sample = SingleTurnSample(
    user_input=query,
    response=response,
    retrieved_contexts=text_answers,
)

evaluator_llm = LangchainLLMWrapper(LLM)
evaluator_embeddings = LangchainEmbeddingsWrapper(EMBEDDINGS)
scorer = ResponseRelevancy(llm=evaluator_llm, embeddings=evaluator_embeddings)
await scorer.single_turn_ascore(sample)

0.841988900417566