# Libraries

In [11]:
from dotenv import load_dotenv
import os
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
import hashlib
from pinecone import Pinecone
from langchain_openai import OpenAI
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from sklearn.metrics.pairwise import cosine_similarity
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
import firebase_admin
import google.cloud
from firebase_admin import credentials, firestore
from prompt_templates import prompt_templates
from langchain_core.prompts import MessagesPlaceholder
from google.cloud.firestore_v1.base_query import FieldFilter
from sentence_transformers import CrossEncoder

# APIs

In [12]:
load_dotenv()

# Firestore Initialization
# credential_path = r'C:\Users\user\OneDrive\Desktop\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json'
credential_path = r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path

if not firebase_admin._apps:
    # cred = credentials.Certificate(r'C:\Users\user\OneDrive\Desktop\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json')
    cred = credentials.Certificate(r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json')
    firebase_admin.initialize_app(cred)

try:
    db = firestore.Client()
    print("*Firestore connected successfully!")
except Exception as e:
    print(f"Failed to connect to Firestore: {e}")

# API Keys Initialization
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

if not OPENAI_API_KEY:
    print("OpenAI API Key not found!")
if not PINECONE_API_KEY:
    print("Pinecone API Key not found!")

# Pinecone Initialization
try:
    pc = Pinecone(api_key=PINECONE_API_KEY)
    print("*Pinecone connected successfully!")
except Exception as e:
    print(f"Failed to connect to Pinecone: {e}")


# OpenAI Initialization
try:
    client=OpenAI(api_key=OPENAI_API_KEY)
    LLM = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")
    EMBEDDINGS = OpenAIEmbeddings(model='text-embedding-3-small')
    print("*OpenAI connected successfully!")
except Exception as e:
    print(f"Failed to connect to OpenAI: {e}")

# CrossEncoder Initialization
try:
    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
    print("*CrossEncoder connected successfully!")
except Exception as e:
    print(f"Failed to connect to CrossEncoder: {e}")

*Firestore connected successfully!
*Pinecone connected successfully!
*OpenAI connected successfully!
*CrossEncoder connected successfully!


# Query

In [30]:
query = "What was the primary objective of the meeting?"
user_id = "WuhmTzwTwmerjkSSK4XT8FyJS263"
session_id = "session11"
organization = "SCS"

## Embeddings

In [14]:
# Get Embeddings
def get_embeddings(text):
    """
    This function returns a list of the embeddings for a given query
    """
    text_embeddings = EMBEDDINGS.embed_query(text)
    return text_embeddings

query_embeddings = get_embeddings(text=query)
print(query_embeddings)
type(query_embeddings)

[-0.009281313046813011, 0.024569442495703697, 0.015751836821436882, 0.0012011845828965306, -0.012841171585023403, 0.006317142862826586, -0.024112867191433907, 0.04405948519706726, -0.017135830596089363, -0.008753398433327675, 0.015338066034018993, -0.009124365635216236, -0.034899450838565826, -0.031475137919187546, -0.009024489670991898, -0.02611038275063038, 0.005011623725295067, 0.03458555415272713, -0.005792795214802027, 0.0362691767513752, 0.009281313046813011, -0.0055538066662848, -0.004933150019496679, 0.013804259710013866, -0.09262765198945999, -0.050679825246334076, 0.050679825246334076, 0.004476575180888176, 0.02887836843729019, 0.0032495297491550446, -0.01109334547072649, -0.022928625345230103, -0.017135830596089363, 0.0248119980096817, -0.05099371820688248, 0.04214758053421974, -0.012206247076392174, 0.0005497626843862236, -0.013376220129430294, -0.04314633831381798, -0.009295581839978695, -0.019761135801672935, -0.015409406274557114, 0.021944135427474976, 0.0338436216115951

list

# Standard Resolve Namespace

In [106]:
def resolve_namespace(query_embeddings, organization):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }

        print("Computed similarities:", similarities)

        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        print("Ranked namespaces:", ranked_namespaces)
        
        return ranked_namespaces[0][0]
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization)
print(meeting_title)
type(meeting_title)

Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discussed the final preparations for the 

str

# Pinecone

In [None]:
# Get Relevant Documents
def query_pinecone_index(query_embeddings, meeting_title, index, top_k=5, include_metadata=True):
    """
    Query a Pinecone index.
    """
    # Build filter conditions directly for Pinecone
    filter_conditions = {}

    # Include date and meeting title if specified
    if meeting_title.lower() != 'unknown':
        filter_conditions['title'] = meeting_title

    # Query Pinecone using the build filter conditions
    query_response = index.query(
        vector=query_embeddings,
        filter=filter_conditions,
        top_k=top_k,
        include_metadata=include_metadata,
        namespace=meeting_title )

    print("Querying Pinecone Index: Done!")
    return [match['metadata']['text'] for match in query_response['matches']], [match['metadata']['date'] for match in query_response['matches']], [match['metadata']['title'] for match in query_response['matches']]

index = pc.Index(organization.lower())
text_answers, dates, titles = query_pinecone_index(query_embeddings=query_embeddings, meeting_title=meeting_title, index=index)
print(f"{text_answers}\n{dates[0]}\n{titles[0]}")
type(text_answers)
type(dates)
type(titles)

Querying Pinecone Index: Done!
["[00:00:00] John: Good morning, everyone. Thank you for joining today's kickoff meeting for our new\nsoftware development project. We'll be discussing the project scope, timelines, and\nresponsibilities. Let's get started with a quick round of introductions. I'll go first. I'm John, the\nproject manager. I'll be overseeing the project and ensuring we stay on track. Alice, would you\nlike to go next?\n[00:00:20]", "[00:00:55]\nJohn: Great, thank you. Now that we've introduced ourselves, let's dive into the project scope.\nOur goal is to develop a new customer management system for our client. The system should\nallow users to manage customer data, track interactions, and generate reports. Alice, can you\ngive us an overview of the technical requirements?\n[00:01:20]\nAlice: Sure, John. The system will be built using a microservices architecture. We'll be using", "Alice: Sure, thanks John. Hi, everyone. I'm Alice, the lead developer. I'll be responsible fo

list

# Chat History

In [15]:
def initialize_chat_history(user_id, session_id):
    """
    Initializes a chat history object.
    """
    chat_history = []
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    doc_snapshot = doc_ref.get()
    try:
        if doc_snapshot.exists:
            messages = doc_snapshot.get('messages')
            if messages is None:
                print(f"No 'messages' field found in document for user_id={user_id}, session_id={session_id}")
                return chat_history
            messages = doc_snapshot.get('messages')

            for message in messages:
                chat_history.append(message)
            print(f"Chat History Initialized: {chat_history}")
        else:
            print(f"No document found for user_id={user_id}, session_id={session_id}")
            return []
    except Exception as e:
        print(f"Error initializing chat history: {str(e)}")
    
    return chat_history

def update_chat_history(user_id, session_id, chat_history):
    """
    Updates the chat history object.
    """
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    try:
        doc_ref.update({
            'messages': chat_history
        })
    except Exception as e:
        print(f"Error updating chat history: {str(e)}")

def process_chat_history(chat_history):
    """
    Changes the chat history list into a HumanMessages and AIMessages Schema
    """
    process_chat_history = []
    for idx, message in enumerate(chat_history):
        if idx % 2 == 0:
            process_chat_history.append(HumanMessage(message))
        else:
            process_chat_history.append(AIMessage(message))

        
    return process_chat_history

chat_history = initialize_chat_history(user_id=user_id, session_id=session_id)

No document found for user_id=WuhmTzwTwmerjkSSK4XT8FyJS263, session_id=session11


# Resolve Namespace Variations

## Reranking Resolve Namespace without Session Context

In [105]:
from sentence_transformers import CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity
print(query)

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

def resolve_namespaces(query_embeddings, organization):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """       
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries)

    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespaces(query_embeddings=query_embeddings, organization=organization)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

What did Alice do in the meeting?
Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discuss

str

## Reranking Resolve Namespace with Session Context

In [None]:
from sentence_transformers import CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity


reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

def resolve_namespace(query_embeddings, organization, chat_history):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries, session_context):
        """
        Rank namespaces by semantic similarity to the query.
        """

        # Create a context-aware query
        context_aware_query = f"{session_context} {query}" if session_context else query

        # Compute similarity with past session conversation
        query_embeddings = get_embeddings(context_aware_query)
        
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)

        # # If the session is highly relevant, return it
        # if session_similarity > 0.85:
        #     print("\n✅ Continuing with the current session (high similarity)")
        #     return "Current Session Context"
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], context_aware_query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]
    
    session_context = " ".join(chat_history) if chat_history else ""
    print(session_context)
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries, session_context)
    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization, chat_history=chat_history)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

What is the title of the meeting where John and Alice were present? The title of the meeting where John and Alice were present is "Kickoff Meeting." when was it held? The meeting titled "Kickoff Meeting" where John and Alice were present does not have a specified date in the provided context. Therefore, I cannot determine when it was held based on the information given. try again The context provided does not mention a meeting titled "Kickoff Meeting" or the presence of John and Alice. Therefore, I cannot provide the title or the date of such a meeting based on the available information. When was it held? The context does not specify the date of the "Kickoff Meeting" or any meeting involving John and Alice. Therefore, I cannot determine when it was held based on the information given. When was it held? The "Kickoff Meeting" was held on December 9, 2024. What was the meeting about? The meeting was about kicking off a new software development project aimed at creating a customer manageme

str

## Namespace Prediction Using Session Context

In [19]:
from collections import Counter

def resolve_namespace(query_embeddings, organization, user_id, session_id):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries
    
    def fetch_namespaces_used(user_id, session_id):
        return ["Project Meeting", "Kickoff Meeting"]

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_namespaces = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n# Initial Ranking (Cosine Similarity):", ranked_namespaces)

        return ranked_namespaces
    
    
    summaries = fetch_summaries_by_organization(organization)
    ranked_namespaces = get_most_similar_namespace(query_embeddings, summaries)

    past_namespaces = fetch_namespaces_used(user_id, session_id)

    namespace_counts = Counter(past_namespaces)

    namespace_weights = {
        title: (namespace_counts.get(title, 0) / len(past_namespaces) if past_namespaces else 0) for title, _ in ranked_namespaces
    }

    final_scores = {
        title: (sim + namespace_weights.get(title, 0)) for title, sim in ranked_namespaces
    }

    final_namespace = max(final_scores, key=final_scores.get)

    print(f"Session-aware namespace ranking: {final_scores}")
    return final_namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization, user_id=user_id, session_id=session_id)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discussed the final preparations for the 

str

## Namespace Bayesian Updating

In [5]:
from collections import Counter
import math

def resolve_namespace(query_embeddings, organization, user_id, session_id):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries
    
    def fetch_namespaces_used(user_id, session_id):
        return ["Project Meeting", "Project Meeting", "Project Meeting", "Kickoff Meeting", "Kickoff Meeting"]
    
    def get_bayesian_update_namespaces(past_namespaces, decay_rate=0.7):
        """
        Applies Bayesian updating to boost frequently used namespaces.
        """
        namespace_counts = Counter(past_namespaces)

        recency_weights = {title: (i + 1) ** decay_rate for i, title in enumerate(reversed(past_namespaces))}
        print("## Recency weights: ", recency_weights)

        # Normalize weights
        weighted_counts = {title: recency_weights.get(title, 0) + namespace_counts[title] for title in namespace_counts}
        print("## Weighted counts: ", weighted_counts)

        total_weighted_count = sum(weighted_counts.values())

        # Compute Bayesian probabilities with recency bias
        posteriors = {title: weighted_counts[title] / total_weighted_count for title in namespace_counts}
        print("\n# Bayesian Updating with Recency Bias: ", posteriors)

        return posteriors

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }

        # Rank by similarity
        ranked_namespaces = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n# Initial Ranking (Cosine Similarity):", ranked_namespaces)

        return ranked_namespaces
    
    
    summaries = fetch_summaries_by_organization(organization)
    print("Summaries: ", summaries)

    past_namespaces = fetch_namespaces_used(user_id, session_id)
    print("Past Namespaces: ", past_namespaces)

    bayesian_scores = get_bayesian_update_namespaces(past_namespaces)

    ranked_namespaces = get_most_similar_namespace(query_embeddings, summaries)

    final_scores = {
        title: (bayesian_scores.get(title, 0) + sim) for title, sim in ranked_namespaces
    }

    final_namespace = max(final_scores, key=final_scores.get)

    print(f"Bayesian-updating namespace ranking: {final_scores}")
    return final_namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization, user_id=user_id, session_id=session_id)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discussed the final preparations for the 

str

## Only Cross Encoder

In [8]:
from sentence_transformers import CrossEncoder

print(query)

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

def resolve_namespaces(query_embeddings, organization):
    """
    Resolves the namespace by selecting the most relevant one based on the Cross-Encoder model.
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query, summaries):
        """
        Rank namespaces by relevance to the query using the Cross-Encoder model.
        """       
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summary, query) for summary in summaries.values()]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Rank by Cross-Encoder scores
        ranked_candidates = sorted(zip(summaries.keys(), scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", ranked_candidates)
        
        return ranked_candidates[0][0]
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query, summaries)

    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespaces(query_embeddings=query_embeddings, organization=organization)
print("\n Namespace Selected: ", meeting_title)


What were the project meeting all about?


RetryError: Timeout of 300.0s exceeded, last exception: 503 failed to connect to all addresses; last error: UNKNOWN: ipv4:142.251.220.202:443: tcp handshaker shutdown

## Only Cosine Similarity

In [None]:
def resolve_namespace_cs(query_embeddings, organization):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }

        print("Computed similarities:", similarities)

        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)

        cos_diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]

        if cos_diff < 0.15:
            print("Ambiguous")
            return ""

        print("Ranked namespaces:", ranked_namespaces)
        
        return ranked_namespaces[0][0]
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

print(query)

meeting_title = resolve_namespace_cs(query_embeddings=query_embeddings, organization=organization)
print(meeting_title)
type(meeting_title)

What are the kickoff meeting all about?
Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team d

str

## Fuzzy Match Meeting Title

In [139]:
from fuzzywuzzy import fuzz

def resolve_namespace_cs(query_text, organization):
    """
    Resolves the namespace by selecting the most similar one using fuzzy matching (fuzzywuzzy).
    """

    def fetch_namespaces_by_organization(organization):
        """
        Fetches namespaces by organization.
        """
        namespaces = []
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")  # Assuming meetingTitle is the namespace
            if meeting_title:
                namespaces.append(meeting_title)
        
        print(f"Fetched namespaces for organization '{organization}': {namespaces}")
        return namespaces

    def get_most_similar_namespace(query_text, namespaces):
        """
        Rank namespaces by fuzzy matching (using fuzzywuzzy's token_set_ratio).
        """
        similarities = {
            namespace: fuzz.token_set_ratio(query_text.lower(), namespace.lower())
            for namespace in namespaces
        }

        print("Computed fuzzy similarities:", similarities)

        # Rank namespaces based on similarity score
        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)

        # Check if the difference between the top 2 matches is too small (ambiguous)
        if len(ranked_namespaces) > 1:
            diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]
            if diff < 15:  # A threshold to detect ambiguity (adjust as needed)
                print("Ambiguous fuzzy match.")
                return ""

        print("Ranked namespaces:", ranked_namespaces)

        return ranked_namespaces[0][0] if ranked_namespaces else ""

    namespaces = fetch_namespaces_by_organization(organization)
    namespace = get_most_similar_namespace(query_text, namespaces)
    print(f"Selected namespace: {namespace}")
    return namespace

# Usage
print(query)
meeting_title = resolve_namespace_cs(query_text=query, organization='SCS')
print(meeting_title)
print(type(meeting_title))


I'm talking about project meeting
Fetched namespaces for organization 'SCS': ['April Meeting', 'Hello World', 'Intramural 2024 - 2025', 'Intramural 2025 - 2026', 'Kickoff Meeting', 'March 17', 'March 17 2', 'March 18 2025', 'March 18 2025 2', 'March 18 2025 3', 'March 18 2025 4', 'March 18 2025 5 ', 'March 29 Meeting', 'March 30', 'New Meeting', 'Project Meeting', 'Trial2', 'newMeetIntrams', 'newTrialMeet', 'trial']
Computed fuzzy similarities: {'April Meeting': 70, 'Hello World': 18, 'Intramural 2024 - 2025': 23, 'Intramural 2025 - 2026': 23, 'Kickoff Meeting': 64, 'March 17': 20, 'March 17 2': 23, 'March 18 2025': 22, 'March 18 2025 2': 21, 'March 18 2025 3': 21, 'March 18 2025 4': 21, 'March 18 2025 5 ': 21, 'March 29 Meeting': 61, 'March 30': 20, 'New Meeting': 78, 'Project Meeting': 100, 'Trial2': 21, 'newMeetIntrams': 34, 'newTrialMeet': 22, 'trial': 21}
Ranked namespaces: [('Project Meeting', 100), ('New Meeting', 78), ('April Meeting', 70), ('Kickoff Meeting', 64), ('March 29 M

## Fuzzy Matching Summary

In [140]:
from fuzzywuzzy import fuzz

def resolve_namespace_cs(query_text, organization):
    """
    Resolves the namespace by selecting the most similar one using fuzzy matching (fuzzywuzzy).
    """

    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_text, summaries):
        """
        Rank namespaces by fuzzy matching (using fuzzywuzzy's token_set_ratio).
        """
        top_two = {}

        similarities = {
            title: fuzz.token_set_ratio(query_text.lower(), summary.lower())
            for title, summary in summaries.items()
        }

        print("Computed fuzzy similarities:", similarities)

        # Rank namespaces based on similarity score
        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        print("Ranked namespaces:", ranked_namespaces)

        # Check if the difference between the top 2 matches is too small (ambiguous)
        if len(ranked_namespaces) > 1:
            diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]
            if diff < 15:  # A threshold to detect ambiguity (adjust as needed)
                print("Ambiguous fuzzy match.")
                top_two[ranked_namespaces[0][0]] = summaries.get(ranked_namespaces[0][0])
                top_two[ranked_namespaces[1][0]] = summaries.get(ranked_namespaces[1][0])
                print("Top two: ", top_two)
                return top_two

        print("Ranked namespaces:", ranked_namespaces)

        return ranked_namespaces[0][0] if ranked_namespaces else ""

    summaries = fetch_summaries_by_organization(organization)
    namespace = get_most_similar_namespace(query_text, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

# Usage
print(query)
meeting_title = resolve_namespace_cs(query_text=query, organization=organization)
print(meeting_title)
print(type(meeting_title))


I'm talking about project meeting
Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discuss

## First title then summary 

In [161]:
def resolve_namespace_cs(query_embeddings, organization):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """

        title_embeddings = {title: get_embeddings(title) for title in summaries.keys()}
        title_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in title_embeddings.items()
        }
        print("Computed title similarities:", title_similarities)
        
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed similarities:", summary_similarities)

        # Combine title and summary similarities (with title having higher priority)
        combined_similarities = {}
        for title in summaries.keys():
            # Weight title similarity higher (e.g., 0.7 for title, 0.3 for summary)
            combined_score = 0.7 * title_similarities.get(title, 0) + 0.3 * summary_similarities.get(title, 0)
            combined_similarities[title] = combined_score
        
        print("Combined similarities (title + summary):", combined_similarities)


        ranked_namespaces = sorted(combined_similarities.items(), key=lambda x: x[1], reverse=True)

        cos_diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]

        print("Ranked namespaces:", ranked_namespaces)
        
        return ranked_namespaces[0][0]
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

print(query)

meeting_title = resolve_namespace_cs(query_embeddings=query_embeddings, organization=organization)
print(meeting_title)
type(meeting_title)

What did alice contribute in the meeting?
Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team

str

## Combined Filtering (Fuzzy summary, cosine similarity)

In [None]:
from fuzzywuzzy import fuzz

def resolve_namespace_cf(query_text, organization):
    """
    Resolves the namespace by selecting the most similar one using fuzzy matching (fuzzywuzzy).
    """

    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries
    
    def ambiguous_fuzzy(query_text, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        query_embeddings = get_embeddings(query_text)       
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)

        score_diff = reranked_candidates[0][1] - reranked_candidates[1][1]
        print("Score difference:", score_diff)

        if score_diff < 0.9:
            print("Ambiguous in Cross Encoder")
            return ""

        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]

    def get_most_similar_namespace(query_text, summaries):
        """
        Rank namespaces by fuzzy matching (using fuzzywuzzy's token_set_ratio).
        """
        top_two = {}

        similarities = {
            title: fuzz.token_set_ratio(query_text.lower(), summary.lower())
            for title, summary in summaries.items()
        }

        print("Computed fuzzy similarities:", similarities)

        # Rank namespaces based on similarity score
        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        print("Ranked namespaces:", ranked_namespaces)

        # Check if the difference between the top 2 matches is too small (ambiguous)
        if len(ranked_namespaces) > 1:
            diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]
            if diff < 15:  # A threshold to detect ambiguity (adjust as needed)
                print("Ambiguous fuzzy match.")
                top_two[ranked_namespaces[0][0]] = summaries.get(ranked_namespaces[0][0])
                top_two[ranked_namespaces[1][0]] = summaries.get(ranked_namespaces[1][0])
                print("Top two: ", top_two)
                return ambiguous_fuzzy(query_text, top_two)

        print("Ranked namespaces:", ranked_namespaces)

        return ranked_namespaces[0][0] if ranked_namespaces else ""

    summaries = fetch_summaries_by_organization(organization)
    namespace = get_most_similar_namespace(query_text, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

# Usage
print(query)
meeting_title = resolve_namespace_cf(query_text=query, organization=organization)
print(meeting_title)
print(type(meeting_title))


I'm talking about project meeting
Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discuss

## Combine filtering (fuzzy title, cosine similarity)

In [26]:
from fuzzywuzzy import fuzz

def resolve_namespace_cf2(query_text, organization):
    """
    Resolves the namespace by selecting the most similar one using fuzzy matching (fuzzywuzzy).
    """

    def fetch_namespaces_by_organization(organization):
        """
        Fetches namespaces by organization.
        """
        namespaces = []
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")  # Assuming meetingTitle is the namespace
            if meeting_title:
                namespaces.append(meeting_title)
        
        print(f"Fetched namespaces for organization '{organization}': {namespaces}")
        return namespaces

    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries
    
    def ambiguous_fuzzy(query_text, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        query_embeddings = get_embeddings(query_text)       
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)

        score_diff = ranked_candidates[0][1] - ranked_candidates[1][1]
        print("Score difference:", score_diff)

        if score_diff > 0.2:
            return ranked_candidates[0][0]
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Cross Encoder:", reranked_candidates)

        score_diff = reranked_candidates[0][1] - reranked_candidates[1][1]
        print("Score difference:", score_diff)

        if score_diff < 0.9:
            print("Ambiguous in Cross Encoder")
            return ""

        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]

    def get_most_similar_namespace(query_text, summaries):
        """
        Rank namespaces by fuzzy matching (using fuzzywuzzy's token_set_ratio).
        """
        top_two = {}

        similarities = {
            title: fuzz.token_set_ratio(query_text.lower(), f"{title} {summary}".lower())
            for title, summary in summaries.items()
        }

        print("Computed fuzzy similarities:", similarities)

        # Rank namespaces based on similarity score
        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        print("Ranked namespaces:", ranked_namespaces)

        # Check for ambiguity
        if len(ranked_namespaces) > 1:
            diff = ranked_namespaces[0][1] - ranked_namespaces[1][1]
            if diff < 15:
                print("Ambiguous fuzzy match.")
                top_two[ranked_namespaces[0][0]] = summaries.get(ranked_namespaces[0][0])
                top_two[ranked_namespaces[1][0]] = summaries.get(ranked_namespaces[1][0])
                print("Top two:", top_two)
                return ambiguous_fuzzy(query_text, top_two)

        return ranked_namespaces[0][0] if ranked_namespaces else ""
    
    summaries = fetch_summaries_by_organization(organization)
    namespace = get_most_similar_namespace(query_text, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

# Usage
print(query)
meeting_title = resolve_namespace_cf2(query_text=query, organization='SCS')
print(meeting_title)
print(type(meeting_title))


Did they decide to use the Switchboard model as the starting point for their speaker adaptation?
Fetched summaries for organization 'SCS': {'Discussion on Digits Experiment Results and Scheduling for Forced Alignment Study': 'The meeting, led by Professor B and attended by various graduate students and postdocs, focused on two main agenda items: discussing results from a digits experiment and planning for a forced alignment study. Grad E initiated the meeting, confirming attendance and outlining the agenda. The group discussed the performance of different microphones used in the digits task, noting that the lapel microphone performed surprisingly well due to its quality. They also talked about the challenges of aligning schedules for an upcoming Saturday meeting and considered the possibility of including a colleague via conference call. The discussion included technical details about recognition systems, adaptation methods, and the importance of data quality. As the meeting concluded,

# Pinecone

In [None]:
import numpy as np

# Get Relevant Documents
def query_pinecone_index(query_embeddings, meeting_title, index, top_k=5, include_metadata=True):
    """
    Query a Pinecone index.
    """
    # Build filter conditions directly for Pinecone
    filter_conditions = {}

    # Include date and meeting title if specified
    if meeting_title.lower() != 'unknown':
        filter_conditions['title'] = meeting_title

    # Query Pinecone using the build filter conditions
    query_response = index.query(
        vector=query_embeddings,
        filter=filter_conditions,
        top_k=top_k,
        include_metadata=include_metadata,
        namespace=meeting_title )   

    print("Querying Pinecone Index: Done!")
    return " ".join([doc['metadata']['text'] for doc in query_response['matches']]), [doc['metadata']['date'] for doc in query_response['matches']], [doc['metadata']['title'] for doc in query_response['matches']]


# Decomposition

In [None]:
def decomposition_query_process(question, text_answers, chat_history, text_date, text_title):
    """Implements decomposition query"""
    def decompose_question(question):
        """
        Decomposes a complex question into smaller questions.
        """
        prompt = prompt_templates.decomposition_template().format(question=question)
        response = LLM.invoke(prompt)
        subquestions = response.content.split("\n")
        print("Decomposing Question: Done!")

        return subquestions
    
    def generate_qa_pairs(subquestions, context):
        """Generates QA pairs by answering each subquestion."""
        qa_pairs = []
        for subquestion in subquestions:
            context = context
            rag_prompt = prompt_templates.qa_template().format(context=context, subquestion=subquestion)
            answer = LLM.invoke(rag_prompt)
            qa_pairs.append((subquestion, answer))
        print("Generating QA Pairs: Done!")

        return qa_pairs
    
    def build_final_answer(question, context, qa_pairs, chat_history, text_date, text_title):
        """Builds a final answer by integrating the context and QA pairs."""
        qa_pairs_str = "\n".join([f"Q: {q}\nA: {a}" for q, a in qa_pairs])
        final_prompt = prompt_templates.final_rag_template_with_memory().format(context=context, qa_pairs=qa_pairs_str, question=question, chat_history=chat_history, text_date=text_date, text_title=text_title)
        final_response = LLM.invoke(final_prompt)
        print("Building Final Answer: Done!")

        return final_response
    
    subquestions = decompose_question(question)
    qa_pairs = generate_qa_pairs(subquestions, text_answers)
    print(qa_pairs)
    final_answer = build_final_answer(question, text_answers, qa_pairs, chat_history, text_date, text_title)

    return final_answer.content

# response = decomposition_query_process(question=query, text_answers=text_answers, chat_history=process_chat_history(chat_history))
# print(response)
# type(response)

In [19]:
def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

In [20]:
def generate_followup_question(question, meeting_summaries):
    """
    Generate followup response based on the previous query.
    """
    followup_prompt = prompt_templates.followup_template().format(question=question, meeting_list=meeting_summaries)
    followup_response = LLM.invoke(followup_prompt)
    print("Generating followup question: Done!")

    return followup_response.content


# Main

In [31]:
def CHATBOT():    
    print(f"Question: {query}")
    print(f"Current User ID: {user_id}")
    print(f"Current Session ID: {session_id}")
    print(f"Organization: {organization}")
    index = pc.Index(name=organization.lower())

    chat_history = ["What transpire during the meeting?", "Could you please specify which meeting you are referring to or the particular aspect of the meeting you would like more details about?"]
    meeting_summaries = fetch_summaries_by_organization(organization=organization)

    query_embeddings = get_embeddings(text=query)
    meeting_title = resolve_namespace_cf2(query_text=query, organization=organization)

    if meeting_title == "":
        print("AMBIGUOUS MATCH")
        response = generate_followup_question(query, meeting_summaries)
        return response

    text_answers, text_date, text_title = query_pinecone_index(query_embeddings=query_embeddings, meeting_title=meeting_title, index=index)
    print(f"Retrieved context: {text_answers}\nDate context: {text_date[0]}\nTitle Context: {text_title[0]}")

    # chat_history 

    response = decomposition_query_process(question=query, text_answers=text_answers, chat_history=process_chat_history(chat_history), text_date=text_date[0], text_title=text_title[0])

    print("User Query:", query)
    print("Chatbot Response:", response)

answer = CHATBOT()
print(answer)

Question: What was the primary objective of the meeting?
Current User ID: WuhmTzwTwmerjkSSK4XT8FyJS263
Current Session ID: session11
Organization: SCS
Fetched summaries for organization 'SCS': {'Discussion on Digits Experiment Results and Scheduling for Forced Alignment Study': 'The meeting, led by Professor B and attended by various graduate students and postdocs, focused on two main agenda items: discussing results from a digits experiment and planning for a forced alignment study. Grad E initiated the meeting, confirming attendance and outlining the agenda. The group discussed the performance of different microphones used in the digits task, noting that the lapel microphone performed surprisingly well due to its quality. They also talked about the challenges of aligning schedules for an upcoming Saturday meeting and considered the possibility of including a colleague via conference call. The discussion included technical details about recognition systems, adaptation methods, and the