# Libraries

In [1]:
from dotenv import load_dotenv
import os
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
import hashlib
from pinecone import Pinecone
from langchain_openai import OpenAI
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from sklearn.metrics.pairwise import cosine_similarity
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
import firebase_admin
import google.cloud
from firebase_admin import credentials, firestore
from prompt_templates import prompt_templates
from langchain_core.prompts import MessagesPlaceholder
from google.cloud.firestore_v1.base_query import FieldFilter

  from tqdm.autonotebook import tqdm


# APIs

In [2]:
load_dotenv()

# Firestore Initialization
credential_path = r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path

if not firebase_admin._apps:
    cred = credentials.Certificate(r'C:\Codes\Django\thesis_django\echo_backend\echo_chatbot\ServiceAccountKey.json')
    firebase_admin.initialize_app(cred)

try:
    db = firestore.Client()
    print("*Firestore connected successfully!")
except Exception as e:
    print(f"Failed to connect to Firestore: {e}")

# API Keys Initialization
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

if not OPENAI_API_KEY:
    print("OpenAI API Key not found!")
if not PINECONE_API_KEY:
    print("Pinecone API Key not found!")

# Pinecone Initialization
try:
    pc = Pinecone(api_key=PINECONE_API_KEY)
    print("*Pinecone connected successfully!")
except Exception as e:
    print(f"Failed to connect to Pinecone: {e}")


# OpenAI Initialization
try:
    client=OpenAI(api_key=OPENAI_API_KEY)
    LLM = ChatOpenAI(temperature=0, model_name="gpt-4-turbo")
    EMBEDDINGS = OpenAIEmbeddings(model='text-embedding-3-small')           
    print("*OpenAI connected successfully!")
except Exception as e:
    print(f"Failed to connect to OpenAI: {e}")

*Firestore connected successfully!
*Pinecone connected successfully!
*OpenAI connected successfully!


# Query

In [None]:
query = "What?"
user_id = "WuhmTzwTwmerjkSSK4XT8FyJS263"
session_id = "session1"
organization = "SCS"

In [15]:
# Get Embeddings
def get_embeddings(text):
    """
    This function returns a list of the embeddings for a given query
    """
    text_embeddings = EMBEDDINGS.embed_query(text)
    print("Generating Embeddings: Done!")
    return text_embeddings

query_embeddings = get_embeddings(text=query)
print(query_embeddings)
type(query_embeddings)

Generating Embeddings: Done!
[-0.0474587120115757, -0.01712365634739399, 0.04378720745444298, 0.0067147910594940186, -0.0038708795327693224, -0.015175051055848598, -0.03361533582210541, 0.0236089788377285, -0.04059721156954765, -0.017319269478321075, 0.03539089858531952, -0.024541903287172318, -0.004213202279061079, -0.03367552161216736, 0.02998897060751915, 0.028002746403217316, -0.05456096678972244, 0.028740057721734047, -0.0156791303306818, -0.01590483821928501, -0.002990621142089367, -0.002772437408566475, -0.009073431603610516, -0.002112243790179491, 0.0076176198199391365, -0.02493312768638134, -0.034217219799757004, -0.017605166882276535, 0.05131078138947487, 0.025580156594514847, 0.051792293787002563, -0.035059861838817596, 0.012376281432807446, 0.007504766341298819, -0.05642681568861008, 0.04372701793909073, 0.000296005659038201, -0.0006409145426005125, -0.03900221362709999, 0.020930586382746696, 0.0023191419895738363, -0.06440180540084839, -0.009615128859877586, 0.060278885066

list

# Standard Resolve Namespace

In [16]:
def resolve_namespace(query_embeddings, organization):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }

        print("Computed similarities:", similarities)

        ranked_namespaces = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        print("Ranked namespaces:", ranked_namespaces)
        
        return ranked_namespaces[0][0]
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries)
    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization)
print(meeting_title)
type(meeting_title)

Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discussed the final preparations for the 

str

In [9]:
# Get Relevant Documents
def query_pinecone_index(query_embeddings, meeting_title, index, top_k=5, include_metadata=True):
    """
    Query a Pinecone index.
    """
    # Build filter conditions directly for Pinecone
    filter_conditions = {}

    # Include date and meeting title if specified
    if meeting_title.lower() != 'unknown':
        filter_conditions['title'] = meeting_title

    # Query Pinecone using the build filter conditions
    query_response = index.query(
        vector=query_embeddings,
        filter=filter_conditions,
        top_k=top_k,
        include_metadata=include_metadata,
        namespace=meeting_title )

    print("Querying Pinecone Index: Done!")
    return [match['metadata']['text'] for match in query_response['matches']], [match['metadata']['date'] for match in query_response['matches']], [match['metadata']['title'] for match in query_response['matches']]

index = pc.Index(organization.lower())
text_answers, dates, titles = query_pinecone_index(query_embeddings=query_embeddings, meeting_title=meeting_title, index=index)
print(f"{text_answers}\n{dates[0]}\n{titles[0]}")
type(text_answers)
type(dates)
type(titles)

Querying Pinecone Index: Done!
["[00:00:00] John: Good morning, everyone. Thank you for joining today's kickoff meeting for our new\nsoftware development project. We'll be discussing the project scope, timelines, and\nresponsibilities. Let's get started with a quick round of introductions. I'll go first. I'm John, the\nproject manager. I'll be overseeing the project and ensuring we stay on track. Alice, would you\nlike to go next?\n[00:00:20]", "[00:00:55]\nJohn: Great, thank you. Now that we've introduced ourselves, let's dive into the project scope.\nOur goal is to develop a new customer management system for our client. The system should\nallow users to manage customer data, track interactions, and generate reports. Alice, can you\ngive us an overview of the technical requirements?\n[00:01:20]\nAlice: Sure, John. The system will be built using a microservices architecture. We'll be using", "Alice: Sure, thanks John. Hi, everyone. I'm Alice, the lead developer. I'll be responsible fo

list

In [11]:
def initialize_chat_history(user_id, session_id):
    """
    Initializes a chat history object.
    """
    chat_history = []
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    doc_snapshot = doc_ref.get()
    try:
        if doc_snapshot.exists:
            messages = doc_snapshot.get('messages')
            if messages is None:
                print(f"No 'messages' field found in document for user_id={user_id}, session_id={session_id}")
                return chat_history
            messages = doc_snapshot.get('messages')

            for message in messages:
                chat_history.append(message)
            print(f"Chat History Initialized: {chat_history}")
        else:
            print(f"No document found for user_id={user_id}, session_id={session_id}")
    except Exception as e:
        print(f"Error initializing chat history: {str(e)}")
    
    return chat_history

def update_chat_history(user_id, session_id, chat_history):
    """
    Updates the chat history object.
    """
    doc_ref = db.collection("chatHistory").document(user_id).collection("session").document(session_id)
    try:
        doc_ref.update({
            'messages': chat_history
        })
    except Exception as e:
        print(f"Error updating chat history: {str(e)}")

def process_chat_history(chat_history):
    """
    Changes the chat history list into a HumanMessages and AIMessages Schema
    """
    process_chat_history = []
    for idx, message in enumerate(chat_history):
        if idx % 2 == 0:
            process_chat_history.append(HumanMessage(message))
        else:
            process_chat_history.append(AIMessage(message))

        
    return process_chat_history

chat_history = initialize_chat_history(user_id=user_id, session_id=session_id)

Chat History Initialized: ['What is the title of the meeting where John and Alice were present?', 'The title of the meeting where John and Alice were present is "Kickoff Meeting."', 'when was it held?', 'The meeting titled "Kickoff Meeting" where John and Alice were present does not have a specified date in the provided context. Therefore, I cannot determine when it was held based on the information given.', 'try again', 'The context provided does not mention a meeting titled "Kickoff Meeting" or the presence of John and Alice. Therefore, I cannot provide the title or the date of such a meeting based on the available information.', 'When was it held?', 'The context does not specify the date of the "Kickoff Meeting" or any meeting involving John and Alice. Therefore, I cannot determine when it was held based on the information given.', 'When was it held?', 'The "Kickoff Meeting" was held on December 9, 2024.', 'What was the meeting about?', "The meeting was about kicking off a new softw

# Reranking Resolve Namespace without Session Context

In [17]:
from sentence_transformers import CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity


reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

def resolve_namespace(query_embeddings, organization, chat_history):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries, session_context):
        """
        Rank namespaces by semantic similarity to the query.
        """

        # Compute similarity with past session conversation
        session_embedding = get_embeddings(session_context) if session_context else None
        session_similarity = (cosine_similarity([query_embeddings], [session_embedding])[0][0] if session_embedding else 0)
        print("Computed Session Similarity: ", session_similarity)
        
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        print("Generated summary embeddings:", summary_embeddings)

        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)

        # If the session is highly relevant, return it
        if session_similarity > 0.85:
            print("\n✅ Continuing with the current session (high similarity)")
            return "Current Session Context"
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]
    
    session_context = " ".join(chat_history) if chat_history else ""
    print(session_context)
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries, session_context)
    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization, chat_history=chat_history)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

What is the title of the meeting where John and Alice were present? The title of the meeting where John and Alice were present is "Kickoff Meeting." when was it held? The meeting titled "Kickoff Meeting" where John and Alice were present does not have a specified date in the provided context. Therefore, I cannot determine when it was held based on the information given. try again The context provided does not mention a meeting titled "Kickoff Meeting" or the presence of John and Alice. Therefore, I cannot provide the title or the date of such a meeting based on the available information. When was it held? The context does not specify the date of the "Kickoff Meeting" or any meeting involving John and Alice. Therefore, I cannot determine when it was held based on the information given. When was it held? The "Kickoff Meeting" was held on December 9, 2024. What was the meeting about? The meeting was about kicking off a new software development project aimed at creating a customer manageme

str

# Reranking Resolve Namespace with Session Context

In [None]:
from sentence_transformers import CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity


reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

def resolve_namespace(query_embeddings, organization, chat_history):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries

    def get_most_similar_namespace(query_embeddings, summaries, session_context):
        """
        Rank namespaces by semantic similarity to the query.
        """

        # Create a context-aware query
        context_aware_query = f"{session_context} {query}" if session_context else query

        # Compute similarity with past session conversation
        query_embeddings = get_embeddings(context_aware_query)
        
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_candidates = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n🔹 Initial Ranking (Cosine Similarity):", ranked_candidates)

        # # If the session is highly relevant, return it
        # if session_similarity > 0.85:
        #     print("\n✅ Continuing with the current session (high similarity)")
        #     return "Current Session Context"
        
        # Prepare input for re-ranking
        cross_encoder_inputs = [(summaries[title], context_aware_query) for title, _ in ranked_candidates]

        # Compute cross-encoder scores
        scores = reranker.predict(cross_encoder_inputs)

        # Re-rank based on cross-encoder scores
        reranked_candidates = sorted(zip(ranked_candidates, scores), key=lambda x: x[1], reverse=True)
        print("\n🔹 Re-ranked Candidates (Cross-Encoder):", reranked_candidates)
        
        return reranked_candidates[0][0][0]
    
    session_context = " ".join(chat_history) if chat_history else ""
    print(session_context)
    
    summaries = fetch_summaries_by_organization(organization)

    namespace = get_most_similar_namespace(query_embeddings, summaries, session_context)
    print(f"Selected namespace: {namespace}")
    return namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization, chat_history=chat_history)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

What is the title of the meeting where John and Alice were present? The title of the meeting where John and Alice were present is "Kickoff Meeting." when was it held? The meeting titled "Kickoff Meeting" where John and Alice were present does not have a specified date in the provided context. Therefore, I cannot determine when it was held based on the information given. try again The context provided does not mention a meeting titled "Kickoff Meeting" or the presence of John and Alice. Therefore, I cannot provide the title or the date of such a meeting based on the available information. When was it held? The context does not specify the date of the "Kickoff Meeting" or any meeting involving John and Alice. Therefore, I cannot determine when it was held based on the information given. When was it held? The "Kickoff Meeting" was held on December 9, 2024. What was the meeting about? The meeting was about kicking off a new software development project aimed at creating a customer manageme

str

# Namespace Prediction Using Session Context

In [19]:
from collections import Counter

def resolve_namespace(query_embeddings, organization, user_id, session_id):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries
    
    def fetch_namespaces_used(user_id, session_id):
        return ["Project Meeting", "Kickoff Meeting"]

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }
        print("Computed Summary Similarity:", summary_similarities)

        # Rank by similarity
        ranked_namespaces = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n# Initial Ranking (Cosine Similarity):", ranked_namespaces)

        return ranked_namespaces
    
    
    summaries = fetch_summaries_by_organization(organization)
    ranked_namespaces = get_most_similar_namespace(query_embeddings, summaries)

    past_namespaces = fetch_namespaces_used(user_id, session_id)

    namespace_counts = Counter(past_namespaces)

    namespace_weights = {
        title: (namespace_counts.get(title, 0) / len(past_namespaces) if past_namespaces else 0) for title, _ in ranked_namespaces
    }

    final_scores = {
        title: (sim + namespace_weights.get(title, 0)) for title, sim in ranked_namespaces
    }

    final_namespace = max(final_scores, key=final_scores.get)

    print(f"Session-aware namespace ranking: {final_scores}")
    return final_namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization, user_id=user_id, session_id=session_id)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discussed the final preparations for the 

str

# Namespace Bayesian Updating

In [None]:
from collections import Counter
import math

def resolve_namespace(query_embeddings, organization, user_id, session_id):
    """
    Resolves the namespace by either selecting the most similar one
    """
    def fetch_summaries_by_organization(organization):
        """
        Fetches summaries by organization
        """
        summaries = {}
        meetings_ref = db.collection("Meetings")
        query = meetings_ref.where(filter=FieldFilter("organization", "==", organization))
        docs = query.stream()

        for doc in docs:
            data = doc.to_dict()
            meeting_title = data.get("meetingTitle")
            summary = data.get("meetingSummary")
            if meeting_title and summary:
                summaries[meeting_title] = summary
        
        print(f"Fetched summaries for organization '{organization}': {summaries}")
        return summaries
    
    def fetch_namespaces_used(user_id, session_id):
        return ["Project Meeting", "Project Meeting", "Project Meeting", "Kickoff Meeting", "Kickoff Meeting"]
    
    def get_bayesian_update_namespaces(past_namespaces, decay_rate=0.7):
        """
        Applies Bayesian updating to boost frequently used namespaces.
        """
        namespace_counts = Counter(past_namespaces)
        total_count = sum(namespace_counts.values())

        recency_weights = {title: (i + 1) ** decay_rate for i, title in enumerate(reversed(past_namespaces))}

        # Normalize weights
        weighted_counts = {title: recency_weights.get(title, 0) + namespace_counts[title] for title in namespace_counts}
        total_weighted_count = sum(weighted_counts.values())

        # Compute Bayesian probabilities with recency bias
        posteriors = {title: weighted_counts[title] / total_weighted_count for title in namespace_counts}
        print("\n# Bayesian Updating with Recency Bias: ", posteriors)

        return posteriors

    def get_most_similar_namespace(query_embeddings, summaries):
        """
        Rank namespaces by semantic similarity to the query.
        """
        # Compute similarity with meeting summaries
        summary_embeddings = {title: get_embeddings(summary) for title, summary in summaries.items()}
        summary_similarities = {
            title: cosine_similarity([query_embeddings], [embedding])[0][0] for title, embedding in summary_embeddings.items()
        }

        # Rank by similarity
        ranked_namespaces = sorted(summary_similarities.items(), key=lambda x: x[1], reverse=True)
        print("\n# Initial Ranking (Cosine Similarity):", ranked_namespaces)

        return ranked_namespaces
    
    
    summaries = fetch_summaries_by_organization(organization)

    past_namespaces = fetch_namespaces_used(user_id, session_id)

    bayesian_scores = get_bayesian_update_namespaces(past_namespaces)

    ranked_namespaces = get_most_similar_namespace(query_embeddings, summaries)

    final_scores = {
        title: (bayesian_scores.get(title, 0) + sim) for title, sim in ranked_namespaces
    }

    final_namespace = max(final_scores, key=final_scores.get)

    print(f"Bayesian-updating namespace ranking: {final_scores}")
    return final_namespace

meeting_title = resolve_namespace(query_embeddings=query_embeddings, organization=organization, user_id=user_id, session_id=session_id)
print("\n Namespace Selected: ", meeting_title)
type(meeting_title)

Fetched summaries for organization 'SCS': {'Kickoff Meeting': 'On January 15, 2024, a kickoff meeting was held for a new software development project focused on creating a customer management system. Participants included John (Project Manager), Alice (Lead Developer), Bob (UI/UX Designer), and Sara (QA Analyst). The team discussed the project scope, which includes managing customer data, tracking interactions, and generating reports, using a microservices architecture with Java, React, and PostgreSQL. The timeline is set over six months with phases for planning and design, development, testing, and deployment. Responsibilities were outlined, with Alice overseeing development, Bob handling design, Sara managing QA, and John coordinating the project. Regular bi-weekly check-ins will be conducted to ensure deadlines are met and address any issues promptly.', 'Project Meeting': 'During the project meeting on January 9, 2025, led by Czech, the team discussed the final preparations for the 

str

In [None]:
def decomposition_query_process(question, text_answers, chat_history):
    """Implements decomposition query"""

    def output_parser(output):
        """
        Helps parses the LLM output, prints it, and returns it.
        """
        print("\n" + output.content + "\n")

        return output.content

    def decompose_question(question):
        """
        Decomposes a complex question into smaller questions.
        """
        prompt = prompt_templates.decomposition_template().format(question=question)
        response = LLM.invoke(prompt)
        subquestions = response.content.split("\n")
        print("Decomposing Question: Done!")

        return subquestions
    
    def generate_qa_pairs(subquestions, context):
        """Generates QA pairs by answering each subquestion."""
        qa_pairs = []
        for subquestion in subquestions:
            context = context
            rag_prompt = prompt_templates.qa_template().format(context=context, subquestion=subquestion)
            answer = LLM.invoke(rag_prompt)
            qa_pairs.append((subquestion, answer))
        print("Generating QA Pairs: Done!")

        return qa_pairs
    
    def build_final_answer(question, context, qa_pairs):
        """Builds a final answer by integrating the context and QA pairs."""
        qa_pairs_str = "\n".join([f"Q: {q}\nA: {a}" for q, a in qa_pairs])
        # final_prompt = prompt_templates.final_rag_template().format(context=context, qa_pairs=qa_pairs_str, question=question)
        final_prompt = prompt_templates.final_rag_template_with_memory().format(context=context, qa_pairs=qa_pairs_str, question=question, chat_history=chat_history)
        final_response = LLM.invoke(final_prompt)
        print("Building Final Answer: Done!")

        return final_response
    
    subquestions = decompose_question(question)
    qa_pairs = generate_qa_pairs(subquestions, text_answers)
    print(qa_pairs)
    final_answer = build_final_answer(question, text_answers, qa_pairs)

    return output_parser(final_answer)

response = decomposition_query_process(question=query, text_answers=text_answers, chat_history=process_chat_history(chat_history))
print(response)
type(response)

Decomposing Question: Done!
Generating QA Pairs: Done!
[('1. What is the main objective of the project meeting?', AIMessage(content='The main objective of the project meeting is to go over the final details before the product launch, ensuring that all aspects of the project, including development, support, and marketing, are on track and any potential issues are addressed promptly to meet the launch date successfully.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 53, 'prompt_tokens': 441, 'total_tokens': 494, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_7c63087da1', 'finish_reason': 'stop', 'logprobs': None}, id='run-c9bb51b4-0a8f-41c0-b450-1a728ee2d26d-0', usage_metadata={'input_tokens': 441, 'output_tokens': 53, 

str