<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/Knowledge_Base_Demo_(Python)_with_OpenAI_LLM_(Colab_Userdata).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import time
import os
from openai import OpenAI
import httpx # Import httpx for handling potential network errors
import asyncio # Import asyncio for asynchronous operations
import nest_asyncio # Import nest_asyncio to allow nested event loops in interactive environments

# nest_asyncio patches asyncio so that asyncio.run() can be called while an
# event loop is already running (interactive/notebook environments).
nest_asyncio.apply()

# --- OpenAI API Key Setup ---
# First choice: read the key through google.colab.userdata (Colab only).
# NOTE(review): userdata.get may raise when the secret is missing or access is
# not granted -- only ImportError is handled here; confirm against Colab docs.
try:
    from google.colab import userdata
    api_key = userdata.get('OPENAI_API_KEY')
except ImportError:
    # Outside Colab the module does not exist, so use the process environment.
    print("google.colab.userdata not found. Attempting to use OPENAI_API_KEY from environment variables.")
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        for warning_line in (
            "WARNING: OPENAI_API_KEY environment variable not found. Please set it for local runs.",
            "Using a placeholder API key. LLM calls will fail until a valid key is provided.",
        ):
            print(warning_line)


# A missing/empty key is replaced by a placeholder: API calls will fail with
# it, but the rest of the script can still run as a structural demo.
client = OpenAI(api_key=api_key or "sk-your-placeholder-key")

# Simulated knowledge base.
# Every chunk carries:
#   id                 - unique label for the chunk
#   text               - the passage handed to the LLM as context
#   isOutdated         - True when the passage has been superseded
#   outdatedReason     - why it was superseded (outdated chunks only)
#   semanticMatchScore - stand-in for a retriever's similarity score
knowledge_base = [
    # CEO question: the outdated chunk deliberately has the higher score.
    {
        "id": "chunk1_ceo_old",
        "text": "The CEO of ABC is John Smith.",
        "isOutdated": True,
        "outdatedReason": "Replaced by new CEO",
        "semanticMatchScore": 0.9,
    },
    {
        "id": "chunk2_ceo_new",
        "text": "Jenna Brown became CEO of ABC in January 2025, replacing John Smith.",
        "isOutdated": False,
        "semanticMatchScore": 0.7,
    },
    # Product release question: again the outdated chunk scores higher.
    {
        "id": "chunk3_product_old",
        "text": "Company XYZ's new smartphone, the 'Spectra X', was announced for release in October 2024.",
        "isOutdated": True,
        "outdatedReason": "Release postponed",
        "semanticMatchScore": 0.85,
    },
    {
        "id": "chunk4_product_new",
        "text": "Due to supply chain issues, Company XYZ has postponed the 'Spectra X' smartphone release to Q1 2025.",
        "isOutdated": False,
        "semanticMatchScore": 0.6,
    },
]

def extract_keywords(question):
    """
    Return the demo keywords found in `question`, matched case-insensitively.

    Matching is plain substring search on the lower-cased question. The
    result keeps a fixed order: "ceo", "abc" (only reported together with
    "ceo"), "spectra x", "smartphone", "release". Either "release" or "date"
    in the question yields the single keyword "release".
    """
    text = question.lower()
    found = []

    # "abc" only counts as a keyword when the question is about a CEO.
    if "ceo" in text:
        found.append("ceo")
        if "abc" in text:
            found.append("abc")

    # Product terms; "spectra x" is kept as a phrase for better matching.
    found.extend(term for term in ("spectra x", "smartphone") if term in text)

    # Both trigger words map onto the same keyword.
    if any(trigger in text for trigger in ("release", "date")):
        found.append("release")

    return found

def get_relevant_chunks(question, kb):
    """
    Return the chunks of `kb` whose text mentions a keyword of `question`.

    Keywords come from extract_keywords(question). A chunk qualifies as soon
    as its lower-cased 'text' contains any one of them; the order of `kb` is
    preserved. With no keywords, the result is an empty list.
    """
    wanted = extract_keywords(question)
    matches = []
    for entry in kb:
        haystack = entry['text'].lower()
        # One keyword hit is enough to treat the chunk as relevant.
        if any(term in haystack for term in wanted):
            matches.append(entry)
    return matches

async def simulate_llm_answer(context_text, question):
    """
    Ask the OpenAI chat model (gpt-4o) to answer `question` from `context_text`.

    The request is made with the module-level synchronous `client`. Because
    that call blocks, it is run in the event loop's default executor so this
    coroutine does not stall the loop while waiting for the HTTP response.

    Args:
        context_text: Text of the retrieved knowledge-base chunk.
        question: The question to answer from that context.

    Returns:
        The model's answer with surrounding whitespace removed. If the
        response carries no text, or the call fails, a fixed fallback/error
        string is returned instead (the error is printed, not raised).
    """
    # Local import: the file only imports the `OpenAI` class from this package.
    # The v1 SDK reports non-2xx HTTP responses as APIStatusError.
    from openai import APIStatusError

    def _request():
        # Blocking SDK call; executed off the event loop thread.
        return client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that answers questions based on the provided context. If the answer is not in the context, state that you cannot find it."},
                {"role": "user", "content": f"Context: {context_text}\nQuestion: {question}\nAnswer:"}
            ],
            max_completion_tokens=50
        )

    print("  (Calling LLM...)")
    try:
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _request)

        first_choice = response.choices[0] if response.choices else None
        message = first_choice.message if first_choice else None
        content = message.content if message else None
        # `content` can be None even when a message object is present, so
        # guard before calling .strip().
        if content is None:
            return "LLM did not return a valid response."
        return content.strip()
    except (APIStatusError, httpx.HTTPStatusError) as e:
        # Both exception types expose the underlying HTTP response.
        print(f"  Error calling LLM (HTTP status {e.response.status_code}): {e.response.text}")
        return "Error: Could not get answer from LLM due to API error."
    except Exception as e:
        # Boundary handler: the demo keeps running and reports the failure.
        print(f"  An unexpected error occurred during LLM call: {e}")
        return "Error: Could not get answer from LLM."


async def query_classical_kb(question):
    """
    Answer `question` the "classical" way and print the outcome.

    Of the knowledge-base chunks relevant to the question, the one with the
    highest semanticMatchScore is used as LLM context, whether or not it is
    marked outdated. The retrieved context and the answer are printed;
    nothing is returned.
    """
    print(f"\n--- Classical Knowledge Base Response for: '{question}' ---")
    print("Processing...")

    candidates = get_relevant_chunks(question, knowledge_base)
    # Highest-scoring candidate wins; None when nothing is relevant.
    best_match = max(
        candidates,
        key=lambda entry: entry.get('semanticMatchScore', 0),
        default=None,
    )

    if best_match:
        context = best_match['text']
        print(f"Retrieved Context: {context}")
        answer = await simulate_llm_answer(context, question)
    else:
        print("No relevant context found for the classical KB.")
        answer = "No relevant context found."
    print(f"Answer: {answer}")


async def query_temporal_kb(question):
    """
    Answer `question` with temporal awareness and print the outcome.

    Relevant chunks that are not outdated are preferred; the best of them by
    semanticMatchScore becomes the LLM context. A chunk without an
    'isOutdated' flag is treated as outdated. If every relevant chunk is
    outdated, the best-scoring relevant chunk is used anyway and a notice is
    printed. The retrieved context and the answer are printed; nothing is
    returned.
    """
    print(f"\n--- Temporal Knowledge Base Response for: '{question}' ---")
    print("Processing...")

    def score(entry):
        return entry.get('semanticMatchScore', 0)

    candidates = get_relevant_chunks(question, knowledge_base)
    fresh = [entry for entry in candidates if not entry.get('isOutdated', True)]

    # Choose the pool to rank: current chunks first, otherwise everything relevant.
    pool = fresh
    if not fresh and candidates:
        print("No current relevant chunk found for the temporal KB. Falling back to most semantically relevant chunk (may be outdated).")
        pool = candidates
    best_match = max(pool, key=score, default=None)

    if best_match:
        context = best_match['text']
        print(f"Retrieved Context: {context}")
        answer = await simulate_llm_answer(context, question)
    else:
        print("No relevant context found for the temporal KB.")
        answer = "No relevant context found."
    print(f"Answer: {answer}")


async def main():
    """Run every built-in test question through both KB strategies in turn."""
    for banner_line in (
        "Welcome to the Knowledge Base Temporal Demo (Fully Automatic)!",
        "This version uses OpenAI's GPT-4o to generate answers.",
        "It attempts to fetch the API key using `google.colab.userdata`.",
    ):
        print(banner_line)

    test_cases = (
        "Who is the CEO of Company ABC?",
        "What is the release date of the Spectra X smartphone?",
    )
    divider = "-" * 50  # visual separator printed after each test case

    for question in test_cases:
        print(f"\n--- Running Test Case: '{question}' ---")
        # Classical first, then temporal, so the two answers can be compared.
        await query_classical_kb(question)
        await query_temporal_kb(question)
        print(divider)

    print("\nAutomatic demo finished.")

if __name__ == "__main__":
    # Entry point: run the whole demo coroutine to completion.
    # nest_asyncio.apply() has already been called near the top of this file,
    # which is what allows asyncio.run() here even if a loop is already running.
    asyncio.run(main())


Welcome to the Knowledge Base Temporal Demo (Fully Automatic)!
This version uses OpenAI's GPT-4o to generate answers.
It attempts to fetch the API key using `google.colab.userdata`.

--- Running Test Case: 'Who is the CEO of Company ABC?' ---

--- Classical Knowledge Base Response for: 'Who is the CEO of Company ABC?' ---
Processing...
Retrieved Context: The CEO of ABC is John Smith.
  (Calling LLM...)
Answer: The CEO of Company ABC is John Smith.

--- Temporal Knowledge Base Response for: 'Who is the CEO of Company ABC?' ---
Processing...
Retrieved Context: Jenna Brown became CEO of ABC in January 2025, replacing John Smith.
  (Calling LLM...)
Answer: I cannot determine the current CEO of Company ABC as the context only provides information up to January 2025, when Jenna Brown became CEO.
--------------------------------------------------

--- Running Test Case: 'What is the release date of the Spectra X smartphone?' ---

--- Classical Knowledge Base Response for: 'What is the release