# In [None]:
#############################################
# File: llm_powered_knowledge_base.py
#############################################

"""
LLM-Powered Knowledge Base
--------------------------
Demonstrates how to build a simple knowledge base using
sentence embeddings for document retrieval and optionally
a Large Language Model (LLM) for final context-aware answers.

Before proceeding, make sure to install the required packages:
    pip install sentence-transformers torch pandas
"""

# 1. Install required packages if needed (uncomment if you're in an environment like Google Colab)
# !pip install sentence-transformers torch pandas

import os
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch

##########################################
# Sample Data Creation for the Knowledge Base
##########################################
# Seed entries as (title, content) pairs; they are pivoted into the
# column-oriented dict that pandas expects just below.
_seed_entries = [
    (
        "About OxeFit's Product",
        "OxeFit offers cutting-edge strength training machines that combine hardware and AI-driven software.",
    ),
    (
        "Fitness Machine Specifications",
        "The machine is designed with advanced sensors that track performance and gather real-time data.",
    ),
    (
        "Data Requirements for Analysis",
        "Data requirements include sensor readings, user performance metrics, and usage logs.",
    ),
    (
        "AI Integration in Fitness",
        "AI is integrated to personalize workout programs and provide in-depth analytics.",
    ),
]
data = {
    "title": [entry_title for entry_title, _ in _seed_entries],
    "content": [entry_content for _, entry_content in _seed_entries],
}

kb_file = "knowledgebase.csv"
# Write the sample CSV only when it is missing; an existing file is reused as-is.
if not os.path.exists(kb_file):
    df_kb = pd.DataFrame(data)
    df_kb.to_csv(kb_file, index=False)

# 2. Load the Knowledge Base
kb = pd.read_csv(kb_file)

##########################################
# Embedding Model
##########################################
# Lightweight, general-purpose model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every knowledge-base passage once at start-up, in row order, so that
# row i of kb_embeddings corresponds to row i of kb.
_kb_passages = kb["content"].tolist()
kb_embeddings = embedding_model.encode(_kb_passages, convert_to_tensor=True)

def retrieve_relevant_context(query, top_k=2):
    """
    Given a user query, find the top_k most relevant entries in the knowledge base.

    Args:
        query: Text to search for; it is embedded with ``embedding_model``.
        top_k: Maximum number of entries to return. Values larger than the
            number of knowledge-base entries are clamped to that number;
            values <= 0 yield an empty list.

    Returns:
        A list of ``(content, cosine_score)`` tuples as ranked by
        ``torch.topk`` (highest similarity first), where ``content`` comes
        from ``kb['content']`` and ``cosine_score`` is a Python float.
    """
    # torch.topk raises when k exceeds the number of candidates (or is
    # negative), so bound the request by the size of the embedded corpus.
    k = min(top_k, len(kb_embeddings))
    if k <= 0:
        return []

    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, kb_embeddings)[0]
    top_results = torch.topk(cosine_scores, k=k)

    # kb_embeddings was built from kb['content'] in row order, so the indices
    # returned by topk are row positions, not index labels: use iloc, which
    # stays correct even if kb carries a non-default index.
    contents = kb['content']
    return [
        (contents.iloc[row_pos], float(score))
        for row_pos, score in zip(
            top_results.indices.tolist(), top_results.values.tolist()
        )
    ]

if __name__ == "__main__":
    # Demo: run one sample question through the retriever and show the hits.
    user_question = "How does OxeFit use AI?"
    print(f"User Question: {user_question}\n")

    # Fetch the two best-matching passages and print them with a 1-based rank.
    relevant_info = retrieve_relevant_context(user_question, top_k=2)
    for rank, hit in enumerate(relevant_info, start=1):
        passage, similarity = hit
        print(f"Relevant Context {rank} (Score: {similarity:.4f}): {passage}")

# Created/Modified files during execution:
# (this print sits outside the __main__ guard, so it also runs on import)
print(kb_file)