In [None]:
import os
import re
import json
import uuid
from dotenv import load_dotenv
from langchain.embeddings import init_embeddings
from langgraph.store.memory import InMemoryStore

In [19]:
load_dotenv(override=True)

True

In [20]:
THREAD = os.getenv('THREAD')
USER_ID = os.getenv('USER_ID')
GRAPH_PATH = os.getenv('GRAPH_PATH')
BASE_URL = os.getenv('BASE_URL')
OPENAI_CHAT_MODEL = os.getenv('OPENAI_CHAT_MODEL')
OPENAI_EMBED_MODEL = os.getenv('OPENAI_EMBED_MODEL')
OPENAI_API_PROXY = os.getenv('OPENAI_API_PROXY')

In [21]:
embeddings = init_embeddings(api_key=OPENAI_API_PROXY, base_url=BASE_URL, model=OPENAI_EMBED_MODEL)

store = InMemoryStore(index={"embed": embeddings, "dims": 1536, "fields": ["memory", "type"]})

In [22]:
CONFIG = {'configurable': {'thread_id': THREAD, 'recursion_limit': 3, "user_id": USER_ID}}

with open(GRAPH_PATH, "r") as f:
    graph_data = json.load(f)

In [23]:
def get_user_namespace(config=CONFIG):
    """Get user namespace tuple for store operations."""
    user_id = config.get("configurable", {}).get("user_id")
    if not user_id:
        raise ValueError("User ID not found in configuration.")
    return (user_id, "profile")

def validate_profile_type(profile_type):
    """Validate profile type against allowed values."""
    VALID_PROFILE_TYPES = {"name", "interests", "preferences", "goals"}
    if profile_type not in VALID_PROFILE_TYPES:
        raise ValueError(
            f"Invalid profile_type: '{profile_type}'. Valid types are: {', '.join(VALID_PROFILE_TYPES)}."
        )
    return True

def validate_concept_id(concept_id):
    """Validates concept ID format.
    
    Args:
        concept_id (str): The ID to validate
        
    Returns:
        tuple: (is_valid, error_message)
    """
    if not isinstance(concept_id, str):
        return False, "Error: Invalid concept ID type. Must be a string."
    
    pattern = re.compile(r'^[A-Z]\.\d+$')
    if not pattern.match(concept_id):
        return False, ("Error: Invalid concept ID format. "
                      "The correct format is a single uppercase letter followed by a dot and a number (e.g., 'A.1').")
    
    return True, None

def load_graph_data(path):
    """Loads and validates graph data from a JSON file.
    
    Args:
        path (str): Path to the JSON file
        
    Returns:
        tuple: (data, error_message)
    """
    if not os.path.exists(path):
        return None, f"Error: File '{path}' does not exist."
    
    try:
        with open(path, "r") as f:
            data = json.load(f)
    except json.JSONDecodeError as jde:
        return None, f"Error reading JSON from '{path}': {jde}"
    except Exception as e:
        return None, f"Error opening '{path}': {e}"
    
    if "concepts" not in data:
        return None, "Error: 'concepts' key not found in the JSON data."
    
    return data, None

In [24]:
def store_profile(content: str, profile_type: str,  config = CONFIG, store = store):
    """Stores a user profile attribute in the database.
    
    Args:
        content (str): The value of the profile attribute to store
        profile_type (str): The category or type of the profile attribute.  
                            Valid types include: "name", "interests", "preferences", "goals".

    Returns:
        str: A confirmation message indicating the successfully stored content and its unique identifier (UUID).
    
    Raises:
        ValueError: If an invalid `profile_type` is provided.  
    """
    VALID_PROFILE_TYPES = {"name", "interests", "preferences", "goals"}

    if profile_type not in VALID_PROFILE_TYPES:
        raise ValueError(
            f"Invalid profile_type: '{profile_type}'. Valid types are: {', '.join(VALID_PROFILE_TYPES)}."
        )

    user_id = config.get("configurable", {}).get("user_id")
    if not user_id:
        raise ValueError("User ID not found in configuration.")

    namespace = (user_id, "profile")
    memory_id = str(uuid.uuid4())
    
    store.put(
        namespace,
        key=memory_id,
        value={"memory": content, "type": profile_type},
        index=False
    )
    
    return f"Stored information: '{content}' | ID: {memory_id}"

In [25]:
import functools

@functools.lru_cache(maxsize=8)
def get_graph_data(path=GRAPH_PATH):
    """Load and cache graph data to avoid repeated file I/O."""
    if not os.path.exists(path):
        return None, f"Error: File '{path}' does not exist."
    
    try:
        with open(path, "r") as f:
            data = json.load(f)
    except json.JSONDecodeError as jde:
        return None, f"Error reading JSON from '{path}': {jde}"
    except Exception as e:
        return None, f"Error opening '{path}': {e}"
    
    if "concepts" not in data:
        return None, "Error: 'concepts' key not found in the JSON data."
    
    return data, None

In [26]:
def store_profile(content: str, profile_type: str, config=CONFIG, store=store):
    """Stores a user profile attribute in the database with improved error handling."""
    try:
        validate_profile_type(profile_type)
        namespace = get_user_namespace(config)
        
        memory_id = str(uuid.uuid4())
        store.put(
            namespace,
            key=memory_id,
            value={"memory": content, "type": profile_type},
            index=False
        )
        
        return f"Stored information: '{content}' | ID: {memory_id}"
    except ValueError as e:
        return f"Error: {str(e)}"

def retrieve_profile(profile_type: str, config=CONFIG, store=store):
    """Optimized profile retrieval with better error handling."""
    try:
        validate_profile_type(profile_type)
        namespace = get_user_namespace(config)
        
        results = store.search(namespace, filter={"type": profile_type})
        return [
            {"content": item.value.get("memory", ""), "id": item.key}
            for item in results
        ]
    except ValueError as e:
        return f"Error: {str(e)}"

def delete_profile(key: str, config=CONFIG, store=store):
    """Optimized profile deletion with consistent error handling."""
    try:
        namespace = get_user_namespace(config)
        success = store.delete(namespace, key)
        
        if success:
            return f"Profile entry with ID {key} has been successfully deleted."
        else:
            return f"Error: No profile entry found with ID {key}."
    except ValueError as e:
        return f"Error: {str(e)}"
    except Exception as e:
        return f"An error occurred while deleting the profile entry: {str(e)}"

In [27]:
def retrieve_sections(section_letter: str, data: dict = graph_data):
    """Optimized section retrieval with consistent validation."""
    if data is None:
        return "Error: 'data' dictionary not provided."
        
    if not isinstance(section_letter, str) or not section_letter.isalpha() or len(section_letter) != 1:
        return "Error: Invalid section letter. Must be a single alphabet character."
        
    if "concepts" not in data:
        return "Error: 'concepts' key not found in the data dictionary."

    # Use list comprehension for better efficiency
    return [
        {"id": concept_id, "label": concept_info["label"]}
        for concept_id, concept_info in data["concepts"].items()
        if concept_info["section"] == section_letter
    ]

def retrieve_concept(concept_id: str, data: dict = None):
    """Optimized concept retrieval with caching support."""
    if data is None:
        data, error = get_graph_data()
        if error:
            return error
    
    if not isinstance(concept_id, str):
        return "Error: Invalid concept ID. Must be a string."
        
    if "concepts" not in data:
        return "Error: 'concepts' key not found in the data dictionary."

    concept = data["concepts"].get(concept_id)
    return concept if concept else f"Error: Concept with ID '{concept_id}' not found."

def update_concept_status(concept_id: str, new_status: str, path: str = GRAPH_PATH):
    """Optimized status update using utility functions."""
    ALLOWED_STATUSES = {"mastery", "unlearned", "awareness"}
    
    valid, error = validate_concept_id(concept_id)
    if not valid:
        return error
    
    if not isinstance(new_status, str):
        return "Error: Invalid status type. Must be a string."
        
    if new_status not in ALLOWED_STATUSES:
        return f"Error: Invalid status '{new_status}'. Allowed statuses are: {', '.join(sorted(ALLOWED_STATUSES))}."
    
    data, error = get_graph_data(path)
    if error:
        return error
    
    if concept_id in data["concepts"]:
        current_status = data["concepts"][concept_id].get("status", "undefined")
        if current_status == new_status:
            return f"No update needed: Concept '{concept_id}' is already set to '{new_status}'."
            
        data["concepts"][concept_id]["status"] = new_status
        
        try:
            with open(path, "w") as f:
                json.dump(data, f, indent=4)
            get_graph_data.cache_clear()
            return f"Success: Status of concept '{concept_id}' updated from '{current_status}' to '{new_status}'."
        except Exception as e:
            return f"Error saving changes to '{path}': {e}"
    else:
        return f"Error: Concept with ID '{concept_id}' not found."

def knowledge_state(path: str = GRAPH_PATH):
    """Optimized knowledge state using cached data access."""
    data, error = get_graph_data(path)
    if error:
        return error
    
    concepts_with_status = [
        {
            "ID": cid,
            "Label": details.get("label", "N/A"),
            "Status": details.get("status", "N/A")
        }
        for cid, details in data["concepts"].items()
        if details.get("status")
    ]
    
    if not concepts_with_status:
        return "No concepts with a non-empty status found."
    
    concepts_with_status.sort(key=lambda x: x["ID"])
    return "\n".join(f"{c['ID']} {c['Label']} is {c['Status']}" for c in concepts_with_status)

def get_prerequisites(concept_id: str, path: str = GRAPH_PATH):
    """Optimized prerequisites retrieval using cached data access."""
    valid, error = validate_concept_id(concept_id)
    if not valid:
        return error
    
    data, error = get_graph_data(path)
    if error:
        return error
    
    if concept_id not in data["concepts"]:
        return f"Error: Concept with ID '{concept_id}' not found."
    
    prerequisites_ids = data["concepts"][concept_id].get("prerequisites", [])
    if not prerequisites_ids:
        return f"Concept '{concept_id}' has no prerequisites."
    
    prerequisites = []
    missing_prereqs = []
    
    for pid in prerequisites_ids:
        prereq = data["concepts"].get(pid)
        if prereq:
            prerequisites.append({"ID": pid, "Label": prereq.get("label", "N/A")})
        else:
            missing_prereqs.append(pid)
    
    output = [f"Prerequisites for Concept '{concept_id}':"]
    output.extend(f" - {p['ID']}: {p['Label']}" for p in prerequisites)
    
    if missing_prereqs:
        output.append("\nWarning: The following prerequisite IDs were not found in the data:")
        output.extend(f" - {pid}" for pid in missing_prereqs)
    
    return "\n".join(output)

In [28]:
retrieve_concept('A.1')

{'label': 'artificial intelligence',
 'content': 'A non-human program or model that can solve sophisticated tasks.\nFor example, a program or model that translates text or a program or model that\nidentifies diseases from radiologic images both exhibit artificial intelligence. Formally, machine learning is a sub-field of artificial\nintelligence. However, in recent years, some organizations have begun using the\nterms artificial intelligence and machine learning interchangeably.',
 'status': 'mastery',
 'prerequisites': ['A.3',
  'A.7',
  'A.14',
  'A.20',
  'A.28',
  'A.30',
  'D.3',
  'D.20',
  'A.19',
  'A.32',
  'A.35'],
 'section': 'A'}

In [29]:
retrieve_sections("A")

[{'id': 'A.1', 'label': 'artificial intelligence'},
 {'id': 'A.2', 'label': 'binary classification'},
 {'id': 'A.3', 'label': 'classification model'},
 {'id': 'A.4', 'label': 'classification threshold'},
 {'id': 'A.5', 'label': 'confusion matrix'},
 {'id': 'A.6', 'label': 'dataset'},
 {'id': 'A.7', 'label': 'dynamic model'},
 {'id': 'A.8', 'label': 'feature'},
 {'id': 'A.9', 'label': 'hyperparameter'},
 {'id': 'A.10', 'label': 'inference'},
 {'id': 'A.11', 'label': 'interpretability'},
 {'id': 'A.12', 'label': 'label'},
 {'id': 'A.13', 'label': 'linear '},
 {'id': 'A.14', 'label': 'linear model'},
 {'id': 'A.15', 'label': 'linear regression'},
 {'id': 'A.16', 'label': 'logistic regression'},
 {'id': 'A.17', 'label': 'loss'},
 {'id': 'A.18', 'label': 'loss function'},
 {'id': 'A.19', 'label': 'machine learning'},
 {'id': 'A.20', 'label': 'model'},
 {'id': 'A.21', 'label': 'multi-class classification'},
 {'id': 'A.22', 'label': 'nonlinear '},
 {'id': 'A.23', 'label': 'nonstationarity'},


In [30]:
update_concept_status("A.1", "o")

"Error: Invalid status 'o'. Allowed statuses are: awareness, mastery, unlearned."

In [31]:
update_concept_status("A.1", "mastery")

"No update needed: Concept 'A.1' is already set to 'mastery'."

In [32]:
print(knowledge_state())

A.1 artificial intelligence is mastery


In [33]:
get_prerequisites("A.3")

"Prerequisites for Concept 'A.3':\n - A.3: classification model\n - A.7: dynamic model\n - A.14: linear model\n - A.20: model\n - A.28: regression model\n - A.30: static model\n - D.3: deep model\n - D.20: large language model\n - A.2: binary classification\n - A.3: classification model\n - A.4: classification threshold\n - A.21: multi-class classification\n - E.5: class\n - E.6: class-imbalanced dataset\n - E.15: majority class\n - E.17: minority class\n - E.18: negative class\n - E.24: positive class\n - A.28: regression model\n - A.2: binary classification\n - A.21: multi-class classification\n - A.16: logistic regression\n - A.18: loss function"