# Metadata

> Plugin metadata structures for tracking plugin information and resources

In [None]:
#| default_exp core.metadata

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from dataclasses import dataclass, field
from typing import Dict, Any, Optional, List

from cjm_fasthtml_plugins.core.execution_mode import PluginExecutionMode, CloudProviderType

## Remote Resource Information

For plugins that execute on remote/cloud resources, we track information about those resources.

In [None]:
#| export
@dataclass
class RemoteResourceInfo:
    """Descriptor for the remote/cloud resource that backs a plugin."""
    provider: CloudProviderType  # Cloud provider or service hosting the resource
    region: Optional[str] = None  # Region or zone of the resource, when known
    instance_id: Optional[str] = None  # Identifier of the VM/instance
    job_id: Optional[str] = None  # Identifier of the job/task on the remote system
    endpoint_url: Optional[str] = None  # HTTP endpoint used for API access
    ssh_host: Optional[str] = None  # Host used for SSH access
    ssh_port: int = 22  # Port used for SSH access
    status: str = "unknown"  # Lifecycle state (provisioning, running, stopping, stopped)
    resource_type: Optional[str] = None  # Instance type, e.g. 'p3.2xlarge' or 'n1-standard-8'
    gpu_count: int = 0  # How many GPUs the resource has
    gpu_type: Optional[str] = None  # GPU model, e.g. 'V100', 'A100', 'H100'
    estimated_cost_per_hour: Optional[float] = None  # Estimated cost per hour, in USD
    metadata: Dict[str, Any] = field(default_factory=dict)  # Extra provider-specific metadata (fresh dict per instance)

In [None]:
# Example: describe an AWS GPU instance as a remote resource
# (keyword arguments listed in the same order as the dataclass fields)
aws_resource = RemoteResourceInfo(
    provider=CloudProviderType.AWS,
    region="us-west-2",
    instance_id="i-1234567890abcdef",
    ssh_host="54.123.45.67",
    status="running",
    resource_type="p3.8xlarge",
    gpu_count=4,
    gpu_type="V100",
    estimated_cost_per_hour=12.24,
)

# Single print call: one summary line per argument, newline-separated
print(
    f"Provider: {aws_resource.provider.value}",
    f"Instance: {aws_resource.instance_id}",
    f"GPUs: {aws_resource.gpu_count}x {aws_resource.gpu_type}",
    f"Cost: ${aws_resource.estimated_cost_per_hour}/hour",
    sep="\n",
)

Provider: aws
Instance: i-1234567890abcdef
GPUs: 4x V100
Cost: $12.24/hour


In [None]:
# Example: describe a Lambda Labs instance that is still being provisioned
# (keyword arguments listed in the same order as the dataclass fields)
lambda_resource = RemoteResourceInfo(
    provider=CloudProviderType.LAMBDA_LABS,
    region="us-west-1",
    instance_id="0x1a2b3c4d",
    status="provisioning",
    resource_type="4x_A100",
    gpu_count=4,
    gpu_type="A100",
    estimated_cost_per_hour=4.40,
)

# Single print call: one summary line per argument, newline-separated
print(
    f"Provider: {lambda_resource.provider.value}",
    f"Status: {lambda_resource.status}",
    f"Configuration: {lambda_resource.resource_type}",
    sep="\n",
)

Provider: lambda_labs
Status: provisioning
Configuration: 4x_A100


## Plugin Metadata

The main metadata structure that describes a plugin and tracks its runtime state.

In [None]:
#| export
@dataclass
class PluginMetadata:
    """Plugin description plus resource-tracking state, usable for display and resource management without loading the plugin instance."""
    name: str  # Identifier used internally for the plugin
    category: str  # Application-defined category string
    title: str  # Title shown when displaying the plugin
    config_schema: Dict[str, Any]  # JSON Schema describing the plugin configuration
    description: Optional[str] = None  # Description of the plugin
    version: Optional[str] = None  # Version of the plugin
    is_configured: bool = False  # True when the plugin has saved configuration

    # Lifecycle metadata
    execution_mode: PluginExecutionMode = PluginExecutionMode.IN_PROCESS  # Execution mode of the plugin
    manages_child_processes: bool = False  # True when the plugin spawns child processes
    manages_external_resources: bool = False  # True when the plugin manages Docker/servers/etc.

    # Local resource tracking
    spawned_pids: List[int] = field(default_factory=list)  # PIDs of child processes (fresh list per instance)
    container_id: Optional[str] = None  # Docker container ID, when applicable
    conda_env_name: Optional[str] = None  # Conda environment name, when applicable

    # Cloud/Remote resource tracking
    remote_resource: Optional[RemoteResourceInfo] = None  # Remote resource details, when applicable

    def get_unique_id(self) -> str:  # Identifier of the form 'category_name'
        """Build the plugin's unique ID by joining category and name with an underscore."""
        return f"{self.category}_{self.name}"

    def is_local_execution(self) -> bool:  # True when the execution mode is one of the local modes
        """Report whether the plugin runs locally rather than on cloud/remote resources."""
        mode = self.execution_mode
        # Only the modes listed here count as local; any other mode is non-local.
        return mode in {
            PluginExecutionMode.IN_PROCESS,
            PluginExecutionMode.SUBPROCESS,
            PluginExecutionMode.DOCKER,
            PluginExecutionMode.CONDA_ENV,
            PluginExecutionMode.EXTERNAL_SERVICE,
        }

    def is_cloud_execution(self) -> bool:  # True when the execution mode is not a local one
        """Report whether the plugin runs on cloud/remote resources (the negation of `is_local_execution`)."""
        runs_locally = self.is_local_execution()
        return not runs_locally

    def has_active_resources(self) -> bool:  # True for tracked PIDs, a container ID, or a remote resource that is running
        """Report whether the plugin currently has active managed resources."""
        # Any recorded child process PID counts as an active resource.
        if self.spawned_pids:
            return True
        # So does a recorded (non-empty) container ID.
        if self.container_id:
            return True
        # A remote resource counts only while its status is exactly "running";
        # other statuses (e.g. "provisioning") are not treated as active.
        remote = self.remote_resource
        return bool(remote and remote.status == "running")

In [None]:
# Example: metadata for a plain in-process plugin (execution mode left at its default)
simple_plugin = PluginMetadata(
    name="whisper_tiny",
    category="transcription",
    title="Whisper Tiny Model",
    config_schema={
        "type": "object",
        "properties": {
            "device": {
                "type": "string",
                "enum": ["cpu", "cuda"],
                "default": "cpu",
            },
        },
    },
    version="1.0.0",
    is_configured=True,
)

# Single print call: one summary line per argument, newline-separated
print(
    f"Plugin: {simple_plugin.title}",
    f"Unique ID: {simple_plugin.get_unique_id()}",
    f"Execution mode: {simple_plugin.execution_mode.value}",
    f"Is local: {simple_plugin.is_local_execution()}",
    f"Has active resources: {simple_plugin.has_active_resources()}",
    sep="\n",
)

Plugin: Whisper Tiny Model
Unique ID: transcription_whisper_tiny
Execution mode: in_process
Is local: True
Has active resources: False


In [None]:
# Example: metadata for a subprocess-based plugin (vLLM-style) with tracked child PIDs
vllm_plugin = PluginMetadata(
    name="voxtral_vllm",
    category="transcription",
    title="Voxtral via vLLM Server",
    config_schema={"type": "object", "properties": {}},
    execution_mode=PluginExecutionMode.SUBPROCESS,
    manages_child_processes=True,
    manages_external_resources=True,
    spawned_pids=[54321, 54322, 54323],
)

# Single print call: one summary line per argument, newline-separated
print(
    f"Plugin: {vllm_plugin.title}",
    f"Execution mode: {vllm_plugin.execution_mode.value}",
    f"Manages child processes: {vllm_plugin.manages_child_processes}",
    f"Spawned PIDs: {vllm_plugin.spawned_pids}",
    f"Has active resources: {vllm_plugin.has_active_resources()}",
    sep="\n",
)

Plugin: Voxtral via vLLM Server
Execution mode: subprocess
Manages child processes: True
Spawned PIDs: [54321, 54322, 54323]
Has active resources: True


In [None]:
# Example: metadata for a cloud-executed plugin backed by a running AWS instance
cloud_plugin = PluginMetadata(
    name="llm_finetune_aws",
    category="finetuning",
    title="AWS LLM Finetuning",
    config_schema={"type": "object", "properties": {}},
    execution_mode=PluginExecutionMode.CLOUD_GPU,
    manages_external_resources=True,
    remote_resource=RemoteResourceInfo(
        provider=CloudProviderType.AWS,
        instance_id="i-abcd1234",
        status="running",
        gpu_count=8,
        gpu_type="A100",
        estimated_cost_per_hour=24.50,
    ),
)

# Single print call: one summary line per argument, newline-separated
print(
    f"Plugin: {cloud_plugin.title}",
    f"Execution mode: {cloud_plugin.execution_mode.value}",
    f"Is cloud: {cloud_plugin.is_cloud_execution()}",
    f"Provider: {cloud_plugin.remote_resource.provider.value}",
    f"Instance: {cloud_plugin.remote_resource.instance_id}",
    f"Cost: ${cloud_plugin.remote_resource.estimated_cost_per_hour}/hour",
    f"Has active resources: {cloud_plugin.has_active_resources()}",
    sep="\n",
)

Plugin: AWS LLM Finetuning
Execution mode: cloud_gpu
Is cloud: True
Provider: aws
Instance: i-abcd1234
Cost: $24.5/hour
Has active resources: True


In [None]:
#| hide
# Run nbdev's export step for this project
import nbdev

nbdev.nbdev_export()