def doctor(self):
    """Run the system health checks and return their exit code.

    ``SystemDoctor`` is imported inside the method, so ``cortex.doctor`` is
    only loaded when this command is actually invoked.
    """
    from cortex.doctor import SystemDoctor

    return SystemDoctor().run_checks()
InstallationStatus.SUCCESS) + history.update_installation( + install_id, InstallationStatus.SUCCESS + ) print(f"\nšŸ“ Installation recorded (ID: {install_id})") print(f" To rollback: cortex rollback {install_id}") @@ -275,7 +292,9 @@ def progress_callback(current, total, step): ) if result.failed_step is not None: - self._print_error(f"Installation failed at step {result.failed_step + 1}") + self._print_error( + f"Installation failed at step {result.failed_step + 1}" + ) else: self._print_error("Installation failed") if result.error_message: @@ -292,17 +311,23 @@ def progress_callback(current, total, step): except ValueError as e: if install_id: - history.update_installation(install_id, InstallationStatus.FAILED, str(e)) + history.update_installation( + install_id, InstallationStatus.FAILED, str(e) + ) self._print_error(str(e)) return 1 except RuntimeError as e: if install_id: - history.update_installation(install_id, InstallationStatus.FAILED, str(e)) + history.update_installation( + install_id, InstallationStatus.FAILED, str(e) + ) self._print_error(f"API call failed: {str(e)}") return 1 except Exception as e: if install_id: - history.update_installation(install_id, InstallationStatus.FAILED, str(e)) + history.update_installation( + install_id, InstallationStatus.FAILED, str(e) + ) self._print_error(f"Unexpected error: {str(e)}") return 1 @@ -561,6 +586,7 @@ def show_rich_help(): table.add_row("history", "View history") table.add_row("rollback ", "Undo installation") table.add_row("notify", "Manage desktop notifications") # Added this line + table.add_row("doctor", "System health check") table.add_row("cache stats", "Show LLM cache statistics") console.print(table) @@ -592,7 +618,11 @@ def main(): ) # Global flags - parser.add_argument("--version", "-V", action="version", version=f"cortex {VERSION}") + parser.add_argument( + "--version", "-V", action="version", version=f"cortex {VERSION}" + ) + parser.add_argument( + "--verbose", "-v", action="store_true", 
class SystemDoctor:
    """
    Run Cortex's system health checks and summarise the results.

    Every ``_check_*`` method probes one aspect of the machine and reports
    the outcome through :meth:`_print_check`, which prints the result and
    records it in one of the lists below.

    Checks performed by :meth:`run_checks`:
        - Python version
        - Packages listed in ``requirements.txt``
        - GPU drivers (``nvidia-smi`` / ``rocm-smi``)
        - CUDA / ROCm availability
        - Ollama installation and local API status
        - API key configuration
        - Free disk space in the user's home directory
        - Total system memory

    Attributes:
        warnings: Messages of checks reported as ``WARN``.
        failures: Messages of checks reported as ``FAIL``.
        suggestions: Fix suggestions attached to ``WARN``/``FAIL`` results.
        passes: Messages of checks reported as ``PASS``.
    """

    # Seconds to wait for an external diagnostic command before giving up.
    COMMAND_TIMEOUT = 5

    # Endpoint probed to decide whether a local Ollama server is answering.
    OLLAMA_TAGS_URL = "http://localhost:11434/api/tags"

    # status -> (symbol, rich style, name of the list that records the message)
    _STATUS_STYLES = {
        "PASS": ("✓", "bold green", "passes"),
        "WARN": ("⚠", "bold yellow", "warnings"),
        "FAIL": ("✗", "bold red", "failures"),
    }

    def __init__(self) -> None:
        """Initialize the SystemDoctor with empty result lists."""
        self.warnings: List[str] = []
        self.failures: List[str] = []
        self.suggestions: List[str] = []
        self.passes: List[str] = []

    def run_checks(self) -> int:
        """
        Run all health checks, print the summary and return an exit code.

        Exit codes:
            0: No warnings and no failures were recorded
            1: Only warnings were recorded
            2: At least one failure was recorded

        Returns:
            int: Exit code reflecting the recorded results (0, 1, or 2)
        """
        # Header
        console.print()
        console.print(
            Panel.fit(
                "[bold cyan]CORTEX SYSTEM CHECK[/bold cyan]",
                border_style="cyan",
                padding=(1, 4),
            )
        )
        console.print()

        # Run all check groups
        self._print_section("Python & Dependencies")
        self._check_python()
        self._check_dependencies()

        self._print_section("GPU & Acceleration")
        self._check_gpu_driver()
        self._check_cuda()

        self._print_section("AI & Services")
        self._check_ollama()
        self._check_api_keys()

        self._print_section("System Resources")
        self._check_disk_space()
        self._check_memory()

        self._print_summary()

        # Failures take precedence over warnings.
        if self.failures:
            return 2
        if self.warnings:
            return 1
        return 0

    def _print_section(self, title: str) -> None:
        """Print a section header for grouping checks."""
        console.print(f"\n[bold cyan]{title}[/bold cyan]")

    def _print_check(
        self, status: str, message: str, suggestion: Optional[str] = None
    ) -> None:
        """
        Print a check result and record it in the matching result list.

        Args:
            status: One of "PASS", "WARN", "FAIL"; any other value is printed
                as "[INFO]" and not recorded.
            message: Description of the check result.
            suggestion: Optional fix command or hint. Only stored for
                "WARN" and "FAIL" results.
        """
        style = self._STATUS_STYLES.get(status)
        if style is None:
            symbol, color, label = "?", "dim", "INFO"
        else:
            symbol, color, bucket = style
            label = status
            getattr(self, bucket).append(message)
            if suggestion and status != "PASS":
                self.suggestions.append(suggestion)

        console.print(
            f" [cyan]CX[/cyan] [{color}]{symbol} [{label}][/{color}] {message}"
        )

    def _run_command(
        self, command: List[str]
    ) -> Optional[subprocess.CompletedProcess]:
        """
        Run a diagnostic command, capturing its output as text.

        Args:
            command: Program and arguments, passed to ``subprocess.run``
                as a list (no shell involved).

        Returns:
            The completed process, or None when the command could not be
            started, timed out after ``COMMAND_TIMEOUT`` seconds, or produced
            output that could not be decoded.
        """
        try:
            return subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=self.COMMAND_TIMEOUT,
            )
        except (subprocess.SubprocessError, OSError, UnicodeError):
            # Best-effort probe: the caller treats None as "tool unavailable".
            return None

    def _check_python(self) -> None:
        """Check the running interpreter's version (3.10+ passes, 3.8+ warns)."""
        version = (
            f"{sys.version_info.major}."
            f"{sys.version_info.minor}."
            f"{sys.version_info.micro}"
        )

        if sys.version_info >= (3, 10):
            self._print_check("PASS", f"Python {version}")
        elif sys.version_info >= (3, 8):
            self._print_check(
                "WARN",
                f"Python {version} (3.10+ recommended)",
                "Upgrade Python: sudo apt install python3.11",
            )
        else:
            self._print_check(
                "FAIL",
                f"Python {version} (3.10+ required)",
                "Install Python 3.10+: sudo apt install python3.11",
            )

    @staticmethod
    def _requirement_name(line: str) -> Optional[str]:
        """
        Extract the distribution name from one requirements.txt line.

        Inline comments, version specifiers (``==``, ``>=``, ``~=``, ``!=``,
        ...), extras (``pkg[extra]``) and environment markers (``; ...``) are
        stripped.

        Args:
            line: Raw line as read from the requirements file.

        Returns:
            The bare distribution name, or None for blank lines, comment
            lines and pip option lines such as ``-r other.txt``.
        """
        requirement = line.split("#", 1)[0].strip()
        if not requirement or requirement.startswith("-"):
            return None

        # The name is the leading run of letters, digits, "-", "_" and ".".
        name_length = 0
        for char in requirement:
            if not (char.isalnum() or char in "-_."):
                break
            name_length += 1
        return requirement[:name_length] or None

    def _check_dependencies(self) -> None:
        """
        Check that the packages named in ./requirements.txt can be imported.

        The file is looked up relative to the current working directory.
        Fewer than three missing packages is a warning, three or more is a
        failure.
        """
        requirements_path = Path("requirements.txt")

        if not requirements_path.exists():
            self._print_check("WARN", "No requirements.txt found")
            return

        try:
            with open(requirements_path, encoding="utf-8") as f:
                names = [self._requirement_name(line) for line in f]
        except (OSError, UnicodeError):
            self._print_check("WARN", "Could not read requirements.txt")
            return

        missing = []
        for name in names:
            if name is None:
                continue
            try:
                # Distribution names may contain "-", module names cannot.
                # NOTE: distributions whose import name differs from the
                # project name (beyond "-" vs "_") are still reported missing.
                __import__(name.replace("-", "_"))
            except Exception:
                # Deliberately broad: a package that is installed but raises
                # while being imported is as unusable as a missing one.
                missing.append(name)

        if not missing:
            self._print_check("PASS", "All requirements.txt packages installed")
        elif len(missing) < 3:
            self._print_check(
                "WARN",
                f"Missing from requirements.txt: {', '.join(missing)}",
                "Install dependencies: pip install -r requirements.txt",
            )
        else:
            self._print_check(
                "FAIL",
                f"Missing {len(missing)} packages from requirements.txt: "
                f"{', '.join(missing[:3])}...",
                "Install dependencies: pip install -r requirements.txt",
            )

    def _check_gpu_driver(self) -> None:
        """Check for GPU drivers via ``nvidia-smi`` or ``rocm-smi``."""
        # Check NVIDIA
        if shutil.which("nvidia-smi"):
            result = self._run_command(
                [
                    "nvidia-smi",
                    "--query-gpu=driver_version",
                    "--format=csv,noheader",
                ]
            )
            if (
                result is not None
                and result.returncode == 0
                and result.stdout.strip()
            ):
                # One line per GPU; report the first one.
                version = result.stdout.strip().splitlines()[0]
                self._print_check("PASS", f"NVIDIA Driver {version}")
                return

        # Check AMD ROCm
        if shutil.which("rocm-smi"):
            result = self._run_command(["rocm-smi", "--showdriverversion"])
            if result is not None and result.returncode == 0:
                self._print_check("PASS", "AMD ROCm driver detected")
                return

        # No GPU found - this is a warning, not a failure
        self._print_check(
            "WARN",
            "No GPU detected (CPU-only mode supported, local inference will be slower)",
            "Optional: Install NVIDIA/AMD drivers for acceleration",
        )

    def _check_cuda(self) -> None:
        """Check CUDA/ROCm availability for GPU acceleration."""
        # Check CUDA toolkit
        if shutil.which("nvcc"):
            result = self._run_command(["nvcc", "--version"])
            if (
                result is not None
                and result.returncode == 0
                and "release" in result.stdout
            ):
                # Text between "release" and the following comma.
                version_line = (
                    result.stdout.split("release")[1].split(",")[0].strip()
                )
                self._print_check("PASS", f"CUDA {version_line}")
                return

        # Check ROCm
        rocm_info_path = Path("/opt/rocm/.info/version")
        if rocm_info_path.exists():
            try:
                version = rocm_info_path.read_text(encoding="utf-8").strip()
            except (OSError, UnicodeDecodeError):
                # The version file exists but is unreadable: still installed.
                self._print_check("PASS", "ROCm installed")
            else:
                self._print_check("PASS", f"ROCm {version}")
            return
        if Path("/opt/rocm").exists():
            self._print_check("PASS", "ROCm installed")
            return

        # Check if PyTorch has CUDA available (software level)
        try:
            import torch

            cuda_available = torch.cuda.is_available()
        except (ImportError, OSError, RuntimeError):
            # torch missing, or installed but unable to load its native
            # libraries: either way it cannot confirm CUDA support.
            cuda_available = False

        if cuda_available:
            self._print_check("PASS", "CUDA available (PyTorch)")
            return

        self._print_check(
            "WARN",
            "CUDA/ROCm not found (GPU acceleration unavailable)",
            "Install CUDA: https://developer.nvidia.com/cuda-downloads",
        )

    def _check_ollama(self) -> None:
        """Check if Ollama is installed and its local API answers."""
        if not shutil.which("ollama"):
            self._print_check(
                "WARN",
                "Ollama not installed",
                "Install Ollama: curl https://ollama.ai/install.sh | sh",
            )
            return

        # Check if running by testing the API
        try:
            import requests

            response = requests.get(self.OLLAMA_TAGS_URL, timeout=2)
            running = response.status_code == 200
        except Exception:
            # Deliberately broad: a missing ``requests`` package, a refused
            # connection and a timeout all mean the server was not confirmed.
            running = False

        if running:
            self._print_check("PASS", "Ollama installed and running")
        else:
            self._print_check(
                "WARN",
                "Ollama installed but not running",
                "Start Ollama: ollama serve &",
            )

    def _check_api_keys(self) -> None:
        """Check if API keys are configured for cloud models."""
        # The error text is not shown; a fixed hint is printed instead.
        is_valid, provider, _error = validate_api_key()

        if is_valid:
            self._print_check("PASS", f"{provider} API key configured")
        else:
            self._print_check(
                "WARN",
                "No API keys configured (required for cloud models)",
                "Configure API key: export ANTHROPIC_API_KEY=sk-... "
                "or run 'cortex wizard'",
            )

    def _check_disk_space(self) -> None:
        """Check free disk space on the filesystem holding the home directory."""
        try:
            usage = shutil.disk_usage(os.path.expanduser("~"))
        except OSError as e:
            self._print_check("WARN", f"Could not check disk space: {type(e).__name__}")
            return

        free_gb = usage.free / (1024**3)
        total_gb = usage.total / (1024**3)

        if free_gb > 20:
            self._print_check(
                "PASS", f"{free_gb:.1f}GB free disk space ({total_gb:.1f}GB total)"
            )
        elif free_gb > 10:
            self._print_check(
                "WARN",
                f"{free_gb:.1f}GB free (20GB+ recommended for models)",
                "Free up disk space: sudo apt clean && docker system prune -a",
            )
        else:
            self._print_check(
                "FAIL",
                f"Only {free_gb:.1f}GB free (critically low)",
                "Free up disk space: sudo apt autoremove && sudo apt clean",
            )

    def _check_memory(self) -> None:
        """Check total system RAM against the 8GB / 16GB thresholds."""
        mem_gb = self._get_system_memory()

        if mem_gb is None:
            self._print_check("WARN", "Could not detect system RAM")
            return

        # The OS reports usable RAM, which is slightly below the nominal
        # module size (e.g. ~15.5GB on a "16GB" machine). Compare the value
        # rounded to the nearest GB so nominal 8GB/16GB systems are not
        # downgraded; the message still shows the measured figure.
        nominal_gb = round(mem_gb)

        if nominal_gb >= 16:
            self._print_check("PASS", f"{mem_gb:.1f}GB RAM")
        elif nominal_gb >= 8:
            self._print_check(
                "WARN",
                f"{mem_gb:.1f}GB RAM (16GB recommended for larger models)",
                "Consider upgrading RAM or use smaller models",
            )
        else:
            self._print_check(
                "FAIL",
                f"Only {mem_gb:.1f}GB RAM (8GB minimum required)",
                "Upgrade RAM to at least 8GB",
            )

    def _get_system_memory(self) -> Optional[float]:
        """
        Get total system memory in GB.

        Returns:
            float: Total system memory in GB, or None if detection fails
        """
        # Try /proc/meminfo (Linux); MemTotal is given in kB.
        try:
            with open("/proc/meminfo", "r", encoding="utf-8") as f:
                for line in f:
                    if line.startswith("MemTotal:"):
                        mem_kb = int(line.split()[1])
                        return mem_kb / (1024**2)
        except (OSError, ValueError, IndexError):
            pass

        # Fall back to psutil when it is installed.
        try:
            import psutil

            return psutil.virtual_memory().total / (1024**3)
        except ImportError:
            pass

        return None

    def _print_summary(self) -> None:
        """Print the result counts, an overall status panel and suggestions."""
        console.print()

        # Counts table: only rows with at least one entry are shown.
        table = Table(show_header=False, box=box.SIMPLE, padding=(0, 1))
        table.add_column("Status", style="bold")
        table.add_column("Count", justify="right")

        if self.passes:
            table.add_row(
                "[green]✓ Passed[/green]", f"[green]{len(self.passes)}[/green]"
            )
        if self.warnings:
            table.add_row(
                "[yellow]⚠ Warnings[/yellow]", f"[yellow]{len(self.warnings)}[/yellow]"
            )
        if self.failures:
            table.add_row("[red]✗ Failures[/red]", f"[red]{len(self.failures)}[/red]")

        console.print(table)
        console.print()

        # Overall status panel: failures outrank warnings.
        if self.failures:
            headline = (
                f"[bold red]❌ {len(self.failures)} critical failure(s) found[/bold red]"
            )
            border = "red"
        elif self.warnings:
            headline = (
                f"[bold yellow]⚠️ {len(self.warnings)} warning(s) found[/bold yellow]"
            )
            border = "yellow"
        else:
            headline = "[bold green]✅ All checks passed! System is healthy.[/bold green]"
            border = "green"
        console.print(Panel(headline, border_style=border, padding=(0, 2)))

        # Show fix suggestions if any
        if self.suggestions:
            console.print()
            console.print("[bold cyan]💡 Suggested fixes:[/bold cyan]")
            for i, suggestion in enumerate(self.suggestions, 1):
                console.print(f" [dim]{i}.[/dim] {suggestion}")
            console.print()
class TestExitCodes:
    """
    Exit-code mapping of ``SystemDoctor.run_checks``.

    Every individual check and the summary printer are patched out, so the
    result lists contain only what the test assigns and the returned code
    does not depend on the machine running the tests.
    """

    @patch.object(SystemDoctor, "_check_python")
    @patch.object(SystemDoctor, "_check_dependencies")
    @patch.object(SystemDoctor, "_check_gpu_driver")
    @patch.object(SystemDoctor, "_check_cuda")
    @patch.object(SystemDoctor, "_check_ollama")
    @patch.object(SystemDoctor, "_check_api_keys")
    @patch.object(SystemDoctor, "_check_disk_space")
    @patch.object(SystemDoctor, "_check_memory")
    @patch.object(SystemDoctor, "_print_summary")
    def test_exit_codes(self, *_mocks):
        # (warnings, failures, expected exit code)
        scenarios = (
            ([], [], 0),  # all good
            (["warn"], [], 1),  # warnings only
            (["warn"], ["fail"], 2),  # failures present
        )

        for warnings, failures, expected_code in scenarios:
            doctor = SystemDoctor()
            doctor.passes = ["ok"]
            doctor.warnings = list(warnings)
            doctor.failures = list(failures)
            assert doctor.run_checks() == expected_code