## Tool to get package details
This tool will be used later in LangChain to get the details of vulnerable packages installed on the system, along with their versions. This data can then be fed to an AI model so it can analyse the details further.

In [None]:
import subprocess
from collections import namedtuple, defaultdict
from typing import Dict, List, Optional
from pydantic import BaseModel, Field

class PackageDetails(BaseModel):
    # Per-package record: the installed version (None when it is not known)
    # together with the vulnerability IDs reported against the package.
    version: Optional[str] = Field(
        None,
        description="Installed package version",
    )
    vulnerabilities: List[str] = Field(
        default_factory=list,
        description="List of vulnerability IDs",
    )

def get_package_versions(package_names: List[str]) -> Dict[str, Optional[str]]:
    """Get installed package versions for multiple packages using a single dpkg-query call.

    Args:
        - package_names (List[str]): List of package names

    Returns:
        - Dict[str, Optional[str]]: Package name to version mapping. Every
          requested name is present as a key; its value is None when
          dpkg-query printed no version for it or could not be started.
    """
    if not package_names:
        return {}

    # Start from "unknown" for every requested package so a failed or partial
    # query still yields exactly one entry per requested name.
    versions: Dict[str, Optional[str]] = {name: None for name in package_names}

    cmd = ["dpkg-query", "-W", "-f=${Package}:${Version}\n"] + list(package_names)
    try:
        # check=False on purpose: a non-zero exit status must not discard the
        # lines that were printed for the packages that were found.
        out = subprocess.run(cmd, capture_output=True, text=True, check=False).stdout
    except OSError:
        # dpkg-query is missing or not executable (e.g. non-Debian host).
        return versions

    for line in out.splitlines():
        # Package names never contain ':', while versions may (epoch), so
        # split on the first ':' only.
        package, separator, version = line.partition(':')
        if not separator:
            continue
        if version:
            versions[package] = version
        else:
            # Empty version: the package is listed without an installed
            # version. Keep any version already recorded for the same name.
            versions.setdefault(package, None)

    return versions

def get_package_details() -> Dict[str, PackageDetails]:
    """Get installed package details including vulnerabilities.

    Runs debsecan to list the active vulnerabilities, groups them per package,
    then looks up the installed version of each of those packages through
    get_package_versions().

    Returns:
        - Dict[str, PackageDetails]: Package as key and PackageDetails as value.
          An empty dict is returned (after printing the error) when debsecan
          exits with a non-zero status or cannot be started.
    """
    try:
        out = subprocess.run(["debsecan"], capture_output=True, text=True, check=True).stdout
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers debsecan not being installed / not being executable.
        print(f"Error running debsecan: {e}")
        return {}

    # First pass: group vulnerability IDs by package. Each usable line has the
    # vulnerability ID in the first column and the package in the second;
    # any further columns are ignored.
    package_vulnerabilities = defaultdict(list)
    for line in out.splitlines():
        parts = line.split()
        if len(parts) >= 2:
            vulnerability_id, package = parts[0], parts[1]
            package_vulnerabilities[package].append(vulnerability_id)

    # Resolve versions for all unique packages in a single batch call.
    package_versions = get_package_versions(list(package_vulnerabilities))

    return {
        package: PackageDetails(
            version=package_versions.get(package),
            vulnerabilities=vulnerabilities,
        )
        for package, vulnerabilities in package_vulnerabilities.items()
    }

Function to return the Debian major version in the format expected by osv.dev. Note that sid/unstable is not recognized as its own version and probably falls under Debian:14, so it is mapped to 14.

In [None]:
import re
def get_debian_version(path: str = '/etc/debian_version', sid_version: str = "14") -> Optional[str]:
    """Detect the current Debian major version from the system.

    Args:
        - path (str): File holding the Debian version string. Defaults to
          /etc/debian_version.
        - sid_version (str): Value returned when the file holds a
          "<codename>/sid" string instead of a number. Defaults to "14".

    Returns:
        - Optional[str]: The leading run of digits of the version string
          (e.g. "12.7" -> "12"), sid_version for a "<codename>/sid" string,
          or None when the file cannot be read or matches neither form.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            version = f.read().strip()
    except OSError:
        # Missing or unreadable file: the version cannot be determined.
        return None

    # Numeric releases: keep only the major number.
    match = re.match(r'(\d+)', version)
    if match:
        return match.group(1)

    # Testing/unstable systems carry "<codename>/sid" rather than a number.
    if re.match(r'.*/sid', version):
        return sid_version

    return None

'14'

## Getting CVE data using osv.dev
We use osv.dev, an effort by Google to gather all vulnerability-related data in a single place and in a consumable format, irrespective of OS and CVE provider. We use pydantic to define classes that parse the required information from the osv.dev API into proper Python objects. The OSV schema is defined at https://ossf.github.io/osv-schema/ and a sample Debian CVE record from OSV is available at https://api.osv.dev/v1/vulns/DEBIAN-CVE-2024-7883

In [None]:
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any, Union
import subprocess
import re

class Severity(BaseModel):
    # A single severity assessment: the scoring scheme plus its score string.
    type: str = Field(
        description="Severity type, e.g., 'CVSSv3'",
    )
    score: str = Field(
        description="Severity score, e.g., 'CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:L/I:N/A:N'",
    )

class Package(BaseModel):
    # Identifies an affected package: its name, its ecosystem, and an
    # optional package URL.
    name: str = Field(
        description="Name of the package",
    )
    ecosystem: str = Field(
        description="Ecosystem of the package, e.g., 'Debian:12'",
    )
    purl: Optional[str] = Field(
        default=None,
        description="Package URL",
    )

class Event(BaseModel):
    """Represents an event in a version range (introduced, fixed, etc.)"""
    introduced: Optional[str] = Field(
        default=None,
        description="Version where vulnerability was introduced",
    )
    fixed: Optional[str] = Field(
        default=None,
        description="Version where vulnerability was fixed",
    )
    last_affected: Optional[str] = Field(
        default=None,
        description="Last version affected by vulnerability",
    )
    limit: Optional[str] = Field(
        default=None,
        description="Limit version for the range",
    )

    def is_fixed_event(self) -> bool:
        """Return True when this event carries a ``fixed`` version."""
        if self.fixed is None:
            return False
        return True

    def get_fixed_version(self) -> Optional[str]:
        """Return the ``fixed`` version, or None when this is not a fix event."""
        fixed_version = self.fixed
        return fixed_version

class Range(BaseModel):
    """Represents a version range with events"""
    type: str = Field(
        description="Range type, e.g., 'ECOSYSTEM', 'SEMVER'",
    )
    events: List[Event] = Field(
        description="List of events in this range",
    )
    repo: Optional[str] = Field(
        default=None,
        description="Repository URL for this range",
    )

    def has_fixed_event(self) -> bool:
        """Return True as soon as one event in this range is a fix event."""
        for event in self.events:
            if event.is_fixed_event():
                return True
        return False

    def get_fixed_versions(self) -> List[str]:
        """Collect the fixed version of every fix event, in event order."""
        collected = []
        for event in self.events:
            if event.is_fixed_event():
                collected.append(event.get_fixed_version())
        return collected

    def get_latest_fixed_version(self) -> Optional[str]:
        """Return the fixed version of the last fix event listed, if any.

        "Latest" means last in list order; versions are not compared.
        """
        fixed_versions = self.get_fixed_versions()
        if not fixed_versions:
            return None
        return fixed_versions[-1]

class Affected(BaseModel):
    # One affected package together with the versions/ranges it is affected
    # in and any ecosystem- or database-specific extras.
    package: Package = Field(
        description="Affected package details",
    )
    versions: Optional[List[str]] = Field(
        default=None,
        description="List of affected versions",
    )
    ranges: Optional[List[Range]] = Field(
        default=None,
        description="Affected version ranges",
    )
    ecosystem_specific: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Ecosystem-specific data",
    )
    database_specific: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Database-specific data",
    )

    def has_any_fix(self) -> bool:
        """Return True when at least one range contains a fix event."""
        # A missing or empty ranges list means there is nothing to inspect.
        return bool(self.ranges) and any(
            range_item.has_fixed_event() for range_item in self.ranges
        )

    def get_all_fixed_versions(self) -> List[str]:
        """Return the fixed versions of all ranges, concatenated in range order."""
        return [
            fixed_version
            for range_item in (self.ranges or [])
            for fixed_version in range_item.get_fixed_versions()
        ]

class Reference(BaseModel):
    # A typed link to external information about the vulnerability.
    type: str = Field(
        description="Reference type, e.g., 'ADVISORY'",
    )
    url: str = Field(
        description="Reference URL",
    )


class DebianCVE(BaseModel):
    # A vulnerability record: identifying metadata plus the list of affected
    # packages. The helper methods answer "is there a fix?" questions and
    # narrow the record down to a single Debian release.
    id: str = Field(description="CVE identifier, e.g., 'CVE-2023-4863'")
    details: str = Field(description="Detailed description of the CVE")
    modified: Optional[str] = Field(None, description="Modification timestamp")
    published: Optional[str] = Field(None, description="Publication timestamp")
    upstream: Optional[List[str]] = Field(None, description="Upstream references")
    references: Optional[List[Reference]] = Field(None, description="List of references")
    severity: Optional[List[Severity]] = Field(None, description="List of severity assessments")
    affected: List[Affected] = Field(description="List of affected packages and versions")

    def has_fixes_available(self) -> bool:
        """Check if any affected package has fixes available"""
        return any(affected_pkg.has_any_fix() for affected_pkg in self.affected)

    def get_packages_with_fixes(self) -> List[Affected]:
        """Get all affected packages that have fixes available"""
        return [affected_pkg for affected_pkg in self.affected if affected_pkg.has_any_fix()]

    def get_packages_without_fixes(self) -> List[Affected]:
        """Get all affected packages that don't have fixes available"""
        return [affected_pkg for affected_pkg in self.affected if not affected_pkg.has_any_fix()]

    def filter_for_system_version(self, version: str) -> 'DebianCVE':
        """Filter for a specific Debian version (e.g., '12', '13')

        Returns a new DebianCVE whose ``affected`` list keeps only the entries
        whose package ecosystem is exactly ``Debian:<version>``; all other
        fields are carried over unchanged.
        """
        target_ecosystem = f"Debian:{version}"
        filtered_affected = [
            affected_pkg for affected_pkg in self.affected
            if affected_pkg.package.ecosystem == target_ecosystem
        ]

        # Create a new instance with filtered data
        return DebianCVE(
            id=self.id,
            details=self.details,
            modified=self.modified,
            published=self.published,
            upstream=self.upstream,
            references=self.references,
            severity=self.severity,
            affected=filtered_affected
        )

    def filter_for_current_system(self) -> 'DebianCVE':
        """Return a new DebianCVE instance filtered for the current Debian version

        When the version cannot be detected, the original instance is
        returned unfiltered.
        """
        current_version = get_debian_version()
        if not current_version:
            # If we can't detect the version, return the original
            return self

        return self.filter_for_system_version(current_version)

    def get_available_ecosystems(self) -> List[str]:
        """Get list of all distinct ecosystems in the affected packages

        Ecosystems are returned in order of first appearance, so the result
        is the same on every run.
        """
        # dict.fromkeys de-duplicates while keeping insertion order; a plain
        # set would yield an order that can change between interpreter runs.
        return list(dict.fromkeys(
            affected_pkg.package.ecosystem for affected_pkg in self.affected
        ))

    
