## Vulnerability Class 
Holds data about a CVE together with its EPSS score and percentile. Mostly used to parse EPSS data from JSON or from the downloaded CSV file.

In [None]:
from pydantic import BaseModel, Field
from typing import Optional

class Vulnerability(BaseModel):
    """EPSS record for a single CVE.

    Carries the CVE identifier together with its EPSS probability and
    percentile, plus an optional retrieval date.
    """

    # Required fields: the explicit ``...`` marks "no default".
    cve: str = Field(..., description="Vulnerability identifier, e.g., 'CVE-2023-4863'")
    epss: float = Field(..., description="Probability of exploitation (0.0-1.0)")
    percentile: float = Field(..., description="Percentile rank of this CVE rank")
    # Optional field: defaults to None when no date is supplied.
    date: Optional[str] = Field(default=None, description="Date of the EPSS data retrieval")

Function that translates the Debian version (including sid) into the format recognized by osv.dev data.

In [None]:
import re
def get_debian_version(
    version_file: str = '/etc/debian_version',
    sid_version: str = "14",
) -> Optional[str]:
    """Detect the Debian major version of the running system.

    Args:
        version_file: File holding the Debian version string. Defaults to
            ``/etc/debian_version``.
        sid_version: Major version to report when the file contains a
            ``<codename>/sid`` string (testing/unstable). Defaults to "14".

    Returns:
        The major version as a string (e.g. "12" for "12.7"), ``sid_version``
        for a ``.../sid`` string, or None when the file cannot be read or its
        content matches neither form.
    """
    try:
        with open(version_file, 'r', encoding='utf-8') as f:
            version = f.read().strip()
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable file: the version cannot be determined.
        return None

    # Numeric releases: keep only the leading major number ("12.7" -> "12").
    match = re.match(r'(\d+)', version)
    if match:
        return match.group(1)

    # Testing/unstable report "<codename>/sid" instead of a number.
    if re.match(r'.*\/sid', version):
        return sid_version

    return None

## Pydantic objects for parsing OSV CVE Data

Helper classes to cleanly unmarshal CVE data from osv.dev, as defined by the OSV schema: https://ossf.github.io/osv-schema/

In [None]:
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any

class Severity(BaseModel):
    """One severity assessment attached to a vulnerability record."""

    type: str = Field(..., description="Severity type, e.g., 'CVSSv3'")
    score: str = Field(..., description="Severity score, e.g., 'CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:L/I:N/A:N'")

    def __str__(self) -> str:
        """Render the assessment as its score string."""
        return str(self.score)
    
class Package(BaseModel):
    """Identifies an affected package within an ecosystem."""

    name: str = Field(..., description="Name of the package")
    ecosystem: str = Field(..., description="Ecosystem of the package, e.g., 'Debian:12'")
    purl: Optional[str] = Field(default=None, description="Package URL")

    def __str__(self) -> str:
        """Render the package as its name only."""
        return str(self.name)

class Event(BaseModel):
    """A single entry of a version range (introduced, fixed, last_affected or limit)."""

    introduced: Optional[str] = Field(default=None, description="Version where vulnerability was introduced")
    fixed: Optional[str] = Field(default=None, description="Version where vulnerability was fixed")
    last_affected: Optional[str] = Field(default=None, description="Last version affected by vulnerability")
    limit: Optional[str] = Field(default=None, description="Limit version for the range")

    def is_fixed_event(self) -> bool:
        """Return True when this event carries a ``fixed`` version."""
        if self.fixed is None:
            return False
        return True

    def get_fixed_version(self) -> Optional[str]:
        """Return the ``fixed`` version, or None when the event has none."""
        return self.fixed

    def __str__(self) -> str:
        """Describe the event; ``introduced`` takes precedence over ``fixed``."""
        labelled = (
            ("Introduced in", self.introduced),
            ("Fixed in", self.fixed),
        )
        for label, value in labelled:
            if value:
                return f"{label} {value}"
        # Neither an introduced nor a fixed version is set.
        return "Not fixed"

class Range(BaseModel):
    """A version range described by a list of events."""

    type: str = Field(..., description="Range type, e.g., 'ECOSYSTEM', 'SEMVER'")
    events: List[Event] = Field(..., description="List of events in this range")
    repo: Optional[str] = Field(default=None, description="Repository URL for this range")

    def has_fixed_event(self) -> bool:
        """Return True as soon as one event in the range is a fix event."""
        for entry in self.events:
            if entry.is_fixed_event():
                return True
        return False

    def get_fixed_versions(self) -> List[str]:
        """Collect the fixed version of every fix event, in event order."""
        collected = []
        for entry in self.events:
            if entry.is_fixed_event():
                collected.append(entry.get_fixed_version())
        return collected

    def get_latest_fixed_version(self) -> Optional[str]:
        """Return the last listed fixed version, or None when there is none.

        "Latest" means last in event order; versions are not compared.
        """
        versions = self.get_fixed_versions()
        if not versions:
            return None
        return versions[-1]

class Affected(BaseModel):
    """One affected package together with its versions and version ranges."""

    package: Package = Field(..., description="Affected package details")
    versions: Optional[List[str]] = Field(default=None, description="List of affected versions")
    ranges: Optional[List[Range]] = Field(default=None, description="Affected version ranges")
    ecosystem_specific: Optional[Dict[str, Any]] = Field(default=None, description="Ecosystem-specific data")
    database_specific: Optional[Dict[str, Any]] = Field(default=None, description="Database-specific data")

    def has_any_fix(self) -> bool:
        """Return True when at least one range contains a fix event."""
        # A missing or empty ``ranges`` list means there is nothing to inspect.
        return any(entry.has_fixed_event() for entry in (self.ranges or []))

    def get_all_fixed_versions(self) -> List[str]:
        """Return the fixed versions of all ranges, concatenated in range order."""
        return [
            fixed
            for entry in (self.ranges or [])
            for fixed in entry.get_fixed_versions()
        ]

class Reference(BaseModel):
    """A typed link to external material about the vulnerability."""

    type: str = Field(..., description="Reference type, e.g., 'ADVISORY'")
    url: str = Field(..., description="Reference URL")

    def __str__(self) -> str:
        """Render as ``<type>: <url>``."""
        return "{}: {}".format(self.type, self.url)

class EPSS(BaseModel):
    """EPSS score and percentile attached to a vulnerability."""

    score: float = Field(..., description="EPSS score (0.0-1.0)")
    percentile: float = Field(..., description="Percentile rank of this CVE")

class DebianCVE(BaseModel):
    """One vulnerability record in the OSV schema, plus optional EPSS data.

    Besides the parsed fields, the model offers fix-status queries, filters
    that return narrowed copies, and text renderings. Some methods read names
    defined elsewhere in this file: ``epss_lookup`` (``enrich_with_epss``),
    ``cve_bin_map`` (``to_llm_summary``) and ``get_debian_version``
    (``filter_for_current_system``).
    """

    # NOTE: lookups strip a leading 'DEBIAN-' from ``id`` (see _bare_cve_id),
    # so ids such as 'DEBIAN-CVE-2023-4863' are expected here as well.
    id: str = Field(description="CVE identifier, e.g., 'CVE-2023-4863'")
    details: Optional[str] = Field(None, description="Detailed description of the CVE")
    modified: Optional[str] = Field(None, description="Modification timestamp")
    published: Optional[str] = Field(None, description="Publication timestamp")
    upstream: Optional[List[str]] = Field(None, description="Upstream references")
    references: Optional[List[Reference]] = Field(None, description="List of references")
    severity: Optional[List[Severity]] = Field(None, description="List of severity assessments")
    affected: List[Affected] = Field(description="List of affected packages and versions")
    epss: Optional[EPSS] = Field(None, description="EPSS score and percentile")

    def _bare_cve_id(self) -> str:
        """Return ``id`` without a leading 'DEBIAN-' prefix.

        Ids that do not carry the prefix are returned unchanged instead of
        raising, so lookups keyed by plain CVE id work for both forms.
        """
        return self.id.removeprefix('DEBIAN-')

    def _with_affected(self, affected: List[Affected]) -> 'DebianCVE':
        """Build a new instance that differs from this one only in ``affected``."""
        return DebianCVE(
            id=self.id,
            details=self.details,
            modified=self.modified,
            published=self.published,
            upstream=self.upstream,
            references=self.references,
            severity=self.severity,
            affected=affected,
            epss=self.epss,
        )

    def has_fixes_available(self) -> bool:
        """Check if any affected package has fixes available"""
        return any(affected_pkg.has_any_fix() for affected_pkg in self.affected)

    def get_packages_with_fixes(self) -> List[Affected]:
        """Get all affected packages that have fixes available"""
        return [affected_pkg for affected_pkg in self.affected if affected_pkg.has_any_fix()]

    def get_packages_without_fixes(self) -> List[Affected]:
        """Get all affected packages that don't have fixes available"""
        return [affected_pkg for affected_pkg in self.affected if not affected_pkg.has_any_fix()]

    def enrich_with_epss(self) -> None:
        """Set ``epss`` from the module-level ``epss_lookup`` when the CVE is listed.

        Leaves ``epss`` untouched when the CVE id is not in the lookup.
        """
        entry = epss_lookup.get(self._bare_cve_id())
        if entry is not None:
            epss_score, percentile = entry
            self.epss = EPSS(score=epss_score, percentile=percentile)

    def filter_for_system_version(self, version: str) -> 'DebianCVE':
        """Return a copy keeping only entries for ``Debian:<version>`` (e.g. '12', '13')"""
        target_ecosystem = f"Debian:{version}"
        return self._with_affected([
            affected for affected in self.affected
            if affected.package.ecosystem == target_ecosystem
        ])

    def filter_for_current_system(self) -> 'DebianCVE':
        """Return a copy filtered for the Debian version of the running system.

        When the version cannot be detected, this instance itself is returned
        unfiltered.
        """
        current_version = get_debian_version()
        if not current_version:
            return self

        return self.filter_for_system_version(current_version)

    def filter_for_package_name(self, package_name: str) -> 'DebianCVE':
        """Return a copy keeping only entries whose package name equals ``package_name``"""
        return self._with_affected([
            affected for affected in self.affected
            if affected.package.name == package_name
        ])

    def get_available_ecosystems(self) -> List[str]:
        """Get the distinct ecosystems of the affected packages, sorted for stable output"""
        return sorted({affected.package.ecosystem for affected in self.affected})

    def _format_references(self) -> str:
        """Format references section ("" when there are none)"""
        if not self.references:
            return ""

        lines = ["References:\n"]
        lines.extend(f"  - {ref}\n" for ref in self.references)
        return "".join(lines)

    def _format_severity(self) -> str:
        """Format severity section ("" when there is none)"""
        if not self.severity:
            return ""

        lines = ["Severity:\n"]
        lines.extend(f"  - {sev}\n" for sev in self.severity)
        return "".join(lines)

    def _format_versions(self, versions: Optional[List[str]]) -> str:
        """Format versions section ("" for None or an empty list)"""
        if not versions:
            return ""

        lines = ["  Versions:\n"]
        lines.extend(f"    - {version}\n" for version in versions)
        return "".join(lines)

    def _format_fix_status(self, affected: Affected) -> str:
        """Format fix status for an affected package ("" when it has no ranges)"""
        if not affected.ranges:
            return ""

        fixed_events = [
            event for range_item in affected.ranges
            for event in range_item.events
            if event.is_fixed_event()
        ]
        if not fixed_events:
            return "  No fixes available.\n"

        # Event.__str__ prefers "Introduced in ..." when ``introduced`` is set,
        # so only events without it render as a fix status line.
        return "".join(
            f"      * Status: {event}\n"
            for event in fixed_events
            if not event.introduced
        )

    def __str__(self) -> str:
        """Render the full record: header, references, severity, then each package."""
        lines = [f"CVE: {self.id}\nDescription: {self.details}\n"]

        lines.append(self._format_references())
        lines.append(self._format_severity())

        for affected in self.affected:
            lines.append(f"Affected Package: {affected.package}\n")
            lines.append(self._format_versions(affected.versions))
            lines.append(self._format_fix_status(affected))

        return "".join(lines)

    def to_llm_summary(self) -> str:
        """Generates a concise summary optimized for LLM reasoning.

        Reads the module-level ``cve_bin_map`` to list the installed binary
        packages reported for this CVE.
        """
        if self.details:
            # Keep descriptions short; mark truncation only when it happened.
            suffix = "..." if len(self.details) > 300 else ""
            details_line = f"DETAILS: {self.details[:300]}{suffix}"
        else:
            details_line = "No details available."

        summary = [
            f"CVE_ID: {self.id}",
            f"CRITICALITY: {self.severity[0].score if self.severity else 'Unknown'}",
            details_line,
        ]

        if not self.affected:
            summary.append("SYSTEM_IMPACT: No packages affected on this Debian version.")
        else:
            # Same for every package of this CVE, so compute it once; sorted
            # because the lookup may hold a set with arbitrary iteration order.
            installed_pkgs = cve_bin_map.get(self._bare_cve_id()) or ['Unknown']
            installed = f"Installed: {', '.join(sorted(installed_pkgs))}"

            for affected_pkg in self.affected:
                pkg_name = affected_pkg.package.name
                fixed_versions = affected_pkg.get_all_fixed_versions()

                status = "FIX_AVAILABLE" if fixed_versions else "AWAITING_FIX"
                fix_detail = f"Fixed in: {', '.join(fixed_versions)}" if fixed_versions else "No fix version assigned yet."

                summary.append(f"PACKAGE: {pkg_name} | STATUS: {status} | {fix_detail} | {installed}")

        if self.epss is not None:
            summary.append(f"EPSS_SCORE: {self.epss.score} | PERCENTILE: {self.epss.percentile}")
        else:
            summary.append("EPSS_SCORE: Unknown | PERCENTILE: Unknown")

        return "\n".join(summary)

Functions to prefetch the EPSS CSV data so that we do not need to hit the website for every EPSS score lookup.

In [None]:
import requests
import gzip
import csv

def download_epss_csv(dest_path="epss_scores-current.csv.gz") -> bool:
    """Downloads the latest EPSS CSV file.

    The body is streamed into a sibling ``<dest_path>.part`` file and moved
    over ``dest_path`` only after the download completed, so a failed or
    interrupted download never damages a previously downloaded file.

    Args:
        dest_path (str): Path to save the downloaded file.
    Returns:
        bool: True if download succeeded, False otherwise.
    """
    import os

    url = "https://epss.empiricalsecurity.com/epss_scores-current.csv.gz"
    tmp_path = f"{dest_path}.part"
    try:
        # (connect, read) timeouts keep a stalled server from hanging the cell.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        # Swap the finished download into place in a single step.
        os.replace(tmp_path, dest_path)
        return True
    except Exception as e:
        # Deliberate best-effort: any failure keeps the existing file in use.
        print(f"Update failed: {e}. Falling back to existing file.")
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return False
    
def load_epss_map(file_path="epss_scores-current.csv.gz"):
    """Loads EPSS data into a dictionary indexed by CVE ID.

    Args:
        file_path: Path to the gzip-compressed EPSS CSV file.
    Returns:
        dict: Maps each CVE ID to an ``(epss, percentile)`` tuple of floats.
        Rows whose values are missing or not numeric are skipped.
    Raises:
        OSError: If the file cannot be opened or is not valid gzip data.
        KeyError: If the CSV lacks the ``cve``, ``epss`` or ``percentile`` column.
    """
    epss_map = {}
    # newline='' is what the csv module expects from its input file object.
    with gzip.open(file_path, mode='rt', encoding='utf-8', newline='') as f:
        # Drop the '#'-prefixed metadata lines that precede the CSV header.
        data_lines = (line for line in f if not line.startswith('#'))
        for row in csv.DictReader(data_lines):
            try:
                # Parse once here so every consumer sees numbers, matching the
                # (0.0, 0.0) default they use for unknown CVEs.
                scores = (float(row['epss']), float(row['percentile']))
            except (TypeError, ValueError):
                # Short row (None value) or non-numeric text: skip it.
                continue
            epss_map[row['cve']] = scores
    return epss_map

# Refresh the local EPSS file when this cell runs; the boolean result is ignored here.
download_epss_csv()

In [None]:
# Module-level EPSS table, read by DebianCVE.enrich_with_epss and scan_system_for_vulnerabilities.
epss_lookup = load_epss_map()

A function that returns the OS version in the format used by osv.dev data. Sid is always reported as forky/sid (i.e. the next Debian release's codename followed by /sid), so for now it maps to 14. The function needs to be updated once Debian 14 is released and unstable moves on to the next version.

In [None]:
import re
def get_debian_version(
    version_file: str = '/etc/debian_version',
    sid_version: str = "14",
) -> Optional[str]:
    """Detect the Debian major version of the running system.

    Args:
        version_file: File holding the Debian version string. Defaults to
            ``/etc/debian_version``.
        sid_version: Major version to report when the file contains a
            ``<codename>/sid`` string (testing/unstable). Defaults to "14".

    Returns:
        The major version as a string (e.g. "12" for "12.7"), ``sid_version``
        for a ``.../sid`` string, or None when the file cannot be read or its
        content matches neither form.
    """
    try:
        with open(version_file, 'r', encoding='utf-8') as f:
            version = f.read().strip()
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable file: the version cannot be determined.
        return None

    # Numeric releases: keep only the leading major number ("12.7" -> "12").
    match = re.match(r'(\d+)', version)
    if match:
        return match.group(1)

    # Testing/unstable report "<codename>/sid" instead of a number.
    if re.match(r'.*\/sid', version):
        return sid_version

    return None

In [None]:
import subprocess
from collections import defaultdict
from functools import lru_cache
from typing import Dict, Set

def _parse_dpkg_source_lines(stdout: str) -> Dict[str, str]:
    """Parse tab-separated "package, source" lines into a binary -> source map."""
    source_map = {}
    for line in stdout.splitlines():
        pkg_name, _, source_field = line.partition('\t')
        pkg_name = pkg_name.strip()
        if not pkg_name:
            continue
        source_field = source_field.strip()
        # An empty Source field means the source package has the binary's name;
        # otherwise drop a trailing " (version)" suffix.
        source_map[pkg_name] = source_field.split(' ')[0] if source_field else pkg_name
    return source_map


@lru_cache(maxsize=1024)
def get_source_packages_batch(packages: tuple[str, ...]) -> Dict[str, str]:
    """Get source package names for multiple binary packages in a single call.

    Runs one ``dpkg-query -W`` for all packages. Whatever dpkg-query prints is
    used even when it exits non-zero (e.g. because one package is unknown), so
    a single unknown name no longer discards the results for all the others.

    Args:
        packages: Binary package names; a tuple so the call can be cached.

    Returns:
        A dict with one entry per requested package mapping it to its source
        package name. Packages that dpkg-query did not report, or all of them
        when dpkg-query cannot be started, map to their own name. The dict is
        shared with the cache, so callers should not mutate it.
    """
    if not packages:
        return {}

    cmd = ['dpkg-query', '-W', '-f=${Package}\t${Source}\n', *packages]
    try:
        # check=False: a non-zero exit still leaves usable output on stdout.
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    except OSError:
        # dpkg-query could not be executed: fall back to package names.
        return {pkg: pkg for pkg in packages}

    source_map = _parse_dpkg_source_lines(result.stdout)

    # Fill in any packages dpkg-query did not report.
    for pkg in packages:
        source_map.setdefault(pkg, pkg)

    return source_map

def cve_package_map():
    """Build mappings from CVEs to their affected source and binary packages.

    Runs ``debsecan`` and treats the first two whitespace-separated fields of
    each output line as CVE id and binary package name. Binary names are then
    resolved to source packages via ``get_source_packages_batch``.

    Returns:
        A ``(cve_sources, cve_binary_packages)`` tuple:
        ``cve_sources`` is a ``defaultdict(list)`` mapping each CVE to the
        sorted, de-duplicated source package names;
        ``cve_binary_packages`` is a ``defaultdict(set)`` mapping each CVE to
        the binary package names. Both are empty when debsecan cannot be run
        or exits with an error (the error is printed).
    """
    cve_sources = defaultdict(list)
    cve_binary_packages = defaultdict(set)

    try:
        result = subprocess.run(['debsecan'],
                                capture_output=True,
                                text=True,
                                check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Error during debsecan execution: {e}")
        return cve_sources, cve_binary_packages

    # First pass: group binary package names by CVE.
    for line in result.stdout.splitlines():
        parts = line.split()
        if len(parts) >= 2:
            cve_binary_packages[parts[0]].add(parts[1])

    all_packages = set().union(*cve_binary_packages.values())
    if all_packages:
        # Sorted so the same package set always forms the same cache key.
        source_map = get_source_packages_batch(tuple(sorted(all_packages)))

        # Second pass: several binaries may share one source package, so
        # de-duplicate, and sort to make the first entry deterministic.
        for cve, binary_packages in cve_binary_packages.items():
            cve_sources[cve] = sorted(
                {source_map.get(pkg, pkg) for pkg in binary_packages}
            )

    return cve_sources, cve_binary_packages

In [None]:
# cve_src_map: CVE -> source package names; cve_bin_map: CVE -> binary package names from debsecan.
cve_src_map, cve_bin_map = cve_package_map()

In [None]:
import subprocess
from typing import List
from langchain.tools import tool


def scan_system_for_vulnerabilities() -> List[str]:
    """Scan the system with debsecan and return the distinct CVE IDs it reports.

    Reads the module-level ``epss_lookup`` to order the result.

    Returns:
        The IDs sorted by EPSS score, highest first (IDs without an EPSS entry
        count as 0.0; ties are ordered by ID). An empty list is returned when
        debsecan reports nothing, cannot be run, or exits with an error.
    """
    try:
        result = subprocess.run(['debsecan'],
                                capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Error during debsecan execution: {e}")
        return []

    # The first field of each non-blank line is the vulnerability ID.
    cve_ids = {line.split()[0] for line in result.stdout.splitlines() if line.strip()}

    def sort_key(cve_id: str):
        # Negate the score for a descending sort; float() also accepts scores
        # that were loaded as strings.
        return (-float(epss_lookup.get(cve_id, (0.0, 0.0))[0]), cve_id)

    return sorted(cve_ids, key=sort_key)

In [None]:
import requests
from concurrent.futures import ThreadPoolExecutor
from langchain.tools import tool
from pydantic import ValidationError

def research_cve(cve_id: str) -> DebianCVE|str:
    """Fetch the osv.dev record for one CVE and narrow it to this system.

    The record is enriched with EPSS data, filtered for the Debian version of
    the running system and, when ``cve_src_map`` lists source packages for the
    CVE, further filtered to the first of them.

    Args:
        cve_id: CVE identifier without the 'DEBIAN-' prefix.

    Returns:
        The filtered ``DebianCVE``, or an error message string when the
        request fails or the response cannot be parsed.
    """
    try:
        # (connect, read) timeouts so one stalled request cannot block a worker forever.
        response = requests.get(
            f"https://api.osv.dev/v1/vulns/DEBIAN-{cve_id}",
            timeout=(5, 30),
        )
        response.raise_for_status()

        debian_cve = DebianCVE(**response.json())
        debian_cve.enrich_with_epss()
        filter_debian_cve = debian_cve.filter_for_current_system()
        srcpkg = cve_src_map.get(cve_id)
        if srcpkg:
            filter_debian_cve = filter_debian_cve.filter_for_package_name(srcpkg[0])
        return filter_debian_cve
    except requests.RequestException as e:
        return f"Error researching {cve_id}: {e}"
    except ValidationError as e:
        return f"Error parsing CVE data for {cve_id}: {e}"
    except ValueError as e:
        # Body was not valid JSON (raised as plain ValueError by older requests).
        return f"Error parsing CVE data for {cve_id}: {e}"

def research_vulnerabilities(cve_ids: List[str], max_workers: int = 16) -> "List[DebianCVE | str]":
    """Provides more details about the CVE IDs passed

    Each ID is researched with ``research_cve`` on a thread pool.

    Args:
        cve_ids: CVE identifiers to look up.
        max_workers: Upper bound on concurrent lookups, so that a long ID list
            does not start one thread (and one simultaneous request) per CVE.

    Returns:
        One entry per input ID, in input order: a ``DebianCVE`` on success or
        an error message string on failure. Empty list for empty input.
    """
    if not cve_ids:
        return []

    worker_count = max(1, min(len(cve_ids), max_workers))
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        return list(executor.map(research_cve, cve_ids))

In [None]:
import json
def scan_system_status(top: int = 5):
    """Scan the system and research its `top` highest-ranked vulnerabilities.

    Prints the LLM summaries of the successfully researched CVEs, followed by
    the lookups that failed (if any).

    Args:
        top: Number of vulnerabilities, taken from the front of the list
            returned by ``scan_system_for_vulnerabilities``, to research.
            Values below zero are treated as zero.

    Returns:
        The list returned by ``research_vulnerabilities`` for the selected
        CVEs, or a "TOOL ERROR: ..." string when anything raises.
    """
    try:
        vulnerabilities = scan_system_for_vulnerabilities()
        # Only research the requested number of entries.
        selected = vulnerabilities[:max(top, 0)]
        research_summary = research_vulnerabilities(selected)
        errors = [item for item in research_summary if isinstance(item, str)]
        only_debian_cves = [item for item in research_summary if isinstance(item, DebianCVE)]
        print(only_debian_cves)
        print("\n\n------\n\n".join([item.to_llm_summary() for item in only_debian_cves]))
        if errors:
            print("\n\n\n\n")
            print("Error fetching these CVEs:")
            for error in errors:
                print(f"  - {error}")
    except Exception as e:
        # Top-level boundary: report the failure as a string instead of raising.
        import traceback
        traceback.print_exc()
        return f"TOOL ERROR: Getting system vulnerability status failed: {e}"
    return research_summary


In [None]:
# Run the scan and print its return value (the result list, or the error string on failure).
print(scan_system_status(top=5))

In [None]:
# Dump the CVE -> binary package mapping for inspection.
print(cve_bin_map)