In [1]:
%run variables-ds-pydantic.ipynb


In [2]:
import json
# Lookup tables shared by the tool functions defined in later cells.
# NOTE(review): load_epss_map and cve_package_map are not defined in this notebook;
# presumably they are provided by the %run of variables-ds-pydantic.ipynb — confirm.
# Later cells read epss_lookup as CVE ID -> 2-tuple whose first item is the EPSS
# score (the second looks like the percentile — confirm), and cve_pkg_map as
# CVE ID -> list of package names.
epss_lookup = load_epss_map()
cve_pkg_map = cve_package_map()

In [36]:
import subprocess
from typing import List
from langchain.tools import tool


def scan_system_for_vulnerabilities(top: int = 5) -> List[str]:
    """Scan system using debsecan and return list of `top` CVE IDs.

    Runs ``debsecan`` with no arguments, takes the first whitespace-separated
    field of every non-blank output line as a CVE ID, de-duplicates the IDs
    and ranks them by EPSS score (highest first) using the module-level
    ``epss_lookup`` mapping. IDs that are missing from ``epss_lookup`` are
    ranked with a score of 0.0.

    Args:
        top: Maximum number of CVE IDs to return. Values <= 0 return ``[]``.

    Returns:
        Up to `top` CVE IDs, highest EPSS score first. Ties are broken by CVE
        ID so repeated scans return the same order. The list is empty when
        debsecan prints nothing or exits with a non-zero status.
    """
    try:
        result = subprocess.run(['debsecan'],
                                capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error during debsecan execution: {e}")
        # An empty list (not [""]) so callers never try to look up a blank CVE ID.
        return []

    # Skip blank lines: "".split("\n") is [""], which would otherwise produce
    # a bogus empty CVE ID when debsecan reports nothing.
    cve_ids = {line.split()[0] for line in result.stdout.splitlines() if line.strip()}

    # EPSS scores may be stored as strings, so convert to float for sorting.
    # The CVE ID is a secondary key because set iteration order is arbitrary.
    ranked = sorted(
        cve_ids,
        key=lambda cve_id: (-float(epss_lookup.get(cve_id, (0.0, 0.0))[0]), cve_id),
    )
    return ranked[:max(top, 0)]

In [37]:
import requests
from concurrent.futures import ThreadPoolExecutor
from langchain.tools import tool

def research_cve(cve_id: str) -> str:
    """Research a single CVE ID via the OSV API and return a text summary.

    Fetches ``https://api.osv.dev/v1/vulns/DEBIAN-<cve_id>``, builds a
    ``DebianCVE`` from the JSON payload, applies ``filter_for_current_system``
    and, when ``cve_pkg_map`` has an entry for the CVE, narrows the record to
    the first mapped package before rendering it with ``to_llm_summary``.

    Args:
        cve_id: CVE identifier; it is appended to the ``DEBIAN-`` prefix in
            the request URL.

    Returns:
        The summary produced by ``to_llm_summary``, or
        ``"Error researching <cve_id>: <error>"`` when the HTTP request fails
        or times out.
    """
    try:
        # Without a timeout a stalled connection would block this worker
        # thread (and the whole batch) forever. requests raises a Timeout,
        # which is a RequestException and is reported below.
        response = requests.get(
            f"https://api.osv.dev/v1/vulns/DEBIAN-{cve_id}",
            timeout=10,
        )
        response.raise_for_status()

        debian_cve = DebianCVE(**response.json())
        filtered_cve = debian_cve.filter_for_current_system()

        # cve_pkg_map.get returns None for unmapped CVEs; indexing that would
        # raise TypeError and abort every lookup in the batch.
        # NOTE(review): with no mapping the record is not narrowed to one
        # package — confirm the resulting summary size is acceptable.
        packages = cve_pkg_map.get(cve_id)
        if packages:
            filtered_cve = filtered_cve.filter_for_package_name(packages[0])

        # epss_lookup values are unpacked positionally into to_llm_summary;
        # (0, 0) is used when the CVE has no EPSS entry.
        return filtered_cve.to_llm_summary(*epss_lookup.get(cve_id, (0, 0)))
    except requests.RequestException as e:
        return f"Error researching {cve_id}: {e}"


def research_vulnerabilities(cve_ids: List[str]) -> str:
    """Provides more details about the CVE IDs passed

    Looks up every CVE ID concurrently with ``research_cve`` and joins the
    per-CVE results, in input order, with a ``---`` separator line.

    Args:
        cve_ids: CVE identifiers to research. Blank or non-string entries are
            ignored.

    Returns:
        The combined summaries, or ``"No CVE IDs provided for research."``
        when there is nothing to look up.
    """
    # Drop blank entries so an empty ID is never sent to the lookup.
    wanted = [cve_id for cve_id in (cve_ids or [])
              if isinstance(cve_id, str) and cve_id.strip()]
    if not wanted:
        return "No CVE IDs provided for research."

    # The list comes from an LLM tool call and can be arbitrarily long, so cap
    # the pool instead of starting one thread per ID.
    worker_count = min(len(wanted), 8)
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        # Executor.map yields results in input order.
        results = list(executor.map(research_cve, wanted))

    # Combine all results into a single string
    return "\n\n---\n\n".join(results)

In [None]:
import os
from langchain.agents import create_agent
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.tools import tool

# Initialize the Gemini chat model used by the two-tool agent below.
# os.getenv returns None when GOOGLE_API_KEY is unset; the key is not validated here.
# NOTE(review): the recorded report further down is headed "Gemini 2.5 Flash output" —
# confirm that the model id below is the one that actually produced it.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
llm = ChatGoogleGenerativeAI(
    model="gemini-3-flash",  # NOTE(review): verify this id against the provider's model list
    api_key=GOOGLE_API_KEY,
    temperature=0.1
)

# System prompt for the agent: instructs the model to call the scan tool first,
# then pass the resulting CVE list to the research tool, then write the report.
system_prompt = """You are a senior Debian Security Analyst. Your task is as follows:
1. Scan the system for vulnerabilities using the `scan_system_for_vulnerabilities` tool based on number of vulnerabilities input by user.
2. Pass the CVE List from the scan to the `research_vulnerabilities` tool to get detailed information.
3. Analyse the vulnerabilities and prepare a report summarising the top vulnerabilities and system status and remediation.
4. Your mission fails if you fail to execute both tools correctly

Always use both tools in sequence - first scan for vulnerabilities, then research the found CVEs."""

def extract_content(result):
    """Return the text carried by a model or agent response, whatever its shape.

    Shapes are tried in this order:

    1. Objects with a ``content`` attribute. String content is returned as is.
       List content is reduced to its text blocks (dict blocks contribute
       their ``text`` value, non-dict blocks their ``str()``) joined with
       newlines; a list that yields no text falls back to ``str(content)``.
       Any other content is returned as ``str(content)``.
    2. Dicts: the last entry of a non-empty ``messages`` list, then the
       ``content``, ``output`` and ``text`` keys, in that order.
    3. Objects exposing ``text``, then objects exposing ``message``.
    4. Anything else: ``str(result)``.

    Any exception raised along the way is turned into an
    ``"Error extracting content: ..."`` string that includes the raw result.
    """
    try:
        if hasattr(result, 'content'):
            payload = result.content
            if isinstance(payload, str):
                return payload
            if not isinstance(payload, list):
                return str(payload)

            # List of content blocks: keep only the textual parts.
            pieces = []
            for part in payload:
                if not isinstance(part, dict):
                    pieces.append(str(part))
                elif part.get('type') == 'text':
                    pieces.append(part.get('text', ''))
                elif 'text' in part:
                    pieces.append(part['text'])
            if pieces:
                return '\n'.join(pieces)
            return str(payload)

        if isinstance(result, dict):
            # Agent-style response: the final message holds the answer.
            if 'messages' in result and result['messages']:
                return extract_content(result['messages'][-1])
            if 'content' in result:
                # Re-enter through the attribute path with a throwaway holder object.
                holder = type('obj', (object,), {'content': result['content']})()
                return extract_content(holder)
            if 'output' in result:
                return str(result['output'])
            if 'text' in result:
                return str(result['text'])
        elif hasattr(result, 'text'):
            return result.text
        elif hasattr(result, 'message'):
            return extract_content(result.message)

        # Nothing matched: use the plain string representation.
        return str(result)

    except Exception as e:
        return f"Error extracting content: {e}\nRaw result: {str(result)}"

# Create the agent using the LangChain 1.x `create_agent` entry point.
# The two plain functions are wrapped with `tool(...)` here, at registration time,
# so they stay directly callable as ordinary Python functions elsewhere in the notebook.
agent = create_agent(
    model=llm,
    tools=[tool(scan_system_for_vulnerabilities), tool(research_vulnerabilities)],
    system_prompt=system_prompt
)

# Run a single agent invocation and print the text of its final answer.
try:
    result = agent.invoke({
        "messages": [{"role": "user", "content": "Scan my system for top 5 vulnerabilities and provide a comprehensive security analysis report"}]
    })
    
    print("\n" + "="*50)
    print("SECURITY ANALYSIS REPORT")
    print("="*50)
    
    # extract_content normalizes the response into plain text regardless of
    # whether the content arrives as a string or as a list of blocks.
    content = extract_content(result)
    print(content)
    
    # Troubleshooting aids — uncomment to inspect the raw agent result:
    # print(f"\n[DEBUG] Result type: {type(result)}")
    # print(f"[DEBUG] Result attributes: {dir(result)}")
    # print(f"[DEBUG] Raw result: {result}")
        
# Any failure is reported together with a checklist of likely causes; the
# exception is not re-raised, so the cell itself always completes.
except Exception as e:
    print(f"Error executing agent: {e}")
    print(f"Error type: {type(e)}")
    print("\nPlease check:")
    print("- GOOGLE_API_KEY environment variable is set")
    print("- debsecan is installed and accessible")
    print("- Network connectivity for CVE research")
    print("- LangChain dependencies are properly installed")

## Gemini 2.5 Flash output

### Debian Security Analysis Report

**Date:** October 26, 2024

**1. Introduction**
This report summarizes the findings of a security scan conducted on your system, identifying the top 5 vulnerabilities. The analysis includes detailed information on each vulnerability, its potential impact, and recommended remediation strategies.

**2. System Status**
The system currently has 5 identified vulnerabilities across `apt-cacher-ng`, `augeas`, and `avahi` packages. All identified vulnerabilities are currently awaiting a fix from the Debian security team, indicating that immediate patches are not yet available. While the Exploit Prediction Scoring System (EPSS) scores for these vulnerabilities are relatively low, indicating a lower likelihood of active exploitation, their presence still poses a risk to the system's integrity and availability.

**3. Top Vulnerabilities**

Here are the top 5 vulnerabilities identified on your system:

*   **CVE-2025-11147: Reflected Cross-Site Scripting (XSS) in Apt-Cacher-NG**
    *   **Affected Package:** `apt-cacher-ng`
    *   **Criticality:** CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N (Medium)
    *   **Details:** This vulnerability in Apt-Cacher-NG v3.2.1 allows for reflected cross-site scripting (XSS) attacks. Malicious scripts can be executed in `/html/<filename>.html`, potentially leading to session hijacking, defacement, or other client-side attacks if a user is tricked into clicking a malicious link.
    *   **Current Status:** AWAITING_FIX (No fix version assigned yet)
    *   **EPSS Score:** 0.00039 (Percentile: 0.11678) - Indicates a very low probability of exploitation in the wild.

*   **CVE-2025-2588: Null Pointer Dereference in Hercules Augeas**
    *   **Affected Package:** `augeas` (`augeas-lenses`, `augeas-tools`)
    *   **Criticality:** CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:L (Low)
    *   **Details:** A problematic vulnerability in Hercules Augeas 1.14.1 affects the `re_case_expand` function in `src/fa.c`. Manipulation of the `re` argument can lead to a null pointer dereference, potentially causing a denial of service. This vulnerability requires local access to exploit.
    *   **Current Status:** AWAITING_FIX (No fix version assigned yet)
    *   **EPSS Score:** 0.00177 (Percentile: 0.39384) - Indicates a low probability of exploitation in the wild.

*   **CVE-2024-52615: Fixed Source Ports in Avahi-daemon DNS Queries**
    *   **Affected Package:** `avahi` (`avahi-daemon`)
    *   **Criticality:** CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:N (Medium)
    *   **Details:** Avahi-daemon relies on fixed source ports for wide-area DNS queries. This design flaw simplifies the injection of malicious DNS responses by attackers, potentially leading to DNS spoofing and redirection to malicious sites.
    *   **Current Status:** AWAITING_FIX (No fix version assigned yet)
    *   **EPSS Score:** 0.00068 (Percentile: 0.21253) - Indicates a very low probability of exploitation in the wild.

*   **CVE-2024-52616: Predictable DNS Transaction IDs in Avahi-daemon**
    *   **Affected Package:** `avahi` (`avahi-daemon`)
    *   **Criticality:** CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:N (Medium)
    *   **Details:** Avahi-daemon initializes DNS transaction IDs randomly only once at startup and then increments them sequentially. This predictable behavior makes it easier for attackers to guess transaction IDs, facilitating DNS spoofing attacks.
    *   **Current Status:** AWAITING_FIX (No fix version assigned yet)
    *   **EPSS Score:** 0.00086 (Percentile: 0.25054) - Indicates a very low probability of exploitation in the wild.

**4. Remediation Recommendations**

Given that all identified vulnerabilities are currently awaiting a fix, the primary remediation strategy involves monitoring and timely application of updates.

*   **Monitor Debian Security Advisories:** Regularly check the Debian Security Advisories (DSAs) for updates related to `apt-cacher-ng`, `augeas`, and `avahi`. Subscribe to the debian-security-announce mailing list to receive immediate notifications when fixes are released.
*   **Apply Updates Promptly:** As soon as security updates are available for the affected packages, apply them to your system without delay. Use `sudo apt update && sudo apt upgrade` or `sudo apt-get dist-upgrade` to ensure all security patches are installed.
*   **Network Segmentation (for Avahi-daemon):** For CVE-2024-52615 and CVE-2024-52616, consider isolating systems running Avahi-daemon on a separate network segment or restricting its network access to only trusted hosts if its functionality is not required for wide-area DNS queries. This can limit the attack surface for DNS spoofing.
*   **User Awareness and Input Validation (for Apt-Cacher-NG):** For CVE-2025-11147, educate users about the risks of clicking suspicious links. While awaiting a fix, ensure that any user-supplied input to Apt-Cacher-NG is properly validated and sanitized to mitigate XSS risks.
*   **Least Privilege Principle:** Ensure that services and applications run with the minimum necessary privileges to reduce the impact of a successful exploit.

**5. Conclusion**

Your system has several vulnerabilities that require attention. While the EPSS scores suggest a lower immediate threat of exploitation, the presence of these flaws, especially those allowing network-based attacks (Avahi-daemon), necessitates proactive monitoring and remediation. It is crucial to stay informed about Debian security updates and apply them as soon as they become available to maintain a robust security posture. Continuous vigilance and adherence to security best practices are essential for protecting your system against evolving threats.

In [38]:

def scan_system_status(top: int = 5):
    """Scan the system for the `top` vulnerabilities and return their summary.

    Chains ``scan_system_for_vulnerabilities`` and ``research_vulnerabilities``
    so the whole workflow is available as a single call.

    Returns:
        The research summary, or a ``"TOOL ERROR: ..."`` message when either
        step raises.
    """
    try:
        return research_vulnerabilities(scan_system_for_vulnerabilities(top))
    except Exception as exc:
        return f"TOOL ERROR: Getting system vulnerability status failed: {exc}"


In [34]:
# Spot check: show the package list that cve_pkg_map holds for one CVE ID.
cve_pkg_map.get('CVE-2024-27280')

['ruby3.1']

In [39]:
from langchain.tools import tool
from langchain_ollama import ChatOllama
from langchain.agents import create_agent

# Chat model served by an Ollama instance at localhost:11434.
# NOTE: this rebinds the `llm` name that an earlier cell assigned to the Gemini model.
llm = ChatOllama(
    model="llama3.1",
    base_url="http://localhost:11434",
    temperature=0,
)

# Only the combined scan-and-research function is exposed to this model, so it
# has a single tool to call.
tools = [tool(scan_system_status)]
# System prompt for the Ollama-backed agent: restricts the model to the data
# returned by the tool and fixes the layout of the report it must produce.
prompt = """You are a Debian Security Analyst. Analyze ONLY the tool output data provided.

STRICT RULES:
- Use ONLY data from the tool output
- DO NOT add external knowledge or assumptions
- Extract CVE ID, package name, CVSS score, EPSS score, and status from each vulnerability
- Explain real-world impact based on the vulnerability description provided
- For CVSS scores, use these labels: 0.0-3.9 (Low), 4.0-6.9 (Medium), 7.0-8.9 (High), 9.0-10.0 (Critical)

FORMAT:
# Security Analysis Report

## System Status
[Brief count of vulnerabilities found]

## Critical Findings
For each vulnerability from tool data:
- **CVE-ID**: [from data]
- **Package**: [from data]  
- **Severity**: [CVSS score from data] ([Low/Medium/High/Critical label])
- **Impact**: [based on description in tool data]
- **Exploit Probability**: [EPSS score from data]
- **Status**: [fix status from data]

## Recommendations
- Update packages when fixes are available
- Monitor for security advisories
"""

# Build the single-tool agent around the Ollama model.
# NOTE: this rebinds the `agent` name that an earlier cell assigned to the Gemini-backed agent.
agent = create_agent(
    model=llm,
    tools=tools,
    system_prompt=prompt,
)

# The single user request that starts the agent run.
inputs = {
    "messages": [{
        "role": "user",
        "content": "Provide a comprehensive security analysis report for top 5 vulnerabilities on my system.",
    }]
}

# Each streamed chunk is iterated as a mapping of step name -> that step's update.
for chunk in agent.stream(inputs, stream_mode="updates"):
    for step_name, data in chunk.items():
        print(f"\n[Step: {step_name}]")

        # Show only the newest message produced by this step.
        last_msg = data["messages"][-1]
        last_msg.pretty_print()

        # Token usage is optional metadata; skip the stats when it is absent or empty.
        usage = getattr(last_msg, 'usage_metadata', None)
        if not usage:
            continue

        print("\n--- Token Stats ---")
        for label, key in (
            ("Input", "input_tokens"),
            ("Output", "output_tokens"),
            ("Total", "total_tokens"),
        ):
            print(f"{label} Tokens: {usage.get(key)}")


[Step: model]
Tool Calls:
  scan_system_status (88f02042-61bb-44be-8831-043f14e47190)
 Call ID: 88f02042-61bb-44be-8831-043f14e47190
  Args:
    n: 5
    vulnerability_type: top

--- Token Stats ---
Input Tokens: 413
Output Tokens: 27
Total Tokens: 440

[Step: tools]
Name: scan_system_status

CVE_ID: DEBIAN-CVE-2023-44487
CRITICALITY: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
DETAILS: The HTTP/2 protocol allows a denial of service (server resource consumption) because request cancellation can reset many streams quickly, as exploited in the wild in August through October 2023....
PACKAGE: grpc | STATUS: AWAITING_FIX | No fix version assigned yet.
EPSS_SCORE: 0.94427 | PERCENTILE: 0.99981

---

CVE_ID: DEBIAN-CVE-2024-56433
CRITICALITY: CVSS:3.1/AV:L/AC:H/PR:L/UI:N/S:U/C:L/I:L/A:N
DETAILS: shadow-utils (aka shadow) 4.4 through 4.17.0 establishes a default /etc/subuid behavior (e.g., uid 100000 through 165535 for the first user account) that can realistically conflict with the uids of