In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to local modules take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the API credentials used by the
# evaluator backends below -- confirm against exploitation_evaluator.
load_dotenv()

True

In [3]:
import os

# Set the TensorFlow native log-level variable to '3' (its quietest setting)
# ahead of the evaluator import in a later cell.
# NOTE(review): presumably meant to silence backend start-up logging -- confirm
# which dependency actually reads this variable.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [10]:
from typing import Dict, List, Optional

from exploitation_evaluator import ExploitationEvaluator


def evaluate(
    evaluator: ExploitationEvaluator,
    ground_truth: Optional[Dict[str, str]] = None,
) -> List[dict]:
    """Score an evaluator against labelled CVEs and print its accuracy.

    Each CVE id is passed once to ``evaluator.evaluate(cve_id)``; the
    ``'assessment'`` entry of the returned mapping is compared with the
    expected label. One line is printed per CVE (mismatches also show the
    expected label), followed by an overall accuracy line.

    Args:
        evaluator: Object exposing ``evaluate(cve_id)`` that returns a mapping
            containing at least an ``'assessment'`` key.
        ground_truth: Optional mapping of CVE id to expected assessment label.
            When omitted, the built-in eight-CVE reference set (labels
            ``'active'``, ``'poc'``, ``'none'``) is used.

    Returns:
        The raw results returned by the evaluator, in evaluation order.
    """
    if ground_truth is None:
        # Default reference set: CVE id -> expected assessment label.
        ground_truth = {
            'CVE-2024-21762': 'active',
            'CVE-2024-8142': 'poc',
            'CVE-2024-45244': 'none',
            'CVE-2024-8140': 'poc',
            'CVE-2024-8133': 'poc',
            'CVE-2024-8131': 'poc',
            'CVE-2024-45187': 'none',
            'CVE-2024-8112': 'poc',
        }

    hits = 0
    results = []

    for cve_id, expected in ground_truth.items():
        result = evaluator.evaluate(cve_id)
        results.append(result)

        # Compare once and reuse the outcome for both the report and the tally.
        predicted = result['assessment']
        is_hit = predicted == expected
        if is_hit:
            hits += 1

        # Build the line without the stray trailing/double space of the
        # original single f-string.
        line = f'{cve_id} has been predicted as {predicted}.'
        if not is_hit:
            line += f' (Actual is {expected}).'
        print(line)

    if results:
        print(f'Accuracy is {hits / float(len(results)) * 100}%')
    else:
        # An empty label set would otherwise raise ZeroDivisionError.
        print('Accuracy is undefined: no CVEs were evaluated.')

    return results

In [11]:
# Score the evaluator constructed with the 'gemini' argument and keep the
# per-CVE results for inspection.
gemini_results = evaluate(ExploitationEvaluator('gemini'))

I0000 00:00:1725165632.999448 71023544 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


CVE-2024-21762 has been predicted as active. 
CVE-2024-8142 has been predicted as poc. 
CVE-2024-45244 has been predicted as none. 
CVE-2024-8140 has been predicted as poc. 
CVE-2024-8133 has been predicted as poc. 
CVE-2024-8131 has been predicted as poc. 
CVE-2024-45187 has been predicted as poc.  (Actual is none).
CVE-2024-8112 has been predicted as poc. 
Accuracy is 87.5%


In [15]:
# Display the raw result dicts (cve_id, assessment, justification, confidence)
# collected from the 'gemini' run.
gemini_results

[{'cve_id': 'CVE-2024-21762',
  'assessment': 'active',
  'justification': 'The Cybersecurity and Infrastructure Security Agency (CISA) added CVE-2024-21762 to its Known Exploited Vulnerabilities (KEV) Catalog on February 9, 2024, based on observed exploitation. AttackerKB has identified this vulnerability as being used by attackers in the wild and provides details of real-world attacks that have exploited this vulnerability. ',
  'confidence': 1.0},
 {'cve_id': 'CVE-2024-8142',
  'assessment': 'poc',
  'justification': 'A proof of concept exploit for CVE-2024-8142 is publicly available on GitHub, demonstrating how the vulnerability can be used to achieve Cross-site Scripting (XSS). The repository is maintained by a security researcher, and while there is no indication of active exploitation in the wild, the availability of this exploit code significantly increases the risk of potential attacks.',
  'confidence': 0.7},
 {'cve_id': 'CVE-2024-45244',
  'assessment': 'none',
  'justificat

In [13]:
# Score the evaluator constructed with the 'openai' argument and keep the
# per-CVE results for inspection.
openai_results = evaluate(ExploitationEvaluator('openai'))

CVE-2024-21762 has been predicted as active. 
CVE-2024-8142 has been predicted as poc. 
CVE-2024-45244 has been predicted as none. 
CVE-2024-8140 has been predicted as poc. 
CVE-2024-8133 has been predicted as active.  (Actual is poc).
CVE-2024-8131 has been predicted as active.  (Actual is poc).
CVE-2024-45187 has been predicted as none. 
CVE-2024-8112 has been predicted as poc. 
Accuracy is 75.0%


In [14]:
# Display the raw result dicts (cve_id, assessment, justification, confidence)
# collected from the 'openai' run.
openai_results

[{'cve_id': 'CVE-2024-21762',
  'assessment': 'active',
  'justification': "Reliable and confirmed evidence of real exploitation has been collected and publicly reported by multiple sources. Notably, the vulnerability has been added to CISA's Known Exploited Vulnerabilities (KEV) catalog with concrete examples of exploitation, and there is a public post discussing a proof of concept for remote code execution in FortiGate devices. This demonstrates that the vulnerability is being actively exploited in the wild.",
  'confidence': 1},
 {'cve_id': 'CVE-2024-8142',
  'assessment': 'poc',
  'justification': "An exploit for this vulnerability has been publicly disclosed. Specifically, a proof of concept has been shared on a GitHub repository that demonstrates how an attacker can manipulate the 'calorie' argument in the Daily Calories Monitoring Tool to achieve cross-site scripting (XSS). References to this exploit can be found on VulDB and other related sources.",
  'confidence': 0.9},
 {'cve