In [18]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
from dotenv import load_dotenv

# Populate os.environ from a local .env file; the call's return value is the
# cell output (True in the recorded run).
# NOTE(review): presumably this supplies the API keys used by the evaluator
# backends further down - confirm against exploitation_evaluator.
load_dotenv()

True

In [20]:
import os

# Ask TensorFlow's native (C++) logging to stay quiet.
# NOTE(review): this presumably has to run before TensorFlow is first imported
# to take effect, and it does not silence the gRPC "ALTS" info line that still
# shows up in the Gemini cell's output - confirm.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})

In [21]:
from typing import Dict, List, Optional

from exploitation_evaluator import ExploitationEvaluator


def evaluate(
    evaluator: 'ExploitationEvaluator',
    ground_truth: Optional[Dict[str, str]] = None,
) -> List[dict]:
    """Score an evaluator's exploitation assessments against labelled CVEs.

    For every CVE in ``ground_truth`` the evaluator is queried once, a line
    with the predicted label (and the expected label on a miss) is printed,
    and finally the overall accuracy is printed.

    Args:
        evaluator: Object exposing ``evaluate(cve_id)`` that returns a mapping
            containing at least an ``'assessment'`` key.
        ground_truth: Mapping of CVE id to expected assessment label. When
            omitted, the built-in eight-CVE sample set is used. An explicitly
            passed empty mapping is honoured and evaluates nothing.

    Returns:
        The raw result objects returned by ``evaluator.evaluate``, in the
        iteration order of ``ground_truth``.

    Raises:
        KeyError: If a result lacks the ``'assessment'`` key.
    """
    if ground_truth is None:
        # Default labelled sample; kept here so existing one-argument calls
        # behave exactly as before.
        ground_truth = {
            'CVE-2024-21762': 'active',
            'CVE-2024-8142': 'poc',
            'CVE-2024-45244': 'none',
            'CVE-2024-8140': 'poc',
            'CVE-2024-8133': 'poc',
            'CVE-2024-8131': 'poc',
            'CVE-2024-45187': 'none',
            'CVE-2024-8112': 'none',
        }

    results = []
    hits = 0

    for cve_id, expected in ground_truth.items():
        result = evaluator.evaluate(cve_id)
        results.append(result)

        # Compare once and reuse the outcome for both the message and the tally.
        predicted = result['assessment']
        is_hit = predicted == expected
        hits += int(is_hit)

        # The suffix carries its own leading space, so a hit leaves no trailing
        # whitespace and a miss gets exactly one separating space.
        suffix = '' if is_hit else f' (Actual is {expected}).'
        print(f'Predicted as {predicted}.{suffix}')

    if results:
        print(f'Accuracy is {hits / len(results) * 100:.1f}%')
    else:
        # Nothing was evaluated; avoid dividing by zero.
        print('Accuracy is n/a (no CVEs evaluated).')

    return results

In [26]:
# Score the evaluator constructed with 'gemini' (presumably selecting the
# Gemini model backend - confirm in exploitation_evaluator). evaluate() prints
# one line per CVE plus the overall accuracy and returns the raw result dicts.
gemini_results = evaluate(ExploitationEvaluator('gemini'))

I0000 00:00:1724580394.543449 47534615 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Predicted as active. 
Predicted as poc. 
Predicted as poc.  (Actual is none).
Predicted as poc. 
Predicted as poc. 
Predicted as poc. 
Predicted as poc.  (Actual is none).
Predicted as poc.  (Actual is none).
Accuracy is 62.5%


In [27]:
# Show the raw per-CVE result dicts from the 'gemini' run (cve_id, assessment,
# description, confidence) to inspect the reasoning behind each prediction.
gemini_results

[{'cve_id': 'CVE-2024-21762',
  'assessment': 'active',
  'description': "This vulnerability has been added to CISA's Known Exploited Vulnerabilities Catalog (KEV) based on observed exploitation. A proof of concept is available on GitHub and was published by security researchers at Asset Note.",
  'confidence': 1.0},
 {'cve_id': 'CVE-2024-8142',
  'assessment': 'poc',
  'description': 'A proof-of-concept exploit code is available on GitHub at https://github.com/jadu101/CVE/blob/main/SourceCodester_Daily_Calories_Monitoring_Tool_delete_calorie_XSS.md. This code demonstrates how the vulnerability can be exploited to achieve cross-site scripting in the SourceCodester Daily Calories Monitoring Tool version 1.0.',
  'confidence': 1.0},
 {'cve_id': 'CVE-2024-45244',
  'assessment': 'poc',
  'description': "The Github commit referenced in the National Vulnerability Database entry for this CVE provides a fix for the issue. While this fix doesn't represent a working exploit, it does indicate th

In [28]:
# Score the evaluator constructed with 'openai' (presumably selecting the
# OpenAI model backend - confirm in exploitation_evaluator) on the same
# labelled CVEs; evaluate() prints per-CVE lines and the overall accuracy.
openai_results = evaluate(ExploitationEvaluator('openai'))

Predicted as active. 
Predicted as poc. 
Predicted as none. 
Predicted as poc. 
Predicted as poc. 
Predicted as poc. 
Predicted as none. 
Predicted as poc.  (Actual is none).
Accuracy is 87.5%


In [29]:
# Show the raw per-CVE result dicts from the 'openai' run (cve_id, assessment,
# description, confidence) to inspect the reasoning behind each prediction.
openai_results

[{'cve_id': 'CVE-2024-21762',
  'assessment': 'active',
  'description': "CVE-2024-21762 is a critical vulnerability in Fortinet FortiOS and FortiProxy that allows unauthorized code execution via out-of-bounds write. Sources such as Attackerkb and CISA have reported active exploitation in the wild. Additionally, there are multiple GitHub repositories providing proof of concept for this CVE, suggesting it's being targeted by attackers. References confirm extensive exploitation in real-world scenarios.",
  'confidence': 1},
 {'cve_id': 'CVE-2024-8142',
  'assessment': 'poc',
  'description': 'The vulnerability in SourceCodester Daily Calories Monitoring Tool 1.0, which affects the file /endpoint/delete-calorie.php, has a publicly disclosed exploit indicated as a proof of concept. This is supported by a GitHub repository (https://github.com/jadu101/CVE/blob/main/SourceCodester_Daily_Calories_Monitoring_Tool_delete_calorie_XSS.md) detailing the exploit.',
  'confidence': 0.9},
 {'cve_id': 