In [None]:
import requests
from bs4 import BeautifulSoup
import time

def extract_description_and_cwe(url):
    """Download a vulnerability detail page and extract its description and CVSS text.

    Despite the historical function name, the second value returned is the
    text of the element carrying the ``tooltipCvss3NistMetrics`` class, not a
    CWE identifier.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(description, cvss)`` tuple of strings. ``description`` is
        "No description found" and ``cvss`` is "Not found" whenever the
        corresponding element is absent, or when the request or parsing
        fails. The same sentinels are used on both paths so a missing value
        has exactly one spelling for downstream consumers.
    """
    missing_description = "No description found"
    missing_cvss = "Not found"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
        soup = BeautifulSoup(response.content, "html.parser")

        # Description paragraph, located through its data-testid attribute
        description_tag = soup.find("p", {"data-testid": "vuln-description"})
        description = description_tag.text.strip() if description_tag else missing_description

        # CVSS element, located through its CSS class
        cvss_tag = soup.find(class_="tooltipCvss3NistMetrics")
        cvss = cvss_tag.text.strip() if cvss_tag else missing_cvss

        return description, cvss
    except Exception as e:
        # Deliberately best-effort: one failing page is reported and replaced
        # by sentinels so a long scraping run is not aborted.
        print(f"Error fetching data from {url}: {e}")
        return missing_description, missing_cvss

def clean_text(text):
    """Collapse every run of whitespace (spaces, tabs, newlines) into a single space.

    Leading and trailing whitespace is dropped, so the result is always a
    single line with no surrounding blanks.
    """
    # str.split() with no separator splits on any whitespace run and discards
    # empty pieces, so re-joining with one space normalises the whole string.
    words = text.split()
    return ' '.join(words)

def _build_cvss_prompt(cleaned_description):
    """Return the single-line CVSS v3.1 scoring prompt for one CVE description."""
    prompt = f"""
              Analyze the following CVE description and calculate the CVSS v3.1 Base Score. Determine the
              values for each base metric: AV, AC, PR, UI, S, C, I, and A. Summarize each metric's value and
              provide the final CVSS v3.1 vector string.
              Valid options for each metric are as follows:
              - Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P)
              - Attack Complexity (AC): Low (L), High (H)
              - Privileges Required (PR): None (N), Low (L), High (H)
              - User Interaction (UI): None (N), Required (R)
              - Scope (S): Unchanged (U), Changed (C)
              - Confidentiality (C): None (N), Low (L), High (H)
              - Integrity (I): None (N), Low (L), High (H)
              - Availability (A): None (N), Low (L), High (H)
              Summarize each metric’s value and provide the final CVSS v3.1 vector string. Ensure the final line
              of your response contains only the CVSS v3 Vector String in the following format:
              Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
              CVE Description: {cleaned_description}

                """
    # The template is indented for readability in the source; clean_text
    # collapses it to one line so it fits in a single TSV field.
    return clean_text(prompt)


def save_descriptions_and_cwe_to_tsv(start, end, output_path='cti_vsp.tsv'):
    """Scrape a range of CVE-2024 pages and append one TSV row per CVE.

    For every number from ``start`` to ``end`` (both inclusive) the matching
    ``https://nvd.nist.gov/vuln/detail/CVE-2024-<number>`` page is fetched with
    ``extract_description_and_cwe`` and a row with the columns
    ``URL``, ``Description``, ``CVSS`` and ``Prompt`` is appended.

    Args:
        start: First CVE sequence number to fetch (zero-padded to 4 digits).
        end: Last CVE sequence number to fetch, inclusive.
        output_path: TSV file to append to. Defaults to ``'cti_vsp.tsv'``.

    The header row is written only when the output file does not exist yet or
    is empty, so calling this function several times for different ranges
    produces one well-formed table instead of a header in front of every batch.
    """
    import os  # local import: the notebook's import cell does not provide os

    # Decide before opening: opening in append mode creates the file.
    header_needed = not os.path.exists(output_path) or os.path.getsize(output_path) == 0

    with open(output_path, 'a', encoding='utf-8') as file:
        if header_needed:
            file.write('URL\tDescription\tCVSS\tPrompt\n')

        for i in range(start, end + 1):
            cve_id = f"{i:04d}"  # Format CVE ID with leading zeros
            url = f"https://nvd.nist.gov/vuln/detail/CVE-2024-{cve_id}"
            print(f"Processing {url}...")

            description, cvss = extract_description_and_cwe(url)

            # Strip tabs/newlines from every scraped field; a stray one would
            # shift columns or split the row in the TSV output.
            cleaned_description = clean_text(description)
            cleaned_cvss = clean_text(cvss)
            cleaned_prompt = _build_cvss_prompt(cleaned_description)

            print(f"Description found: {cleaned_description[:100]}...")

            file.write(f"{url}\t{cleaned_description}\t{cleaned_cvss}\t{cleaned_prompt}\n")
            # Push each row to disk so an interrupted long run keeps its progress.
            file.flush()

            # Pause between requests to avoid hammering the server.
            time.sleep(2)

# Example usage: scrape CVE-2024-20000 through CVE-2024-23000 (both bounds inclusive)
save_descriptions_and_cwe_to_tsv(start=20000, end=23000)


In [None]:
# Additional CVE-2024 ID ranges to scrape; both bounds of every call are inclusive.
save_descriptions_and_cwe_to_tsv(24000, 25000)
# Start at 23001: the earlier 20000-23000 run already covers CVE-2024-23000,
# so starting at 23000 again would append that record a second time.
save_descriptions_and_cwe_to_tsv(23001, 23500)
save_descriptions_and_cwe_to_tsv(4000, 4900)


In [None]:
import csv

def combine_tsv_files(file1, file2, output_file):
    """Concatenate two tab-separated files into ``output_file`` with a single header.

    Args:
        file1: Path of the first TSV file; its first non-empty row is treated
            as the header of the combined table.
        file2: Path of the second TSV file, appended after ``file1``.
        output_file: Path of the combined file. It is overwritten if present.

    Behaviour:
        * Any later row that is identical to the header (for example the
          header of ``file2``, or headers repeated inside a file that was
          built up by several appending runs) is skipped, so the header
          appears exactly once.
        * Fields are copied verbatim. Double quotes are treated as ordinary
          characters rather than CSV quoting, so quoted words in a field are
          neither stripped on read nor doubled/wrapped on write.
    """
    # The inputs are plain "join with tabs" files without CSV quoting, so
    # quoting is switched off in both directions. quotechar=None is required
    # for the writer: with QUOTE_NONE a configured quote character appearing
    # in the data would otherwise demand an escape character.
    tsv_format = dict(delimiter='\t', quoting=csv.QUOTE_NONE, quotechar=None)

    header = None
    with open(output_file, 'w', encoding='utf-8', newline='') as outfile:
        writer = csv.writer(outfile, **tsv_format)

        for path in (file1, file2):
            # newline='' lets the csv module do its own line-ending handling.
            with open(path, 'r', encoding='utf-8', newline='') as infile:
                for row in csv.reader(infile, **tsv_format):
                    if row:
                        if header is None:
                            header = row
                        elif row == header:
                            # Repeated header row: already written once.
                            continue
                    writer.writerow(row)

# Merge the two scraped tables into one file.
# NOTE(review): the output name differs from the scraper's 'cti_vsp.tsv' only by
# letter case; on a case-insensitive filesystem both names refer to the same file.
combine_tsv_files(
    file1='cti_vsp_F.tsv',
    file2='cti_vsp2.tsv',
    output_file='cti_VSP.tsv',
)
