In [3]:
import uuid
import re
import json
import requests
from bs4 import BeautifulSoup

In [13]:
# получаем html код страницы, если она доступна, иначе Error
def get_html_code(url: str) -> str:
    try:
        req = requests.get(url, timeout=10)
        # The HTTP 200 OK success status response code indicates that the request has succeeded.
        if req.status_code == 200:
            return req.text
        else:
            return "Error"
        
    except Exception as e:
        print(e)
        return "Error"

# переводим html код в объект soup класса BeautifulSoup
def html_to_soup(html: str) -> BeautifulSoup:
    soup = BeautifulSoup(html, "html.parser")
    return soup

# вытаскивает из текста совпадения, используя регулярные выражения
def find_by_regex(text: str, regular_expression: re.Pattern) -> list[str]:
    matches = re.findall(regular_expression, text)
    matches = list(set(matches))
    return matches

# обогащает CVE описанием с mitre.org
def enrich_cve(cve: str) -> str:
    try:
        mitre_url = "https://cve.mitre.org/cgi-bin/cvename.cgi?name="
        cve_url = mitre_url + cve

        html_code = get_html_code(cve_url)
        soup = html_to_soup(html_code)

        description = soup.find("th", string="Description")
        description = description.find_next("td", colspan="2").text
    except Exception as e:
        print(e)
        return "Error"

    return description

# записывает данные в json файл
def write_to_json(filename: str, data: list[dict]):
    with open(filename, 'w') as f:
        json.dump(data,
                  f,
                  indent=4,
                  ensure_ascii=True)

# читает данные из json файла
def read_from_json(filename: str):
    with open(filename, 'r') as f:
        data = json.load(f)
    return data

In [15]:
adobe_url = "https://helpx.adobe.com/security/Home.html"
html_code = get_html_code(adobe_url)
soup = html_to_soup(html_code)

table = soup.find_all("table")[0]

final_data = []

for link in table.find_all("a"):
  information_link = "https://helpx.adobe.com" + link['href']
  product = link.text

  html_code = get_html_code(information_link)
  soup = html_to_soup(html_code)
  clean_text = soup.text

  CVEs = find_by_regex(clean_text, r"CVE-\d{4}-\d{2,7}")

  

  dictionary = {
    "id": str(uuid.uuid4()),
    "href": information_link,
    "product": product,
    "cve": {

    }
  }

  for cve in CVEs:
    description = enrich_cve(cve)
    dictionary["cve"][cve] = description

  final_data.append(dictionary)

final_data

[{'id': '215b5cb9-01de-4ba2-9176-c950d668ac21',
  'href': 'https://helpx.adobe.com/security/products/media-encoder/apsb24-53.html',
  'product': 'APSB24-53 :\xa0Security update available for Adobe Media Encoder',
  'cve': {'CVE-2024-41870': 'Media Encoder versions 24.5, 23.6.8 and earlier are affected by an out-of-bounds read vulnerability that could lead to disclosure of sensitive memory. An attacker could leverage this vulnerability to bypass mitigations such as ASLR. Exploitation of this issue requires user interaction in that a victim must open a malicious file.\n\n',
   'CVE-2024-41873': 'Media Encoder versions 24.5, 23.6.8 and earlier are affected by an out-of-bounds read vulnerability that could lead to disclosure of sensitive memory. An attacker could leverage this vulnerability to bypass mitigations such as ASLR. Exploitation of this issue requires user interaction in that a victim must open a malicious file.\n\n',
   'CVE-2024-39377': 'Media Encoder versions 24.5, 23.6.8 and 

In [16]:
write_to_json("cve.json", final_data)

In [17]:
data = read_from_json("cve.json")
data

[{'id': '215b5cb9-01de-4ba2-9176-c950d668ac21',
  'href': 'https://helpx.adobe.com/security/products/media-encoder/apsb24-53.html',
  'product': 'APSB24-53 :\xa0Security update available for Adobe Media Encoder',
  'cve': {'CVE-2024-41870': 'Media Encoder versions 24.5, 23.6.8 and earlier are affected by an out-of-bounds read vulnerability that could lead to disclosure of sensitive memory. An attacker could leverage this vulnerability to bypass mitigations such as ASLR. Exploitation of this issue requires user interaction in that a victim must open a malicious file.\n\n',
   'CVE-2024-41873': 'Media Encoder versions 24.5, 23.6.8 and earlier are affected by an out-of-bounds read vulnerability that could lead to disclosure of sensitive memory. An attacker could leverage this vulnerability to bypass mitigations such as ASLR. Exploitation of this issue requires user interaction in that a victim must open a malicious file.\n\n',
   'CVE-2024-39377': 'Media Encoder versions 24.5, 23.6.8 and 

In [19]:
data[2].get("cve").get('CVE-2023-25690')