In [None]:
# Какие данные и открытые источники будем обрабатывать. Примеры готовых решений от активных участников ИБ-сообщества. Здесь будут рассмотрены такие категории:
# - ИБ-новости
# - TI-отчёты
# - Уязвимости
# - Готовящиеся атаки
# - Индикаторы компрометации

# Как организовать сбор данных TI:
# - Парсинг информации об уязвимостях с официальных сайтов
# - Парсинг новостных сайтов
# - Парсинг TI-отчётов (pdf, web)
# - Парсинг XML (например Microsoft)
# - Парсинг Telegram
# - Сбор информации о фишинговых ресурсах

# Хранение и визуализация данных TI:
# - Plain text
# - JSON
# - STIX
# - “Приземление” в Telegram

# Детектирование фишинговых ресурсов:
# - Выгрузка списков зарегистрированных доменов по API
# - Использование регулярных выражений
# - Анализ whois-данных сайта
# - Анализ контента страницы (header, title, ключевые слова)
# - Детектирование неправомерного использования логотипов и товарных знаков

In [51]:
# Запустите ячейку, для установки всех зависимостей
!pip3 install requests==2.28.2
!pip3 install Telethon==1.27.0
!pip3 install beautifulsoup4==4.11.2
!pip3 install lxml==4.9.2
!pip3 install stix2==3.0.1
!pip3 install stix2-patterns==2.0.0
!pip3 install pandas==2.0.2
!pip3 install whois==0.9.27
!pip3 install selenium==4.10.0


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
^C
[31mERROR: Operation cancelled by user[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m

In [53]:
# Импортируем библиотеки
import os
import socket
import uuid
import re
import json
import xml.etree.ElementTree as ET
import sqlite3
from datetime import datetime

import requests
import whois
import pandas as pd
from bs4 import BeautifulSoup
from lxml.html import fromstring
from stix2.v21 import Identity, Indicator, Sighting, Bundle
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [12]:
# получаем html код страницы, если она доступна, иначе Error
def get_html_code(url: str):
  try:
    req = requests.get(url)
    if req.status_code == 200:
      return req.text
    else:
      return "Error"
  except Exception as e:
    print(e)
    return "Error"

# переводим html код в объект soup класса BeautifulSoup
def html_to_soup(html):
  return BeautifulSoup(html, features="html")


In [13]:
# Парсинг ссылок и новостей с securitylab

def parse_securitylab():
  html = get_html_code("https://www.securitylab.ru/news")
  soup = html_to_soup(html)
  titles, links = [], []
  for el in soup.find_all("a", {"class": "article-card inline-card"}):
    link = el["href"]
    links.append("https://www.securitylab.ru" + link)
    for title in el.find_all("h2", {"class": "article-card-title"}):
      titles.append(title.text)
  return titles, links

titles, links = parse_securitylab()
titles, links

(['HTML Smuggling — новая угроза для европейской кибербезопасности',
  'NYT раскрыла детали шпионского ПО для слежки за населением',
  'Похититель информации Meduza раскроет хакерам любые секреты своих жертв',
  'Четвертая промышленная революция: Утопия ИИ или реальность, которую мы не можем игнорировать?',
  'Хакеры используют новую стратегию вымогательства, объединив мощи программ Crysis и Venus',
  'ИИ позволяет эффективно бороться со скулшутингом и прочими актам насилия с применением оружия',
  'Квантовый взрыв в индустрии: новый квантовый компьютер Google на 47 лет опережает время',
  'Фишинг без слов: как не стать жертвой нового способа кражи аккаунтов',
  'Добро пожаловать в безопасное будущее: VMware, AMD и Samsung ведут отрасль к новой эпохе',
  'Взлом PolyNetwork обернулся для хакеров фиаско',
  'Популярную платформу микроблогинга Twitter* заполонили порноботы',
  'Ученые открывают двери будущего: фотонные чипы симулируют квантовые системы при комнатной температуре',
  'Полит

In [14]:
# Записываем данные в текстовый файл

if not os.path.isfile("news.txt"):
  open("news.txt", "w")

with open("news.txt", "r") as f:
  data = f.read().splitlines()

with open("news.txt", "a+") as f:
  for title, link in zip(titles, links):
    if not f"{title}, {link}" in data:
      f.write(f"{title}, {link}\n")

In [20]:
# Парсим уязвимости с сайта adobe

def parse_adobe():
  url = "https://helpx.adobe.com/security/Home.html"
  cve_url = "https://cve.mitre.org/cgi-bin/cvename.cgi?name="
  pattern = r"CVE-\d{4}-\d{2,5}"
  html = get_html_code(url)
  soup = BeautifulSoup(html)
  for tables in soup.find_all("table"):
    dictionary = {}
    CVE_dict = {}
    for link in tables.find_all("a"):
      link_to_cve = "https://helpx.adobe.com" + link['href']
      dictionary["ID"] = str(uuid.uuid4())
      dictionary["Product"] = link.text
      html_cve = get_html_code(link_to_cve)
      soup_cve = html_to_soup(html_cve)
      CVEs = set(re.findall(pattern, soup_cve.text))
      for cve in CVEs:
        html_mitre = get_html_code(cve_url + cve)
        soup_mitre = html_to_soup(html_mitre)
        description = soup_mitre.find("th", text="Description")
        description = description.find_next("td", colspan="2")
        cve_description = description.text
        CVE_dict[cve] = cve_description
      
      dictionary["CVE"] = CVE_dict
    
    # записываем данные в json файл
    with open("cve.json", "+a") as f:
      json.dump(dictionary, f, indent=4) 

parse_adobe()


  description = soup_mitre.find("th", text="Description")


In [21]:
# Парсим отчет на наличие IOCs в отчете

url = "https://unit42.paloaltonetworks.com/mirai-variant-targets-iot-exploits/"

def parse_ioc(url: str) -> tuple[str, list[str], list[str], list[str], list[str]]:
  req = requests.get(url)
  content = req.text
  soup = html_to_soup(content)
  title = soup.title.text

  pattern_cve = r"CVE-\d{4}-\d{5}"
  matches_cve = re.findall(pattern_cve, content)

  pattern_url = r"hxxps?://\S+"
  matches_url = re.findall(pattern_url, content)

  pattern_ip = r"\d{1,3}\.\d{1,3}\.\d{1,3}\[\.\]\d{1,3}"
  matches_ip = re.findall(pattern_ip, content)

  pattern_hash = r"[A-Fa-f0-9]{64}"
  matches_hash = re.findall(pattern_hash, content)

  return title, matches_cve, matches_url, matches_ip, matches_hash

title, matches_cve, matches_url, matches_ip, matches_hash = parse_ioc(url)
title, matches_cve, matches_url, matches_ip, matches_hash 

('IoT Under Siege: The Anatomy of the Latest Mirai Campaign Leveraging Multiple IoT Exploits',
 ['CVE-2019-12725',
  'CVE-2019-17621',
  'CVE-2019-20500',
  'CVE-2021-25296',
  'CVE-2021-46422',
  'CVE-2022-27002',
  'CVE-2022-29303',
  'CVE-2022-30023',
  'CVE-2022-30525',
  'CVE-2022-31499',
  'CVE-2022-36266',
  'CVE-2022-40005',
  'CVE-2022-45699',
  'CVE-2023-25280',
  'CVE-2023-27240',
  'CVE-2019-12725',
  'CVE-2019-17621',
  'CVE-2019-20500',
  'CVE-2021-25296',
  'CVE-2021-46422',
  'CVE-2022-27002',
  'CVE-2022-29303',
  'CVE-2022-30023',
  'CVE-2022-30525',
  'CVE-2022-31499',
  'CVE-2022-37061',
  'CVE-2022-40005',
  'CVE-2022-45699',
  'CVE-2023-25280',
  'CVE-2023-27240',
  'CVE-2019-12725',
  'CVE-2019-12725',
  'CVE-2019-17621',
  'CVE-2019-17621',
  'CVE-2019-20500',
  'CVE-2019-20500',
  'CVE-2021-25296',
  'CVE-2021-25296',
  'CVE-2021-46422',
  'CVE-2021-46422',
  'CVE-2022-27002',
  'CVE-2022-27002',
  'CVE-2022-29303',
  'CVE-2022-29303',
  'CVE-2022-30023',
  'CV

In [26]:
# функция, обогащающая CVE при помощи cve.mitre.org

def cve_enrich(cve: str) -> tuple[str, str]:
  base_url = "https://cve.mitre.org/cgi-bin/cvename.cgi?name="
  link = base_url + cve
  html = get_html_code(link)
  soup = html_to_soup(html)
  description = soup.find("th", string="Description")
  description = description.find_next("td", colspan="2")
  description = description.text
  return cve, description

cve_enrich('CVE-2022-30525')

CVE-2022-30525 A OS command injection vulnerability in the CGI program of Zyxel USG FLEX 100(W) firmware versions 5.00 through 5.21 Patch 1, USG FLEX 200 firmware versions 5.00 through 5.21 Patch 1, USG FLEX 500 firmware versions 5.00 through 5.21 Patch 1, USG FLEX 700 firmware versions 5.00 through 5.21 Patch 1, USG FLEX 50(W) firmware versions 5.10 through 5.21 Patch 1, USG20(W)-VPN firmware versions 5.10 through 5.21 Patch 1, ATP series firmware versions 5.10 through 5.21 Patch 1, VPN series firmware versions 4.60 through 5.21 Patch 1, which could allow an attacker to modify specific files and then execute some OS commands on a vulnerable device.






In [22]:
# Трансформируем IOCs в STIX формат

# сущность
identity = {
            "type": "identity",
            "spec_version": "2.1",
            "id": "identity--1206ba14-478f-4b0b-9a48-395f690c20a2",
            "created": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            "modified": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            "name": title,
            "identity_class": "TI Report",
        }


objects = []
objects.append(identity)

for hash in matches_hash:
    # шаблон индикатора sha256
    indicator = {
            "type": "indicator",
            "spec_version": "2.1",
            "id": None,
            "created_by_ref": "identity--1206ba14-478f-4b0b-9a48-395f690c20a2",
            "created": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            "modified": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            "name": "Malicious HASH",
            "pattern": None,
            "pattern_type": "stix",
            "valid_from": "2015-06-29T09:10:15.915Z"
    }
    indicator["id"] = f"indicator--{str(uuid.uuid4())}"
    indicator["pattern"] = f"[file:hashes.'SHA-256' = '{hash}']"
    objects.append(indicator)

for ip in matches_ip:
    # шаблон индикатора ipv4
    indicator = {
            "type": "indicator",
            "spec_version": "2.1",
            "id": None,
            "created_by_ref": "identity--1206ba14-478f-4b0b-9a48-395f690c20a2",
            "created": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            "modified": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            "name": "Malicious IP",
            "pattern": None,
            "pattern_type": "stix",
            "valid_from": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    } 
    indicator["id"] = f"indicator--{str(uuid.uuid4())}"
    indicator["pattern"] = f"[ipv4-addr:value = '{ip}']"
    objects.append(indicator)

bundle = Bundle(objects=objects) # создаем объект bundle
data = bundle.serialize(indent=4) # переводим bundle в json формат для удобного отображения
print(data)

{
    "type": "bundle",
    "id": "bundle--d170b245-ec85-45dd-91da-ba928106b00a",
    "objects": [
        {
            "type": "identity",
            "spec_version": "2.1",
            "id": "identity--1206ba14-478f-4b0b-9a48-395f690c20a2",
            "created": "2023-07-03T19:51:25.230189Z",
            "modified": "2023-07-03T19:51:25.230219Z",
            "name": "IoT Under Siege: The Anatomy of the Latest Mirai Campaign Leveraging Multiple IoT Exploits",
            "identity_class": "TI Report"
        },
        {
            "type": "indicator",
            "spec_version": "2.1",
            "id": "indicator--3b08902d-8c3f-4417-92f6-82577e1fec5c",
            "created_by_ref": "identity--1206ba14-478f-4b0b-9a48-395f690c20a2",
            "created": "2023-07-03T19:51:25.230353Z",
            "modified": "2023-07-03T19:51:25.23036Z",
            "name": "Malicious HASH",
            "pattern": "[file:hashes.'SHA-256' = '888f4a852642ce70197f77e213456ea2b3cfca4a592b94647827ca45a

In [24]:
# забираем фишинговые ресурсы с phishtank

url = "https://phishtank.org/phish_search.php?valid=y&active=All&Search=Search"

header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

r = requests.get(url, headers=header)

dfs = pd.read_html(r.text)[0].head(20)
dfs

Unnamed: 0,ID,Phish URL,Submitted,Valid?,Online?
0,8209529,https://resgatepontosagora.us/WUTNRPSLW3SYBK2Q...,by MacaulyDorsey,VALID PHISH,ONLINE
1,8209347,https://www.bluesbeautybar.co/ added on Jul 3r...,by buaya,VALID PHISH,ONLINE
2,8209346,https://home-107485.weeblysite.com/ added on J...,by buaya,VALID PHISH,ONLINE
3,8209345,https://home-107623.square.site/ added on Jul ...,by buaya,VALID PHISH,ONLINE
4,8209344,https://home-107623.weeblysite.com/ added on J...,by buaya,VALID PHISH,ONLINE
5,8209343,https://home-107708.square.site/ added on Jul ...,by buaya,VALID PHISH,ONLINE
6,8209342,https://home-107708.weeblysite.com/ added on J...,by buaya,VALID PHISH,ONLINE
7,8209341,https://home-107717.square.site/ added on Jul ...,by buaya,VALID PHISH,ONLINE
8,8209340,https://home-107717.weeblysite.com/ added on J...,by buaya,VALID PHISH,ONLINE
9,8209339,https://home-107718.square.site/ added on Jul ...,by buaya,VALID PHISH,ONLINE


In [33]:
# создаем БД для фишинговых ресурсов

def create_database():
  conn = sqlite3.connect('phish_data.db')
  cursor = conn.cursor()

  cursor.execute('''CREATE TABLE phish_data (
                    ID INTEGER PRIMARY KEY AUTOINCREMENT,
                    Phish_URL TEXT,
                    Submitted TEXT,
                    Valid INTEGER,
                    Online INTEGER
                    )''')

  conn.commit()
  conn.close()

create_database()


In [35]:
conn = sqlite3.connect("phish_data.db") # подключаем к БД
c = conn.cursor()

sql_query = "INSERT INTO phish_data (ID, Phish_URL, Submitted, Valid, Online) VALUES (?, ?, ?, ?, ?)"

for index, row in dfs.iterrows():
  id_value = row['ID']
  url_value = row['Phish URL'].split()[0]
  submitted_value = row['Submitted']
  valid_value = row['Valid?']
  online_value = row['Online?']

  c.execute(sql_query, (id_value, url_value, submitted_value, valid_value, online_value)) # выполняем sql запрос

conn.commit() # подтверждаем изменения
conn.close() # закрываем подключение

IntegrityError: UNIQUE constraint failed: phish_data.ID

In [38]:
# парсим уязвимости с сайта microsoft в xml формате

def parse_microsoft() -> set[str]:
  CVE = set()
  url = "https://api.msrc.microsoft.com/cvrf/v2.0/document/2023-May"
  html = get_html_code(url)
  root = ET.fromstring(html)
  for child in root:
    if child.tag.endswith("Vulnerability"):
      for child2 in child:
        if child2.tag.endswith("CVE"):
          CVE.add(child2.text)
  return CVE

parse_microsoft()
  

{'CVE-2023-0458',
 'CVE-2023-1998',
 'CVE-2023-2006',
 'CVE-2023-2019',
 'CVE-2023-20958',
 'CVE-2023-2235',
 'CVE-2023-2426',
 'CVE-2023-2459',
 'CVE-2023-2460',
 'CVE-2023-2462',
 'CVE-2023-2463',
 'CVE-2023-2464',
 'CVE-2023-2465',
 'CVE-2023-2466',
 'CVE-2023-2467',
 'CVE-2023-2468',
 'CVE-2023-24881',
 'CVE-2023-24898',
 'CVE-2023-24899',
 'CVE-2023-24900',
 'CVE-2023-24901',
 'CVE-2023-24902',
 'CVE-2023-24903',
 'CVE-2023-24904',
 'CVE-2023-24905',
 'CVE-2023-24932',
 'CVE-2023-24939',
 'CVE-2023-24940',
 'CVE-2023-24941',
 'CVE-2023-24942',
 'CVE-2023-24943',
 'CVE-2023-24944',
 'CVE-2023-24945',
 'CVE-2023-24946',
 'CVE-2023-24947',
 'CVE-2023-24948',
 'CVE-2023-24949',
 'CVE-2023-24950',
 'CVE-2023-24953',
 'CVE-2023-24954',
 'CVE-2023-24955',
 'CVE-2023-2513',
 'CVE-2023-2609',
 'CVE-2023-2610',
 'CVE-2023-2721',
 'CVE-2023-2722',
 'CVE-2023-2723',
 'CVE-2023-2724',
 'CVE-2023-2725',
 'CVE-2023-2726',
 'CVE-2023-28251',
 'CVE-2023-28283',
 'CVE-2023-28290',
 'CVE-2023-29324'

# Сбор данных о домене

In [50]:
req = requests.get("https://01lottery.hire-up.pw/game") # посылаем https запрос
status_code = req.status_code # получаем код ответа
headers = req.headers # header
redirects = req.is_redirect # редиректы
ip_adr = socket.gethostbyname("01lottery.hire-up.pw") # получаем IP адрес домена
print(f"Status code: {req.status_code}\nHeader: {req.headers}\nRedirects: {req.is_redirect}\nIP: {ip_adr}")

Status code: 200
Header: {'Date': 'Mon, 03 Jul 2023 17:14:56 GMT', 'Content-Type': 'text/html; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Last-Modified': 'Thu, 30 Mar 2023 06:09:36 GMT', 'CF-Cache-Status': 'DYNAMIC', 'Report-To': '{"endpoints":[{"url":"https:\\/\\/a.nel.cloudflare.com\\/report\\/v3?s=qi%2BbWjcnGahBmQO0artOoNgF9JDlPRD0npl%2BTms26UxlxFi2CydGrlRXhepRlvLDR7UXjBLUYAYEcf2hyFiHiGoRpq8pqiUv5dHx1DFClaO%2FfJwcBuFFdLDZOTnWH3EmoIuL74GPmw%3D%3D"}],"group":"cf-nel","max_age":604800}', 'NEL': '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}', 'Server': 'cloudflare', 'CF-RAY': '7e10c484880fb7df-AMS', 'Content-Encoding': 'gzip', 'alt-svc': 'h3=":443"; ma=86400'}
Redirects: False
IP: 172.67.129.19


In [46]:
url = "youtube.com"
domain = whois.query(url) # получаем whois информацию о домене
print(domain.__dict__)

{'name': 'youtube.com', 'tld': 'com', 'registrar': 'MarkMonitor Inc.', 'registrant_country': 'US', 'creation_date': datetime.datetime(2005, 2, 15, 5, 13, 12), 'expiration_date': datetime.datetime(2024, 2, 15, 5, 13, 12), 'last_updated': datetime.datetime(2023, 1, 14, 9, 25, 19), 'status': 'clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited', 'statuses': ['clientDeleteProhibited (https://www.icann.org/epp#clientDeleteProhibited)', 'clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited', 'clientTransferProhibited (https://www.icann.org/epp#clientTransferProhibited)', 'clientTransferProhibited https://icann.org/epp#clientTransferProhibited', 'clientUpdateProhibited (https://www.icann.org/epp#clientUpdateProhibited)', 'clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited', 'serverDeleteProhibited (https://www.icann.org/epp#serverDeleteProhibited)', 'serverDeleteProhibited https://icann.org/epp#serverDeleteProhibited', 'serverTransferProhibited 

we have com


In [54]:
# Делаем скриншот веб-ресурса
options = Options()
options.add_argument('--headless') # закоментить, чтобы браузер физически открылся

browser = webdriver.Chrome(options=options)
browser.get("https://01lottery.hire-up.pw/game")
browser.save_screenshot("image.png")

True