This project provides quick summaries of the Windows and Sysmon event IDs found in a log file. The information is sourced from the Ultimate IT Security Encyclopedia by web scraping with the BeautifulSoup library.

In [52]:
# import libraries

from bs4 import BeautifulSoup
import requests
import numpy as np
import csv

###### **I. Preparing the foundation data**

In [53]:
# Load the mock event log and collect the distinct event IDs it contains.
# newline='' is the mode the csv module expects, so that newlines embedded in
# quoted fields are parsed correctly.
with open('mock_data.csv', newline='') as csvfile:
  event_data = csv.reader(csvfile, delimiter=',')

  # The first row is the header; map each column name to its position.
  headers = next(event_data)
  col_index = {name: i for i, name in enumerate(headers)}
  id_col = col_index['winlog.event_id']

  # A set comprehension keeps only unique values. Rows that are blank, too
  # short, or have an empty ID are skipped so the numeric sort below cannot
  # fail on an empty string.
  u_event_ids = {
    row[id_col].strip()
    for row in event_data
    if len(row) > id_col and row[id_col].strip()
  }

# Everything below works on in-memory data, so the file is already closed.

# change from set into a list
u_event_ids = list(u_event_ids)

# Sort numerically (key=int) rather than lexically, so '10' comes after '2'.
sorted_ids = sorted(u_event_ids, key=int)
print(sorted_ids)

['1', '2', '3', '7', '8', '10', '11', '12', '13', '17', '18', '22', '26', '29', '4624', '4625', '4634', '4647', '4648', '4672', '4732', '4776', '4798', '4799', '5058', '5059', '5061', '5379']


In [54]:
# Turn the sorted list of ID strings into a 1-D NumPy array and display it.
event_id_array = np.asarray(sorted_ids)
event_id_array

array(['1', '2', '3', '7', '8', '10', '11', '12', '13', '17', '18', '22',
       '26', '29', '4624', '4625', '4634', '4647', '4648', '4672', '4732',
       '4776', '4798', '4799', '5058', '5059', '5061', '5379'],
      dtype='<U4')

###### **II. Mapping the IDs and URL structure**

In [55]:
# Base URLs for the encyclopedia lookup; which one is used depends on how many
# digits the event ID has.

# Windows IDs (4 digits): the ID is appended directly after "eventid=".
url_main = "https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid="

# Sysmon IDs (1 or 2 digits): the ID is appended after a "9000" / "900" prefix,
# so the final eventid always has five digits (e.g. 1 -> 90001, 10 -> 90010).
url_main_1 = url_main + "9000"
url_main_2 = url_main + "900"

In [56]:
# Pick the base URL for each event ID from its digit count:
#   1 digit       -> url_main_1 ("...eventid=9000" + ID)
#   2 digits      -> url_main_2 ("...eventid=900" + ID)
#   anything else -> url_main   (ID appended as-is)
# Only 1-digit IDs get the "9000" prefix. Previously every length other than
# 2 or 4 fell through to it, so a 3- or 5-digit ID produced a malformed eventid.
# NOTE(review): assumes 3- and 5+-digit IDs are looked up by their plain
# number, like the 4-digit Windows IDs - confirm against the site.
prefix_by_digits = {1: url_main_1, 2: url_main_2}
url_col = [prefix_by_digits.get(len(item), url_main) for item in event_id_array]

print(url_col)

['https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=900', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=900', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=900', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=900', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=900', 'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid

###### **III. Checking and preparing the event ID and URL arrays before merging**

In [57]:
# Sanity check: container type and number of event IDs.
(type(event_id_array), len(event_id_array))

(numpy.ndarray, 28)

In [58]:
# Sanity check: container type and number of base URLs (should match the ID count).
(type(url_col), len(url_col))

(list, 28)

In [59]:
# Turn the list of base URLs into a column vector (one URL per row).
url_col_array = np.expand_dims(np.array(url_col), axis=1)

In [60]:
# Confirm the URL column is 2-D: (n_rows, 1).
np.shape(url_col_array)

(28, 1)

In [61]:
# The ID array is still 1-D at this point.
np.shape(event_id_array)

(28,)

In [62]:
# Give the ID array a trailing axis so it is a column vector like url_col_array;
# both must be 2-D with the same row count to be joined column-wise.
event_id_array_2d = np.expand_dims(event_id_array, axis=1)

In [63]:
# Confirm the reshaped ID array is now (n_rows, 1).
np.shape(event_id_array_2d)

(28, 1)

###### **IV. Constructing the URLs**

In [64]:
# Place the base-URL column and the event-ID column side by side (joined along
# axis 1), ready to be combined into working URLs. Show only the first 5 rows.
url_stack = np.concatenate((url_col_array, event_id_array_2d), axis=1)
url_stack[:5]

array([['https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000',
        '1'],
       ['https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000',
        '2'],
       ['https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000',
        '3'],
       ['https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000',
        '7'],
       ['https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=9000',
        '8']], dtype='<U88')

In [65]:
# Build the working URLs: element-wise string concatenation of column 0
# (base URL) with column 1 (event ID). Transposing turns the two columns into
# two rows, which unpack straight into np.char.add's two arguments.
urls_array = np.char.add(*url_stack.T)
urls_array[:5]

array(['https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=90001',
       'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=90002',
       'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=90003',
       'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=90007',
       'https://www.ultimatewindowssecurity.com/securitylog/encyclopedia/event.aspx?eventid=90008'],
      dtype='<U176')

###### **V. Getting Event ID short snippet**

In [66]:
# Fetch every working URL and collect the text of each <p class="hey"> element
# found on the page into `content`.
content = []

# One Session reuses the underlying connection across requests to the same host.
with requests.Session() as session:
  for url in urls_array:
    # requests has no default timeout; without one a stalled server would hang
    # this cell indefinitely.
    response = session.get(url, timeout=30)

    # Do not parse error pages: report the failed lookup and move on so the
    # remaining IDs are still fetched.
    if not response.ok:
      print(f"Skipping {url}: HTTP {response.status_code}")
      continue

    soup = BeautifulSoup(response.text, 'html.parser')
    for tag in soup.find_all('p', {'class': "hey"}):
      content.append(tag.text)

In [67]:
# Trim leading/trailing whitespace (tabs, newlines, spaces) from every snippet.
content = list(map(str.strip, content))
content

['1: Process creation',
 '2: A process changed a file creation time',
 '3: Network connection detected',
 '7: Image loaded',
 '8: CreateRemoteThread',
 '10: ProcessAccess',
 '11: FileCreate',
 '12: RegistryEvent (Object create and delete)',
 '13: RegistryEvent (Value Set)',
 '17: Pipe created',
 '18: Pipe connected',
 '22: DNSEvent',
 '26: File Delete Logged',
 '29: File Executable Detected',
 '4624: An account was successfully logged on',
 '4625: An account failed to log on',
 '4634: An account was logged off',
 '4647: User initiated logoff',
 '4648: A logon was attempted using explicit credentials',
 '4672: Special privileges assigned to new logon',
 '4732: A member was added to a security-enabled local group',
 '4776: The domain controller attempted to validate the credentials for an account',
 "4798: A user's local group membership was enumerated.",
 '4799: A security-enabled local group membership was enumerated',
 '5058: Key file operation',
 '5059: Key migration operation',
 '50