### SIEM

In [None]:
import docker 
import requests
import time
import re
import os
import logging
from dataclasses import dataclass
from typing import Generator, Optional
from io import IOBase
from datetime import datetime, timedelta

from dotenv import load_dotenv

# Shared logger for every detection cell.  With the level at ERROR, the
# warning()/info() calls below are dropped and only critical() alerts surface.
logger = logging.getLogger('SIEM')
logger.setLevel(logging.ERROR)

# Load variables from a local .env file (if any) into the environment, then
# read the two API tokens; each is None when its variable is not set.
load_dotenv()
api_token = os.environ.get('API_TOKEN')
api_token_2 = os.environ.get('API_TOKEN_2')

In [None]:
def tail_follow(file: IOBase) -> Generator[str, None, None]:
    """Yield lines as they are appended to *file*, like ``tail -f``.

    On the first iteration the file position is moved to the end, so content
    that already exists is skipped.  The generator never finishes: whenever
    no complete read is available it sleeps for one second and tries again.

    Args:
        file: An open, seekable, readable file object.

    Yields:
        Each non-empty result of ``file.readline()``.
    """
    file.seek(0, 2)  # whence=2 -> position relative to the end of the file
    while True:
        line = file.readline()
        if not line:
            # Nothing new yet; wait before polling again.
            time.sleep(1)
            continue
        yield line

In [None]:
def papertrail_tail(token: str) -> Generator[str, None, None]:
    """Continuously yield log messages from the Papertrail event search API.

    The endpoint is polled in an endless loop with a five second pause after
    each batch.  While the previous response was flagged ``tail`` and carried
    a ``max_id``, the next request resumes from that id; otherwise the request
    asks for events from the last ten seconds.

    Args:
        token: Value sent in the ``X-Papertrail-Token`` header.

    Yields:
        The ``message`` field of every event in each response.

    Raises:
        Whatever ``requests.get`` / ``raise_for_status`` raise on a failed
        request; the generator does not retry.
    """
    endpoint = "https://papertrailapp.com/api/v1/events/search.json"
    auth_headers = {"X-Papertrail-Token": token}
    page_size = 10000

    at_tail = False
    last_seen_id = None

    while True:
        query = {"limit": page_size}
        if at_tail and last_seen_id:
            # Resume right after the newest event of the previous batch.
            query["min_id"] = last_seen_id
        else:
            # Cold start (or not at the tail yet): look back ten seconds.
            query["min_time"] = int(time.time()) - 10

        response = requests.get(endpoint, params=query, headers=auth_headers, timeout=30)
        response.raise_for_status()
        payload = response.json()

        at_tail = payload.get("tail", False)
        last_seen_id = payload.get("max_id", None)

        for event in payload["events"]:
            yield event["message"]
        time.sleep(5)

In [None]:
@dataclass
class PacketInfo:
    """One packet line extracted from the capture log.

    Every field holds the raw text captured by the parsing regexes in this
    file; nothing is converted or validated here.
    """
    timestamp: str  # time of day as printed in the log, e.g. "12:34:56.789012"
    protocol: str   # word that follows the timestamp (e.g. "IP", "ARP")
    src: str        # sender address; for ARP replies, the announced IP
    dst: str        # receiver address; for ARP replies, the MAC after "is-at"
    data: str       # rest of the line (continuation lines may be appended)
    

In [None]:
class TCPDumpParser:
    """Incremental parser turning tcpdump text output into ``PacketInfo`` objects.

    tcpdump prints one header line per packet, optionally followed by
    continuation lines (payload dump).  The parser keeps the packet that is
    currently being assembled between calls, so the log can be fed in
    arbitrary pieces.  A packet is only emitted once the *next* header line
    is seen; the most recent packet therefore stays buffered in
    ``current_packet_content`` until more input arrives.
    """

    # "<H:M:S.frac> <PROTO> <src> > <dst>: <rest>".  Addresses may be dotted
    # IPs or resolved host names, so '-' and '_' are accepted in addition to
    # alphanumerics and dots.
    _HEADER_RE = re.compile(
        r'(\d+:\d+:\d+\.\d+) (\w+) ([a-zA-Z0-9._-]+) > ([a-zA-Z0-9._-]+): (.+)$'
    )

    def __init__(self):
        # [timestamp, protocol, src, dst, data] of the packet being
        # assembled, or None while no IP packet is in progress.
        self.current_packet_content: Optional[list] = None

    def parse_tcpdump(self, log: str) -> Generator["PacketInfo", None, None]:
        """Consume a chunk of tcpdump output and yield completed packets.

        Args:
            log: One or more lines of tcpdump output separated by ``\\n``.

        Yields:
            A ``PacketInfo`` for the buffered packet each time a new header
            line is encountered.  Only packets whose protocol word is ``IP``
            are buffered; any other header just flushes the previous packet.
        """
        for line in log.split('\n'):
            header = self._HEADER_RE.match(line)

            if header is None:
                # Continuation line: glue it onto the data of the packet in
                # progress.  Lines mentioning ARP are ignored here.
                if self.current_packet_content and "ARP" not in line:
                    self.current_packet_content[-1] += line
                continue

            # A new header closes whatever packet was being assembled.
            if self.current_packet_content:
                yield PacketInfo(*self.current_packet_content)

            if header.group(2) == 'IP':
                self.current_packet_content = list(header.groups())
            else:
                self.current_packet_content = None

In [None]:
# Smoke test: push the live Papertrail feed through the tcpdump parser and
# print every packet it emits.  The feed never ends, so interrupt to stop.
parser = TCPDumpParser()
for line in papertrail_tail(api_token):
    for pkt in parser.parse_tcpdump(line):
        print(pkt)

In [None]:
# Port scan detection
#
# A scan shows up as one source touching many *different* ports of the same
# target within a short time, so the cell tracks the set of distinct
# destination ports per (source IP, destination IP) pair.  Counting packets
# per "ip.port" destination, by contrast, can never fire on a scan because
# every probe has a different destination string.
from collections import defaultdict

# (source IP, destination IP) -> distinct destination ports seen in the window
packet_counts = defaultdict(set)

time_window = timedelta(seconds=10)
packet_threshold = 5  # alert once more than this many distinct ports were hit

start_time = None  # timestamp of the first packet of the current window

parser = TCPDumpParser()
for line in papertrail_tail(api_token):
    for pkt in parser.parse_tcpdump(line):
        logger.info("Packet from %s to %s: %s", pkt.src, pkt.dst, pkt.data)

        # Only the time of day is parsed, so the value wraps at midnight.
        pkt_time = datetime.strptime(pkt.timestamp, "%H:%M:%S.%f")

        # abs() makes the backwards jump at midnight count as "window over"
        # instead of freezing the window until the clock catches up again.
        if start_time is None or abs(pkt_time - start_time) > time_window:
            packet_counts.clear()
            start_time = pkt_time

        # Addresses are expected as "<host>.<port>"; split the port off the
        # end.  NOTE(review): port-less packets (e.g. ICMP) would be
        # mis-split here - confirm the capture filter excludes them.
        src_ip, _sep, _src_port = pkt.src.rpartition('.')
        dst_ip, _sep, dst_port = pkt.dst.rpartition('.')
        if not src_ip or not dst_ip:
            continue  # no "." at all, nothing to attribute a port to

        probed_ports = packet_counts[(src_ip, dst_ip)]
        probed_ports.add(dst_port)

        if len(probed_ports) > packet_threshold:
            logger.critical("Port scan attack detected from: %s", src_ip)

In [None]:
# DOS (SYN flood) detection
#
# Counts connection-opening TCP segments per destination inside a short
# window and raises an alert when one destination receives too many.
from collections import Counter

syn_counts = Counter()  # destination "host.port" -> SYNs seen in the window

time_window = timedelta(seconds=10)
packet_threshold = 1000

start_time = None  # timestamp of the first packet of the current window

parser = TCPDumpParser()
for line in papertrail_tail(api_token):
    for pkt in parser.parse_tcpdump(line):
        logger.info("Packet from %s to %s: %s", pkt.src, pkt.dst, pkt.data)

        # Only the time of day is parsed, so the value wraps at midnight.
        pkt_time = datetime.strptime(pkt.timestamp, "%H:%M:%S.%f")

        # abs() makes the backwards jump at midnight count as "window over".
        if start_time is None or abs(pkt_time - start_time) > time_window:
            syn_counts.clear()
            start_time = pkt_time

        # The parser only ever yields packets whose protocol field is "IP",
        # so TCP has to be recognised from the packet text.  tcpdump prints a
        # pure SYN as "Flags [S]" (a SYN-ACK is "[S.]" and does not match).
        if "Flags [S]" not in pkt.data:
            continue

        # Counter returns 0 for unseen keys, so the first SYN counts as 1 and
        # the lookup below can never raise KeyError.
        syn_counts[pkt.dst] += 1

        if syn_counts[pkt.dst] > packet_threshold:
            logger.critical("DOS (SYN flood) attack from: %s", pkt.src)

In [None]:
# "<HH:MM:SS.frac> <PROTO>, Reply <ipv4> is-at <mac>[ (vendor)], <rest>"
# The optional parenthesised part covers the "(oui Unknown)" style vendor
# note tcpdump may print after the MAC address.
ARP_REPLY_RE = re.compile(
    r'(\d{2}:\d{2}:\d{2}\.\d+) (\w+), Reply (\d+\.\d+\.\d+\.\d+)'
    r' is-at ([0-9a-fA-F:]+)(?: \([^)]*\))?, (.+)$'
)


def parse_arp_reply(line: str) -> Optional["PacketInfo"]:
    """Parse one tcpdump ARP reply line.

    Args:
        line: A single line of tcpdump output.

    Returns:
        A ``PacketInfo`` whose ``src`` is the announced IPv4 address and whose
        ``dst`` is the MAC address following ``is-at``, or ``None`` when the
        line is not an ARP reply in the expected format.
    """
    match = ARP_REPLY_RE.match(line)
    if match is None:
        return None
    return PacketInfo(*match.groups())

In [None]:
# ARP Cache Poisoning detection
#
# Remembers the last `packet_window` MAC addresses announced for every IP and
# alerts as soon as those announcements disagree.  The check runs on every
# reply, so a conflicting MAC is reported immediately rather than only after
# the window has filled up.
from collections import defaultdict, deque

packet_window = 10

# IP -> most recent MACs announced for it (oldest entries fall off the left)
arps = defaultdict(lambda: deque(maxlen=packet_window))

for line in papertrail_tail(api_token):
    pkt = parse_arp_reply(line)
    if pkt is None:
        # Anything that is not an ARP reply ends up here.
        logger.warning("Packet format error: %s", line)
        continue

    logger.info("Packet from %s to %s: %s", pkt.src, pkt.dst, pkt.data)

    # For ARP replies src is the announced IP and dst the MAC after "is-at".
    # Lower-casing keeps "AA:BB" and "aa:bb" from looking like two hosts.
    recent_macs = arps[pkt.src]
    recent_macs.append(pkt.dst.lower())

    if len(set(recent_macs)) > 1:
        logger.critical("Possible ARP cache poisoning detected for IP %s", pkt.src)

In [None]:
# Brute force detection
#
# Looks for JSON payloads that carry an "email" field and reports an address
# once it appears in more than `attempts_threshold` payloads within one
# `time_window`.
import json
from collections import defaultdict

packet_counts = {}  # not read by this cell; kept so the shared namespace is unchanged

time_window = timedelta(minutes=10)
attempts_threshold = 2

start_time = None  # timestamp of the first packet of the current window
failed_attempts = defaultdict(list)  # email -> payloads seen in the window

parser = TCPDumpParser()
for line in papertrail_tail(api_token):
    for pkt in parser.parse_tcpdump(line):
        logger.info("Packet from %s to %s: %s", pkt.src, pkt.dst, pkt.data)

        # Only the time of day is parsed, so the value wraps at midnight.
        pkt_time = datetime.strptime(pkt.timestamp, "%H:%M:%S.%f")

        # abs() makes the backwards jump at midnight count as "window over".
        if start_time is None or abs(pkt_time - start_time) > time_window:
            failed_attempts.clear()
            start_time = pkt_time

        match = re.search(r'\{.*\}', pkt.data)
        if not match:
            continue

        # The payload is captured network traffic: it may be truncated,
        # malformed or hostile, and must not be able to stop the monitor.
        try:
            j = json.loads(match.group(0))
        except (ValueError, RecursionError):
            continue

        email = j.get("email")
        if not isinstance(email, str):
            continue  # missing, or not usable as a dictionary key

        failed_attempts[email].append(j)

        if len(failed_attempts[email]) > attempts_threshold:
            print(f'Potential brute force attack detected on email address {j["email"]}.')

In [None]:
# SQL injection detection
#
# Module-level state for the SQL injection cell.  The loop at the end of the
# cell reads `time_window` and `start_time` and clears `failed_attempts` when
# a window expires; `packet_counts` and `attempts_threshold` are not
# referenced anywhere else in this cell.
import json
from collections import defaultdict

packet_counts = {}

# Length of one observation window.
time_window = timedelta(minutes=10)
attempts_threshold = 2

# Timestamp of the first packet of the current window (None until one is seen).
start_time = None
failed_attempts = defaultdict(list)

# Signatures are compiled once (case-insensitively) instead of on every call.
_SQLI_PATTERNS = tuple(
    re.compile(signature, re.IGNORECASE)
    for signature in (
        # SQL comment-style injections
        r'(--\s*\d)',
        r'(--\s*[a-zA-Z])',
        r'(#\s*\d)',
        r'(#\s*[a-zA-Z])',
        # Stacked queries
        r';',
        # OR and UNION-based injections
        r'(\s+or\s+)',
        r'(\s+union\s+)',
        # Basic SQL injections
        r'(\'\s*=\s*\')',
        r'(\'\s*or\s*\')',
        # Time-delay injections
        r'(waitfor\s+delay)',
        # Out-of-band injections
        r'(;\s*exec\s+master\.\.xp_cmdshell)',
        # Other potentially harmful SQL keywords
        r'(\bdrop\b)',
        r'(\bupdate\b)',
        r'(\bdelete\b)',
        r'(\binsert\b)',
        r'(\bshutdown\b)',
        r'(\bpowershell\b)',
        r'(\bnet\s+user\b)',
        r'(\bexec\b)',
        r'(\bdeclare\b)',
        r'(\bcreate\b)',
        r'(\balter\b)',
        r'(\btruncate\b)',
    )
)


def detect_sql_injection(input_string):
    """Return True when *input_string* matches a known SQL injection signature.

    This is a heuristic keyword/regex check, not a SQL parser: benign text
    containing e.g. a semicolon or the word "update" is reported as well.

    Args:
        input_string: Text to inspect.  ``None`` and the empty string are
            treated as harmless.

    Returns:
        ``True`` if any signature matches (case-insensitively), else ``False``.
    """
    if not input_string:
        return False
    return any(pattern.search(input_string) for pattern in _SQLI_PATTERNS)

# Stream the live feed, pull the first "{...}" span out of every packet and
# run it through detect_sql_injection().  Runs until interrupted.
parser = TCPDumpParser()
for line in papertrail_tail(api_token):
    for pkt in parser.parse_tcpdump(line):
        if not pkt:
            logger.warning(f"Packet format error: {line}")
            continue

        logger.info(f"Packet from {pkt.src} to {pkt.dst}: {pkt.data}")

        pkt_time = datetime.strptime(pkt.timestamp, "%H:%M:%S.%f")

        # Open a new observation window on the first packet or once the
        # current window is older than time_window.
        if start_time is None or pkt_time - start_time > time_window:
            failed_attempts.clear()
            start_time = pkt_time

        match = re.search(r'\{.*\}', pkt.data)
        if not match:
            continue

        json_data = match.group(0)
        print(json_data)
        if detect_sql_injection(json_data):
            print(f'Potential SQL injection detected: {json_data}')

In [None]:
# Password spraying detection
#
# Password spraying tries one password against many accounts, so the cell
# groups login payloads by password and reports when the same password has
# been seen with more than `attempts_threshold` distinct email addresses
# within one `time_window`.
import json
from collections import defaultdict

packet_counts = {}  # not read by this cell; kept so the shared namespace is unchanged

time_window = timedelta(minutes=10)
attempts_threshold = 3

start_time = None  # timestamp of the first packet of the current window
attempts = defaultdict(set)  # password -> distinct emails it was tried with

parser = TCPDumpParser()
for line in papertrail_tail(api_token):
    for pkt in parser.parse_tcpdump(line):
        logger.info("Packet from %s to %s: %s", pkt.src, pkt.dst, pkt.data)

        # Only the time of day is parsed, so the value wraps at midnight.
        pkt_time = datetime.strptime(pkt.timestamp, "%H:%M:%S.%f")

        # Reset this cell's own state when the window expires.  abs() makes
        # the backwards jump at midnight count as "window over".
        if start_time is None or abs(pkt_time - start_time) > time_window:
            attempts.clear()
            start_time = pkt_time

        match = re.search(r'\{.*\}', pkt.data)
        if not match:
            continue

        json_str = match.group(0)
        # Captured traffic may be truncated, malformed or hostile; skip what
        # cannot be decoded instead of swallowing every possible exception.
        try:
            data = json.loads(json_str)
        except (ValueError, RecursionError):
            continue

        email = data.get("email")
        password = data.get("password")
        if not (isinstance(email, str) and isinstance(password, str)):
            continue  # missing, or not usable as set/dict keys

        attempts[password].add(email)

        if len(attempts[password]) > attempts_threshold:
            print(f'Potential password spraying attack detected on email address {data["email"]}.')

In [None]:
def get_container_ip_by_hostname(container_name, network_name='siem_default'):
    """Look up the IP address of a Docker container on one of its networks.

    Args:
        container_name: Name (or id) of the container to inspect.
        network_name: Docker network whose address should be returned.
            Defaults to ``'siem_default'``.

    Returns:
        The container's IP address on that network as a string, or ``None``
        when the container or network cannot be found or any other error
        occurs while querying Docker (a message is printed in that case).
    """
    client = docker.from_env()

    try:
        container = client.containers.get(container_name)
        container.reload()  # Reload container information to get the latest data
        networks = container.attrs['NetworkSettings']['Networks']
        return str(networks[network_name]['IPAddress'])
    except docker.errors.NotFound:
        print(f"Container '{container_name}' not found.")
    except KeyError as e:
        # A bare KeyError message is just the key; say what was missing.
        print(f"Error retrieving IP address: missing key {e}")
    except Exception as e:
        print(f"Error retrieving IP address: {e}")
    finally:
        # Release the HTTP session held by the client.
        client.close()

    return None

In [None]:
# Exposed port detection
#
# Reports traffic that involves the monitored host on a port outside the
# list of commonly used ports.
import json
from collections import defaultdict

packet_counts = {}  # not read by this cell; kept so the shared namespace is unchanged

time_window = timedelta(minutes=10)
attempts_threshold = 2  # not read by this cell

start_time = None  # timestamp of the first packet of the current window
failed_attempts = defaultdict(list)  # only cleared below, never filled here

# Source: https://utho.com/docs/tutorial/most-common-network-port-numbers-for-linux/
commonly_used_ports = [20, 21, 22, 23, 25, 53, 67, 68, 80, 443, 110, 995, 123, 137, 143, 161, 162, 993, 445, 465, 631]

parser = TCPDumpParser()

# None when the lookup failed; in that case no packet below can match.
host_ip = get_container_ip_by_hostname('siem-victim_ubuntu-1')
print(host_ip)

for line in papertrail_tail(api_token_2):
    for pkt in parser.parse_tcpdump(line):
        logger.info("Packet from %s to %s: %s", pkt.src, pkt.dst, pkt.data)

        # Only the time of day is parsed, so the value wraps at midnight.
        pkt_time = datetime.strptime(pkt.timestamp, "%H:%M:%S.%f")

        # abs() makes the backwards jump at midnight count as "window over".
        if start_time is None or abs(pkt_time - start_time) > time_window:
            failed_attempts.clear()
            start_time = pkt_time

        # Addresses are expected as "<host>.<port>".  rpartition never
        # raises: without a "." the host part is simply empty.
        src_addr_ip, _sep, src_addr_port = pkt.src.rpartition('.')
        dst_addr_ip, _sep, dst_addr_port = pkt.dst.rpartition('.')

        if src_addr_ip == host_ip:
            host_port_accessed = src_addr_port
        elif dst_addr_ip == host_ip:
            host_port_accessed = dst_addr_port
        else:
            continue  # packet does not involve the monitored host

        # The port is text; it must be turned into a number before it can be
        # compared with the integer list.  Named services ("http") and other
        # non-numeric suffixes are skipped.
        if not host_port_accessed.isdigit():
            continue

        if int(host_port_accessed) not in commonly_used_ports:
            print(f'Potential port exposure! Detected traffic on unusual port: {host_port_accessed} [{pkt.timestamp} - SRC: {pkt.src}, DST: {pkt.dst}]')