<a href="https://colab.research.google.com/github/bhagavanthai724/python-foundation-set/blob/main/13_regex_alert_%26_severity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Write a regex that detects ERROR log lines containing the words "failed", "timeout", or "crash".
import re
# Alert rule: the text "ERROR" followed, later on the same line, by one of
# the failure keywords. Case is ignored; "." does not cross newlines here.
pattern = re.compile(
    r"ERROR.*(failed|timeout|crash)",
    flags=re.IGNORECASE,
)
log = "2024-01-01 ERROR system failed due to timeout"
print(pattern.search(log) is not None)

In [None]:
# Create a regex alert rule that triggers when a log line contains an IP address followed by "denied".
import re
# Alert rule: a dotted quad of 1-3 digit groups, then "denied" somewhere
# later on the same line (case-insensitive).
pattern = re.compile(
    r"\b\d{1,3}(?:\.\d{1,3}){3}\b.*denied",
    flags=re.IGNORECASE,
)
log = "192.168.1.55 access denied"
print(pattern.search(log) is not None)

In [None]:
# Build a function that extracts timestamps using regex and validates correct log timestamp format.
import re
def extract_timestamps(text: str):
    """Return every valid ``YYYY-MM-DD HH:MM:SS`` timestamp found in *text*.

    Candidates are located with a regex and then parsed with
    ``datetime.strptime``; anything that only looks like a timestamp
    (month 13, hour 99, 30 February, ...) is dropped.

    Args:
        text: Arbitrary log text to scan.

    Returns:
        A list of the matching timestamp strings, in order of appearance.
        The list is empty when no valid timestamp is present.
    """
    # Local import keeps this block self-contained.
    from datetime import datetime

    valid = []
    for candidate in re.findall(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", text):
        try:
            # The regex only checks the digit layout; strptime checks that
            # the fields form a real calendar date and clock time.
            datetime.strptime(candidate, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            continue
        valid.append(candidate)
    return valid
# Demo: run the extractor on a sample line and print whatever it returns.
log = "Started at 2025-03-02 12:33:44 and ended later."
print(extract_timestamps(text=log))

In [None]:
# Detect repeated login failures using a regex that matches patterns like "login failed" appearing multiple times.
import re
# Alert rule: "login failed" occurring at least twice. DOTALL lets the ".*"
# between occurrences span line breaks; matching ignores case.
pattern = re.compile(
    r"(login failed.*){2,}",
    flags=re.IGNORECASE | re.DOTALL,
)
log = "login failed\nuser retry\nlogin failed"
print(pattern.search(log) is not None)

In [None]:
# Write a regex that identifies suspicious URL paths containing encoded characters like %2F or %3B.
import re
# Alert rule: percent-encoded "/" (%2F) or ";" (%3B), in either letter case.
pattern = re.compile(r"%2F|%3B", flags=re.IGNORECASE)
log = "GET /admin%2Fpanel HTTP/1.1"
print(pattern.search(log) is not None)

In [None]:
# Extract only the severity labels (INFO, WARN, ERROR, CRITICAL) from a log using regex groups.
import re
# Whole-word, case-sensitive severity labels; the single capture group holds
# the label itself.
pattern = re.compile(
    r"\b(INFO|WARN|ERROR|CRITICAL)\b"
)
log = "2025 system WARN event occurred"
print([hit.group(1) for hit in pattern.finditer(log)])

In [None]:
# Build a regex that detects shell injection attempts (patterns containing ; | &&).
import re
# Shell metacharacters used to chain or pipe extra commands:
# ";" (sequence), "|" (pipe), "||" (or-list) and "&&" (and-list).
# "\|\|?" covers both the single and the double pipe; the optional second
# "|" is matched greedily, so "||" is still captured as one token.
pattern = re.compile(r"(;|\|\|?|&&)")
log = "user executed: rm -rf /;"
print(bool(pattern.search(log)))

In [None]:
# Implement a regex alert that fires when CPU or memory usage exceeds 90% inside a log string.
import re
# Alert rule: "<CPU|Memory> usage: N%" where N is strictly greater than 90.
# Group 1 is the resource name, group 2 the reading.
#   9[1-9](.d+)?      -> 91 .. 99.x
#   90.<nonzero frac> -> 90.01, 90.5, ... (but not 90 or 90.0)
#   100(.d+)?         -> 100 and above-100 fractions
# Case is ignored so "memory usage" / "cpu usage" also trigger.
pattern = re.compile(
    r"\b(CPU|Memory)\s*usage:\s*"
    r"(9[1-9](?:\.\d+)?|90\.\d*[1-9]\d*|100(?:\.\d+)?)%",
    re.I,
)
log = "CPU usage: 95%"
print(bool(pattern.search(log)))

In [None]:
# Extract all email addresses present inside system logs using a regex pattern.
import re
# Email shape: local part, "@", domain labels, and a final alphabetic label
# of at least two letters.
pattern = re.compile(
    r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
)
log = "Alert sent to admin@example.com and support@domain.org"
print([hit.group(0) for hit in pattern.finditer(log)])

In [None]:
# Write a regex to detect potential SQL injection strings (e.g., ' OR 1=1 --).
import re
# Alert rule with three alternatives: an optionally quoted "OR 1=1"
# tautology, a "--" comment marker, or ";--". Matching ignores case.
pattern = re.compile(
    r"'?\s*OR\s+1=1|--|;--",
    flags=re.IGNORECASE,
)
log = "SELECT * FROM users WHERE ' OR 1=1 --"
print(pattern.search(log) is not None)

In [None]:
# Categorize logs into low/medium/high severity based on regex: INFO=low, WARN=medium, ERROR/CRITICAL=high.
import re
def severity(line: str):
    """Map a log line to "high", "medium" or "low".

    The line is tested against the rules from most to least severe and the
    first hit wins: CRITICAL or ERROR gives "high", WARN gives "medium".
    A line that matches neither falls through to "low".
    """
    ordered_rules = (
        (r"CRITICAL|ERROR", "high"),
        (r"WARN", "medium"),
    )
    for regex, label in ordered_rules:
        if re.search(regex, line):
            return label
    return "low"
# Demo: classify one sample line and print the resulting level.
log = "CRITICAL system failure"
print(severity(line=log))

In [None]:
# Create a regex that captures any IPv4 address followed by more than 5 rapid repeated requests.
import re
# Alert rule: an IPv4-shaped address followed by more than five "request"
# tokens. Group 1 is the address, group 2 the last "request" matched.
#   - "\s*" inside the repeated group lets the repeats be separated by
#     whitespace (or nothing); without it only "requestrequest..." matches.
#   - "{6,}" encodes "more than 5".
#   - ".*?" allows arbitrary text between the address and the burst; DOTALL
#     lets that text span several lines.
pattern = re.compile(
    r"(\d{1,3}(?:\.\d{1,3}){3}).*?(?:\s*(request)){6,}",
    re.I | re.S,
)
log = "10.0.0.5 request request request request request request"
print(bool(pattern.search(log)))

In [None]:
# Write a regex that flags logs containing suspicious file extensions like .exe, .bat, .sh.
import re
# Alert rule: a ".exe", ".bat" or ".sh" suffix ending at a word boundary,
# in any letter case.
pattern = re.compile(
    r"\.(exe|bat|sh)\b",
    flags=re.IGNORECASE,
)
log = "download malware.exe from server"
print(pattern.search(log) is not None)

In [None]:
# Extract all HTTP status codes using regex and classify 4xx as medium severity and 5xx as high severity.
import re
def classify(code: str):
    """Classify an HTTP status code string by severity.

    Args:
        code: The status code as text, e.g. ``"503"``.

    Returns:
        ``"high"`` for a 5xx code, ``"medium"`` for a 4xx code, and
        ``"low"`` for anything else.
    """
    # fullmatch requires the whole string to be exactly three digits, so
    # inputs like "5030" or "404abc" are not mistaken for status codes.
    if re.fullmatch(r"5\d\d", code):
        return "high"
    if re.fullmatch(r"4\d\d", code):
        return "medium"
    return "low"
# Demo: collect every standalone three-digit number in the line, then print
# the list together with the classification of the first one.
pattern = re.compile(
    r"\b(\d{3})\b"
)
log = "GET /index 503"
codes = [hit.group(1) for hit in pattern.finditer(log)]
print(codes, classify(code=codes[0]))

In [None]:
# Build a function that applies multiple regex patterns and returns the highest matched severity level.
import re
def max_severity(line: str, rules: dict):
    """Return the most severe level whose pattern matches *line*.

    Args:
        line: The log line to test.
        rules: Mapping of level name to a regex (string or compiled
            pattern) that signals that level.

    Returns:
        The matched level with the highest rank, where low < medium < high,
        or "none" when no rule matches.

    Raises:
        KeyError: If a matched level is not one of "low", "medium", "high".
    """
    severity_order = {"low": 1, "medium": 2, "high": 3}

    # First gather every level that fires, then pick the top-ranked one.
    hits = []
    for level, regex in rules.items():
        if re.search(regex, line):
            hits.append(level)

    if hits:
        return max(hits, key=severity_order.__getitem__)
    return "none"
# Demo rule set: severity level -> compiled pattern that signals it.
rules = dict(
    high=re.compile(r"CRITICAL|ERROR"),
    medium=re.compile(r"WARN"),
    low=re.compile(r"INFO"),
)
log = "2025-01-01 ERROR disk failure"
print(max_severity(line=log, rules=rules))