In [None]:
# Reading CLF (common log format) lines with regex.
# <IP Address> <Client Id> <User Id> <Time> <Request> <Status> <Size>

import re

# Sample access-log entry used to exercise the pattern below.
line = '127.0.0.1 - rj [13/Nov/2019:14:34:30 -0000] "GET /index.html HTTP/1.0" 200 512'

# One named group per CLF field, listed in the order the fields appear.
clf_pattern = re.compile(
    r'(?P<IP>\d+\.\d+\.\d+\.\d+) '            # dotted-quad address
    r'(?P<ClientId>\S+) '                     # client id token
    r'(?P<UserId>\S+) '                       # user id token
    r'\[(?P<Time>[^\]]+)\] '                  # everything between [ and ], timezone included
    r'"(?P<Request>[^"]*)"\s'                 # quoted request line (may be empty)
    r'(?P<Status>\d{3}) '                     # three-digit status code
    r'(?P<Size>\S+)'                          # size token: a byte count or '-'
)

# (printed label, regex group name) pairs, in output order.
field_labels = (
    ("IP", "IP"),
    ("Client Id", "ClientId"),
    ("User Id", "UserId"),
    ("Time", "Time"),
    ("Request", "Request"),
    ("Status", "Status"),
    ("Size", "Size"),
)

# match() anchors at the start of the line only; trailing text is not rejected.
match = clf_pattern.match(line)
if match is None:
    print("No match")
else:
    for label, group_name in field_labels:
        print(f"{label}:", match.group(group_name))


In [None]:
# Reading and iterating logs in CLF from a file
# Only matches logs from 2024

import re

# Matches the leading fields of a CLF entry: two whitespace-free tokens, a
# third token captured as ClientName (the User Id position in the CLF layout),
# then the bracketed timestamp captured as DateTime. The year is pinned to
# 2024, so entries from any other year do not match.
entry_2024_pattern = re.compile(
    r'\S+ \S+ (?P<ClientName>\S+) '
    r'\[(?P<DateTime>\d{2}/\w{3}/2024:\d{2}:\d{2}:\d{2} [+\-]\d{4})\]'
)

# Stream the log one line at a time rather than read()-ing the whole file, so
# memory use stays flat however large the log is.
# The encoding is explicit so decoding does not depend on the platform locale;
# errors="replace" keeps a stray undecodable byte from aborting the scan.
with open("access_logs.txt", "r", encoding="utf-8", errors="replace") as log_file:
    for entry in log_file:
        # match() anchors at the start of the line, so only entries that begin
        # with the expected fields are reported.
        m = entry_2024_pattern.match(entry)
        if m:
            print(m.group("ClientName", "DateTime"))