|
| 1 | +import re |
| 2 | +from collections import Counter |
| 3 | + |
# Regular expression for one Apache access-log line.  It matches the Common
# Log Format prefix that the Combined Log Format shares; whatever follows the
# response size (e.g. referer and user agent) is left unmatched and ignored.
# Capture groups, in order:
#   1 client address   2 identity   3 user   4 timestamp
#   5 request method   6 request URL (optional)   7 protocol (optional)
#   8 status code      9 response size in bytes, or "-"
log_pattern = r'^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+)\s?(\S+)?\s?(\S+)?" (\d{3}) (\d+|-)'


def parse_log(log_file_path):
    """Yield the captured fields of every parsable line in an access log.

    Args:
        log_file_path: Path of the log file to read.

    Yields:
        A 9-tuple of strings per matching line, in the group order of
        ``log_pattern``: (client address, identity, user, timestamp, method,
        url, protocol, status code, size).  ``url`` and ``protocol`` are
        ``None`` when the quoted request holds fewer than three tokens.
        Lines that do not match the pattern are skipped silently.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compile once per call instead of looking the pattern up for every line.
    match_line = re.compile(log_pattern).match

    # Request lines are client-controlled and may contain arbitrary bytes.
    # Decode leniently so one malformed byte cannot abort the whole parse.
    with open(log_file_path, 'r', encoding='utf-8', errors='replace') as log_file:
        for line in log_file:
            match = match_line(line)
            if match:
                yield match.groups()
| 13 | + |
def analyze_logs(log_file_path):
    """Summarise an access log: request totals, page hits and client errors.

    Args:
        log_file_path: Path of the log file; passed straight to ``parse_log``.

    Returns:
        A 5-tuple ``(total_requests, unique_visitor_count, page_visits,
        status_codes, potential_threats)`` where

        * ``total_requests`` is the number of parsed log lines,
        * ``unique_visitor_count`` is the number of distinct client addresses,
        * ``page_visits`` is a ``Counter`` keyed by request URL,
        * ``status_codes`` is a ``Counter`` keyed by status-code string,
        * ``potential_threats`` is a set of ``(ip, url)`` pairs for every
          request answered with a 4xx status.
    """
    total_requests = 0
    unique_visitors = set()
    page_visits = Counter()
    status_codes = Counter()
    potential_threats = set()

    # Records arrive in the group order of log_pattern:
    # (ip, identity, user, timestamp, method, url, protocol, status, size).
    # The URL is the sixth field; the seventh is the protocol version.
    for ip, _, _, _, _, url, _, status_code, _ in parse_log(log_file_path):
        total_requests += 1
        unique_visitors.add(ip)
        # url is None when the request line carried only a single token.
        page_visits[url] += 1
        status_codes[status_code] += 1

        # Any client-error response (4xx, not only 404) is recorded as a
        # potential probe, keyed by who asked for what.
        if status_code.startswith('4'):
            potential_threats.add((ip, url))

    return total_requests, len(unique_visitors), page_visits, status_codes, potential_threats
| 33 | + |
if __name__ == "__main__":
    # Script entry point: analyse one log file and print a plain-text report.
    import sys

    # Take the log path from the first command-line argument when given;
    # otherwise fall back to the original placeholder path.
    log_file_path = sys.argv[1] if len(sys.argv) > 1 else "path/to/your/log/file.log"

    total_requests, unique_visitors, page_visits, status_codes, potential_threats = analyze_logs(log_file_path)

    print(f"Total Requests: {total_requests}")
    print(f"Unique Visitors: {unique_visitors}")

    # Only the ten most frequently requested pages are listed.
    print("\nPopular Pages:")
    for page, count in page_visits.most_common(10):
        print(f"{page}: {count} visits")

    print("\nStatus Codes:")
    for code, count in status_codes.items():
        print(f"Status Code {code}: {count} occurrences")

    # One line per distinct (client, URL) pair that received a 4xx response.
    print("\nPotential Security Threats:")
    for ip, url in potential_threats:
        print(f"IP: {ip}, URL: {url}")
0 commit comments