Log Analysis Script
----------------------
Here is a Python script that processes log files to analyze web traffic and detect suspicious activity. The script performs three tasks: it counts requests per IP address, identifies the most frequently accessed endpoint, and flags IP addresses whose failed login attempts exceed a configurable threshold (default: 10 attempts). The results are displayed in the terminal and saved to a CSV file for further analysis.

Author: K.R. Ankith

In [31]:
# importing libraries
import re
import csv

In [32]:
# Configuration for one analysis run.
log_file = "sample.log"  # access log that is read line by line
output_file_CSV = "log_analysis_results.csv"  # CSV report destination
login_failed_threshold = 10  # an IP is flagged when its failure count is above this

# Regex for a single log line. Named groups: the leading token (ip_address),
# the quoted request split into method and endpoint, and the numeric status.
pattern = r'(?P<ip_address>\S+) .* "(?P<method>\S+) (?P<endpoint>\S+) HTTP/\S+" (?P<status>\d+)'

# Running tallies, filled in while the log is scanned.
ip_requests = dict()        # IP address -> number of requests
endpoint_requests = dict()  # endpoint -> number of requests
failed_logins = dict()      # IP address -> number of failed login attempts

# Processing the log file (sample.log): one pass over the file that fills the
# three tallies. Lines that do not match `pattern` are skipped.
line_regex = re.compile(pattern)  # compiled once instead of being looked up per line

# Decode as UTF-8 and replace undecodable bytes, so that a single malformed
# line cannot abort the whole analysis with a UnicodeDecodeError.
with open(log_file, "r", encoding="utf-8", errors="replace") as file:
    for line in file:
        match = line_regex.match(line)
        if not match:
            continue

        ip = match.group("ip_address")
        endpoint = match.group("endpoint")
        status = int(match.group("status"))

        # Task 1: Count requests per IP Address
        ip_requests[ip] = ip_requests.get(ip, 0) + 1

        # Count requests per endpoint
        endpoint_requests[endpoint] = endpoint_requests.get(endpoint, 0) + 1

        # Task 3: Count failed login attempts (status 401, or the text
        # "Invalid credentials" anywhere on the line)
        if status == 401 or "Invalid credentials" in line:
            failed_logins[ip] = failed_logins.get(ip, 0) + 1

# Task 2: Determine the most accessed endpoint as an (endpoint, count) pair.
# If no log line matched, endpoint_requests is empty and a bare max() would
# raise ValueError; fall back to a ("None", 0) placeholder, in line with the
# "None" rows the report uses for missing data.
most_accessed_endpoint = max(
    endpoint_requests.items(),
    key=lambda x: x[1],
    default=("None", 0),
)

# Task 4: Write the three report sections to the CSV file
with open(output_file_CSV, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)

    # Section 1: requests per IP address, busiest first
    ranked_ips = sorted(ip_requests.items(), key=lambda pair: pair[1], reverse=True)
    writer.writerows([
        ["REQUESTS PER IP ADDRESS:-"],
        [],
        ["IP ADDRESS", "REQUEST COUNTS"],
    ])
    writer.writerows([address, hits] for address, hits in ranked_ips)

    # Section 2: the single most accessed endpoint
    top_endpoint, top_hits = most_accessed_endpoint[0], most_accessed_endpoint[1]
    writer.writerows([
        [],
        ["MOST ACCESSED ENDPOINT:-"],
        [],
        ["ENDPOINT", "ACCESS COUNTS"],
        [top_endpoint, top_hits],
    ])

    # Section 3: IPs whose failed-login count is above the threshold
    writer.writerows([
        [],
        ["SUSPICIOUS ACTIVITY"],
        [],
        ["IP ADDRESS", "LOGIN FAILED COUNTS"],
    ])
    flagged_rows = [
        [address, failures]
        for address, failures in failed_logins.items()
        if failures > login_failed_threshold
    ]
    # A single None/None row stands in when nothing was flagged, whether
    # because no failures were recorded or because none crossed the threshold.
    writer.writerows(flagged_rows if flagged_rows else [["None", "None"]])

# Display results in the terminal: requests per IP (busiest first), the most
# accessed endpoint, and the IPs whose failed logins exceed the threshold.
print("-----------------------------------")
print("REQUESTS PER IP-ADDRESS:")
print(f"{'IP Address':<20} {'Request count'}")
for ip, count in sorted(ip_requests.items(), key=lambda x: x[1], reverse=True):
    print(f"{ip:<20} {count}")

print("\n-----------------------------------")

print("\nMost Frequently Accessed Endpoint:")
print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

print("\n-----------------------------------")

print("\nSuspicious Activity Detected:")
print(f"{'IP Address':<20} {'Failed Login Attempts'}")
suspicious_found = False
for ip, count in failed_logins.items():
    if count > login_failed_threshold:
        print(f"{ip:<20} {count}")
        suspicious_found = True
if not suspicious_found:
    # Single quotes inside the double-quoted f-string: reusing the outer quote
    # character is a SyntaxError before Python 3.12. The space between the two
    # fields keeps this row aligned with the header above, and the same
    # two-column row is printed whether no failures were recorded at all or
    # none exceeded the threshold.
    print(f"{'None':<20} {'None'}")

print("\n-----------------------------------")

-----------------------------------
REQUESTS PER IP-ADDRESS:
IP Address           Request count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

-----------------------------------

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

-----------------------------------

Suspicious Activity Detected:
IP Address           Failed Login Attempts
None                None

-----------------------------------
