In [3]:
import re
from collections import Counter, defaultdict
import csv

# Configuration
# Path of the access log that the script reads.
LOG_FILE: str = "sample.log"
# Path of the CSV report that the script writes.
OUTPUT_FILE: str = "log_analysis_results.csv"
# An IP is reported as suspicious only when its failed-login count is
# strictly greater than this value.
FAILED_LOGIN_THRESHOLD: int = 10

# Helper function to parse log file
def parse_log_file(log_file):
    """Tally requests per IP, hits per endpoint, and failed logins per IP.

    Each line is scanned independently:

    * the IP address is the dotted quad at the very start of the line;
    * the endpoint is the token following a quoted GET/POST/PUT/DELETE;
    * a failed login is a line whose HTTP status field (the three-digit
      number right after the first quoted string) is 401, or that contains
      the text "Invalid credentials".

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(ip_request_count, endpoint_count, failed_logins)`` where
        the first two are ``Counter`` objects and the last is a
        ``defaultdict(int)`` keyed by IP address.

    Raises:
        OSError: If the log file cannot be opened.
    """
    # Compile once; these are applied to every line of the file.
    ip_pattern = re.compile(r"(\d+\.\d+\.\d+\.\d+)")
    endpoint_pattern = re.compile(r'\"(?:GET|POST|PUT|DELETE) (\S+)')
    # Status code = three digits following the first quoted string.  Reading
    # the field instead of testing `"401" in line` avoids false positives
    # from response sizes (e.g. 1401), addresses, or timestamps.
    status_pattern = re.compile(r'"[^"]*"\s+(\d{3})\b')

    ip_request_count = Counter()
    endpoint_count = Counter()
    failed_logins = defaultdict(int)

    with open(log_file, "r") as file:
        for line in file:
            # Extract IP address (must be at the start of the line)
            ip_match = ip_pattern.match(line)
            ip_address = ip_match.group(1) if ip_match else None
            if ip_address is not None:
                ip_request_count[ip_address] += 1

            # Extract endpoint; counted even when the line has no leading IP
            endpoint_match = endpoint_pattern.search(line)
            if endpoint_match:
                endpoint_count[endpoint_match.group(1)] += 1

            # Detect failed login attempts; attributable only when the
            # line carries an IP address.
            if ip_address is None:
                continue
            status_match = status_pattern.search(line)
            unauthorized = status_match is not None and status_match.group(1) == "401"
            if unauthorized or "Invalid credentials" in line:
                failed_logins[ip_address] += 1

    return ip_request_count, endpoint_count, failed_logins

# Write results to CSV
def write_results_to_csv(output_file, ip_requests, most_accessed, suspicious_activity):
    """Write the three report sections to a single CSV file.

    The sections, in order, are requests per IP, the most accessed
    endpoint, and suspicious activity. Each section starts with its own
    header row, and the first two are followed by an empty row.

    Args:
        output_file: Path of the CSV file to create or overwrite.
        ip_requests: Iterable of ``(ip, count)`` pairs.
        most_accessed: Sequence written as one row, e.g. ``(endpoint, count)``.
        suspicious_activity: Mapping of IP address to failed-login count.
    """
    with open(output_file, mode="w", newline="") as handle:
        out = csv.writer(handle)

        # Section 1: requests per IP (input is a sequence of pairs)
        out.writerow(["IP Address", "Request Count"])
        out.writerows([address, hits] for address, hits in ip_requests)
        out.writerow([])

        # Section 2: most accessed endpoint (a single data row)
        out.writerow(["Endpoint", "Access Count"])
        out.writerow(most_accessed)
        out.writerow([])

        # Section 3: suspicious activity (input is a mapping)
        out.writerow(["IP Address", "Failed Login Count"])
        out.writerows(
            [address, failures]
            for address, failures in suspicious_activity.items()
        )


# Main script
if __name__ == "__main__":
    # Parse log file
    ip_request_count, endpoint_count, failed_logins = parse_log_file(LOG_FILE)

    # Determine most accessed endpoint.  most_common(1) returns an empty
    # list when no line contained a recognised request, so fall back to a
    # placeholder instead of failing with IndexError on `[0]`.
    top_endpoints = endpoint_count.most_common(1)
    if top_endpoints:
        most_accessed_endpoint, most_accessed_count = top_endpoints[0]
    else:
        most_accessed_endpoint, most_accessed_count = "N/A", 0

    # Detect suspicious activity: only IPs strictly above the threshold
    suspicious_ips = {ip: count for ip, count in failed_logins.items() if count > FAILED_LOGIN_THRESHOLD}

    # Display results
    print("\nRequests per IP:")
    print(f"{'IP Address':<20}{'Request Count'}")
    for ip, count in ip_request_count.most_common():
        print(f"{ip:<20}{count}")

    print("\nMost Frequently Accessed Endpoint:")
    if top_endpoints:
        print(f"{most_accessed_endpoint} (Accessed {most_accessed_count} times)")
    else:
        print("No endpoint requests found.")

    print("\nSuspicious Activity Detected:")
    if suspicious_ips:
        print(f"{'IP Address':<20}{'Failed Login Attempts'}")
        for ip, count in suspicious_ips.items():
            print(f"{ip:<20}{count}")
    else:
        print("No suspicious activity detected.")

    # Write results to CSV
    write_results_to_csv(
        OUTPUT_FILE,
        ip_request_count.most_common(),
        (most_accessed_endpoint, most_accessed_count),
        suspicious_ips
    )

    print(f"\nResults saved to {OUTPUT_FILE}")



Requests per IP:
IP Address          Request Count
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6
192.168.1.100       5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected.

Results saved to log_analysis_results.csv
