In [13]:
import re
import csv
from collections import defaultdict, Counter

# File paths (adjust as needed)
# LOG_FILE is the access log that main() hands to parse_log_file().
LOG_FILE = "sample.log"
# OUTPUT_FILE is the CSV report opened for writing by save_results_to_csv().
OUTPUT_FILE = "log_analysis_results.csv"

# Configurable threshold for suspicious activity: main() passes it to
# detect_suspicious_activity(), which reports an IP only when its failed-login
# count is strictly greater than this value.
THRESHOLD = 10

def parse_log_file(file_path):
    """Parse an access log and return one dict per recognised request line.

    Each line is matched from its start against the layout::

        IP - - [timestamp] "METHOD endpoint HTTP/x.y" status size ["message"]

    ``size`` may be a number or ``-`` (the Common Log Format marker for
    "no bytes sent"); it is matched but not captured. Lines that do not
    fit the layout are skipped silently.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of dicts, in file order, each with the string keys ``ip``,
        ``timestamp``, ``method``, ``endpoint``, ``status`` and ``message``.
        All values are strings, except that ``message`` is ``None`` when the
        line carries no trailing quoted message.

    Raises:
        OSError: If the file cannot be opened.
    """
    log_data = []
    log_pattern = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+) - - \[(?P<timestamp>[^\]]+)\] '
        r'"(?P<method>\w+) (?P<endpoint>[^\s]+) HTTP/\d+\.\d+" '
        # Accept "-" as well as digits so size-less responses are not dropped.
        r'(?P<status>\d+) (?:\d+|-)'
        r'(?: "(?P<message>.+)")?'
    )
    # Logs contain client-supplied bytes: decode explicitly as UTF-8 and
    # substitute undecodable bytes instead of aborting the whole parse with
    # UnicodeDecodeError (or depending on the platform's default encoding).
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            match = log_pattern.match(line)
            if match:
                log_data.append(match.groupdict())
    return log_data

def count_requests_by_ip(log_data):
    """Tally requests per client IP.

    Args:
        log_data: Iterable of parsed log entries; each must have an ``'ip'`` key.

    Returns:
        A list of ``(ip, request_count)`` tuples ordered from the most to the
        least active address.
    """
    tally = Counter()
    for record in log_data:
        tally[record['ip']] += 1
    return tally.most_common()

def most_frequent_endpoint(log_data):
    """Return the endpoint that appears most often in the parsed log.

    Args:
        log_data: Iterable of parsed log entries; each must have an
            ``'endpoint'`` key.

    Returns:
        An ``(endpoint, access_count)`` tuple, or ``("N/A", 0)`` when
        ``log_data`` contains no entries.
    """
    hits_by_endpoint = Counter()
    for record in log_data:
        hits_by_endpoint[record['endpoint']] += 1

    # Nothing was counted, so there is no winner to report.
    if not hits_by_endpoint:
        return ("N/A", 0)
    return hits_by_endpoint.most_common(1)[0]

def detect_suspicious_activity(log_data, threshold):
    """Report IPs whose failed-login count is above ``threshold``.

    An entry counts as a failed login when its status is the string ``"401"``
    or its message (if any) contains ``"Invalid credentials"``.

    Args:
        log_data: Iterable of parsed log entries with ``'ip'`` and
            ``'status'`` keys and an optional ``'message'`` key.
        threshold: An IP is reported only when its failure count is strictly
            greater than this value.

    Returns:
        A dict mapping each reported IP to its number of failed attempts.
    """
    def looks_like_failed_login(record):
        if record['status'] == "401":
            return True
        # A missing or None message is treated as an empty string.
        note = record.get('message') or ""
        return "Invalid credentials" in note

    failures = Counter(
        record['ip'] for record in log_data if looks_like_failed_login(record)
    )

    flagged = {}
    for address, total in failures.items():
        if total > threshold:
            flagged[address] = total
    return flagged

def save_results_to_csv(ip_counts, most_accessed, suspicious_activity, output_file=None):
    """Write the three analysis sections to a single CSV file.

    The file holds, in order, "Requests per IP", "Most Accessed Endpoint" and
    "Suspicious Activity". Each section is a title row, a column-header row
    and its data rows; an empty row separates consecutive sections.

    Args:
        ip_counts: Iterable of ``(ip, request_count)`` rows.
        most_accessed: A single ``(endpoint, access_count)`` row.
        suspicious_activity: Mapping of IP to failed-login count.
        output_file: Destination path. Defaults to the module-level
            ``OUTPUT_FILE`` when omitted or ``None``.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    if output_file is None:
        # Looked up at call time so existing callers keep writing to the
        # module-level default path.
        output_file = OUTPUT_FILE

    sections = [
        ("Requests per IP", ["IP Address", "Request Count"], ip_counts),
        ("Most Accessed Endpoint", ["Endpoint", "Access Count"], [most_accessed]),
        ("Suspicious Activity", ["IP Address", "Failed Login Count"],
         suspicious_activity.items()),
    ]

    # newline='' lets the csv module control line endings; an explicit UTF-8
    # encoding keeps non-ASCII endpoints from failing on narrow locale codecs.
    with open(output_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        for index, (title, header, rows) in enumerate(sections):
            if index:
                writer.writerow([])  # blank separator between sections
            writer.writerow([title])
            writer.writerow(header)
            writer.writerows(rows)

def main():
    """Run the whole pipeline: parse the log, print a report, export a CSV.

    Reads ``LOG_FILE``, computes the per-IP request counts, the most accessed
    endpoint and the IPs exceeding ``THRESHOLD`` failed logins, prints all
    three to stdout, then writes them out via ``save_results_to_csv``.
    """
    entries = parse_log_file(LOG_FILE)

    # Compute every statistic up front so the report and the CSV export
    # share the same results.
    requests_per_ip = count_requests_by_ip(entries)
    top_endpoint = most_frequent_endpoint(entries)
    flagged_ips = detect_suspicious_activity(entries, THRESHOLD)

    # --- Console report ---
    print("IP Address           Request Count")
    for address, hits in requests_per_ip:
        print(f"{address:20} {hits}")

    print("\nMost Frequently Accessed Endpoint:")
    endpoint, access_count = top_endpoint[0], top_endpoint[1]
    print(f"{endpoint} (Accessed {access_count} times)")

    print("\nSuspicious Activity Detected:")
    print("IP Address           Failed Login Attempts")
    for address, failures in flagged_ips.items():
        print(f"{address:20} {failures}")

    # --- CSV export ---
    save_results_to_csv(requests_per_ip, top_endpoint, flagged_ips)
    print(f"\nResults saved to {OUTPUT_FILE}")

# Run the analysis only when this module is the entry point, so that
# importing it does not trigger any file I/O.
if __name__ == "__main__":
    main()


IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts

Results saved to log_analysis_results.csv
