In [4]:
import re
import csv
from collections import defaultdict, Counter

# Configurable threshold for failed login attempts.
# main() flags an IP as suspicious only when its failed-login count is
# strictly greater than this value (a count equal to it is not flagged).
FAILED_LOGIN_THRESHOLD = 10

def parse_log_file(file_path):
    """Scan a web access log and tally activity per IP and per endpoint.

    Each line is inspected independently:

    * the first dotted-quad found on the line is treated as the client IP;
    * the token between the HTTP method and ``HTTP`` inside the quoted
      request is treated as the endpoint;
    * the line counts as a failed login when the status code that follows
      the quoted request is 401, or when the text ``Invalid credentials``
      appears anywhere on the line.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A ``(ip_requests, endpoint_requests, failed_login_attempts)`` tuple:
        two ``Counter`` objects keyed by IP and by endpoint, and a
        ``defaultdict(int)`` mapping IP to its number of failed logins.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    ip_requests = Counter()
    endpoint_requests = Counter()
    failed_login_attempts = defaultdict(int)

    # Compile the patterns once rather than looking them up for every line.
    ip_regex = re.compile(r'(\d+\.\d+\.\d+\.\d+)')  # matching ip addresses
    endpoint_regex = re.compile(r'\"[A-Z]+\s([^\s]+)\sHTTP')  # matching the endpoints
    # Anchor 401 to the status-code position (right after the closing quote
    # of the request line). A bare '401' would also match response sizes
    # such as 4012 or paths such as /items/401 and inflate the counts.
    failed_login_regex = re.compile(r'HTTP/[\d.]+"\s+401\b|Invalid credentials')

    with open(file_path, 'r') as log_file:
        for line in log_file:
            # Extracting the IP address (None when the line has none)
            ip_match = ip_regex.search(line)
            ip = ip_match.group(1) if ip_match else None
            if ip is not None:
                ip_requests[ip] += 1

            # Extracting the endpoint
            endpoint_match = endpoint_regex.search(line)
            if endpoint_match:
                endpoint_requests[endpoint_match.group(1)] += 1

            # Detecting failed login attempts; a failure without an IP on
            # the same line cannot be attributed and is ignored.
            if ip is not None and failed_login_regex.search(line):
                failed_login_attempts[ip] += 1

    return ip_requests, endpoint_requests, failed_login_attempts

def save_results_to_csv(ip_requests, most_accessed_endpoint, suspicious_activities, output_file):
    """Write the three analysis sections to a single CSV file.

    The file contains, in order and separated by an empty row: the request
    count per IP, the most frequently accessed endpoint, and the IPs
    flagged for suspicious activity.

    Args:
        ip_requests: Mapping of IP address to request count (anything with
            an ``items()`` method yielding pairs).
        most_accessed_endpoint: Row written as-is under the
            "Endpoint"/"Access Count" header.
        suspicious_activities: Iterable of ``(ip, failed_login_count)`` pairs.
        output_file: Path of the CSV file to create or overwrite.
    """
    separator = []  # an empty row between sections

    with open(output_file, 'w', newline='') as out:
        writer = csv.writer(out)

        # Section 1: requests per IP
        writer.writerow(["IP Address", "Request Count"])
        writer.writerows([address, hits] for address, hits in ip_requests.items())

        # Section 2: most accessed endpoint
        writer.writerows([
            separator,
            ["Most Frequently Accessed Endpoint"],
            ["Endpoint", "Access Count"],
            most_accessed_endpoint,
        ])

        # Section 3: suspicious activities
        writer.writerows([
            separator,
            ["Suspicious Activity Detected"],
            ["IP Address", "Failed Login Count"],
        ])
        writer.writerows([address, failures] for address, failures in suspicious_activities)

def main(log_file=None, output_file="log_analysis_results.csv"):
    """Analyze a log file, print a summary, and save it as CSV.

    Prints the request count per IP (most active first), the most
    frequently accessed endpoint, and every IP whose failed-login count
    exceeds FAILED_LOGIN_THRESHOLD, then writes the same data to a CSV file.

    Args:
        log_file: Path of the log to analyze. When omitted, the sample path
            hard-coded below is used.
        output_file: Path of the CSV report to write.
    """
    if log_file is None:
        log_file = r"C:\Users\anike\OneDrive\Desktop\VRV Assignment\sample.txt"  # Replace with your log file path

    # Analyze the log file
    ip_requests, endpoint_requests, failed_login_attempts = parse_log_file(log_file)

    # Sort results: IPs by descending request count
    sorted_ip_requests = ip_requests.most_common()
    most_accessed_endpoint = endpoint_requests.most_common(1)[0] if endpoint_requests else ("None", 0)
    suspicious_activities = [
        (ip, count) for ip, count in failed_login_attempts.items() if count > FAILED_LOGIN_THRESHOLD
    ]

    # Display results
    print("\nIP Address           Request Count")
    for ip, count in sorted_ip_requests:
        print(f"{ip:20} {count}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    print("\nSuspicious Activity Detected:")
    if suspicious_activities:
        for ip, count in suspicious_activities:
            print(f"{ip:20} {count}")
    else:
        print("No suspicious activity detected.")

    # Save results to CSV. Pass the sorted counts (as a dict, which keeps
    # insertion order) so the CSV rows appear in the same order as the
    # console table rather than in first-seen order.
    save_results_to_csv(dict(sorted_ip_requests), most_accessed_endpoint, suspicious_activities, output_file)
    print(f"\nResults saved to {output_file}")

# Run the analysis only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected.

Results saved to log_analysis_results.csv
