##### Name : Harshit Khandelwal
##### Email : khandelwalharshit2002@gmail.com

### Accessing the log file saved as sample.log

In [7]:
file_path = r"C:\Users\Admin\Downloads\sample.log"
with open(file_path, 'r') as file:
    print("Reading log file...")
    for line in file:
        print(line.strip())

Reading log file...
192.168.1.1 - - [03/Dec/2024:10:12:34 +0000] "GET /home HTTP/1.1" 200 512
203.0.113.5 - - [03/Dec/2024:10:12:35 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:12:36 +0000] "GET /about HTTP/1.1" 200 256
192.168.1.1 - - [03/Dec/2024:10:12:37 +0000] "GET /contact HTTP/1.1" 200 312
198.51.100.23 - - [03/Dec/2024:10:12:38 +0000] "POST /register HTTP/1.1" 200 128
203.0.113.5 - - [03/Dec/2024:10:12:39 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
192.168.1.100 - - [03/Dec/2024:10:12:40 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:12:41 +0000] "GET /dashboard HTTP/1.1" 200 1024
198.51.100.23 - - [03/Dec/2024:10:12:42 +0000] "GET /about HTTP/1.1" 200 256
192.168.1.1 - - [03/Dec/2024:10:12:43 +0000] "GET /dashboard HTTP/1.1" 200 1024
203.0.113.5 - - [03/Dec/2024:10:12:44 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
203.0.113.5 - - [03/Dec/2024:10:12:45 +0000] "PO

### 1. **Count Requests per IP Address**:
    - Parsed the provided log file to extract all IP addresses.
    - Calculated the number of requests made by each IP address.
    - Sorted and displayed the results in descending order of request counts.

### 2. **Identify the Most Frequently Accessed Endpoint**:
    - Extract the endpoints (e.g., URLs or resource paths) from the log file.
    - Identify the endpoint accessed the highest number of times.
    - Provide the endpoint name and its access count.

### 3. **Detect Suspicious Activity**:
    - Identify potential brute force login attempts by:
        - Searching for log entries with failed login attempts (e.g., HTTP status code `401` or a specific failure message like "Invalid credentials").
        - Flagging IP addresses with failed login attempts exceeding a configurable threshold (default: 10 attempts).
    - Display the flagged IP addresses and their failed login counts.

In [None]:
import csv
from collections import Counter

In [18]:
def parse_log_file(file_path):
    """
    Parses the log file to extract:
    - IP request counts
    - Most accessed endpoint
    - Failed login attempts for suspicious activity detection
    """
    ip_requests = Counter()
    endpoint_requests = Counter()
    failed_logins = Counter()

    try:
        with open(file_path, 'r') as file:
            for line in file:
                parts = line.split()

                # Ensure the log line contains sufficient information
                if len(parts) > 6:
                    ip = parts[0]  # IP address
                    endpoint = parts[6]  # Resource path
                    status_code = parts[8] if len(parts) > 8 else ""

                    # Count requests by IP
                    ip_requests[ip] += 1

                    # Count requests to endpoints
                    endpoint_requests[endpoint] += 1

                    # Detect failed login attempts
                    if status_code == "401" or "Invalid credentials" in line:
                        failed_logins[ip] += 1

        return ip_requests, endpoint_requests, failed_logins
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None, None, None
    except Exception as e:
        print(f"An error occurred while parsing the log file: {e}")
        return None, None, None
    
    
def find_most_accessed_endpoint(endpoint_requests):
    """
    Finds the most frequently accessed endpoint from the Counter object.
    """
    if endpoint_requests:
        return max(endpoint_requests.items(), key=lambda x: x[1])
    return None, 0


def filter_suspicious_ips(failed_logins, threshold=10):
    """
    Filters IPs with failed login attempts exceeding the threshold.
    """
    return {ip: count for ip, count in failed_logins.items() if count > threshold}


def save_results_to_csv(ip_requests, most_accessed_endpoint, suspicious_ips, output_file="log_analysis_results.csv"):
    """
    Saves analysis results to a CSV file.
    """
    try:
        with open(output_file, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)

            # Write Requests per IP
            writer.writerow(["Requests per IP"])
            writer.writerow(["IP Address", "Request Count"])
            for ip, count in ip_requests.items():
                writer.writerow([ip, count])

            writer.writerow([])  # Blank line

            # Write Most Accessed Endpoint
            writer.writerow(["Most Accessed Endpoint"])
            writer.writerow(["Endpoint", "Access Count"])
            writer.writerow(most_accessed_endpoint)

            writer.writerow([])  # Blank line

            # Write Suspicious Activity
            writer.writerow(["Suspicious Activity"])
            writer.writerow(["IP Address", "Failed Login Count"])
            for ip, count in suspicious_ips.items():
                writer.writerow([ip, count])

        print(f"Results saved to '{output_file}'.")
    except Exception as e:
        print(f"An error occurred while saving the file: {e}")


def display_results(ip_requests, most_accessed_endpoint, suspicious_ips):
    """
    Displays the analysis results in a clear, organized format in the terminal.
    """
    print("\nRequests per IP:")
    print(f"{'IP Address':<20} {'Request Count':<15}")
    print("-" * 35)
    for ip, count in ip_requests.items():
        print(f"{ip:<20} {count:<15}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"Endpoint: {most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    print("\nSuspicious Activity:")
    if suspicious_ips:
        print(f"{'IP Address':<20} {'Failed Login Count':<20}")
        print("-" * 40)
        for ip, count in suspicious_ips.items():
            print(f"{ip:<20} {count:<20}")
    else:
        print("No suspicious activity detected.")


def main():
    log_file_path = r"C:\Users\Admin\Downloads\sample.log" # Path to the log file
    threshold = 10  # Threshold for suspicious activity

    # Parse log file
    ip_requests, endpoint_requests, failed_logins = parse_log_file(log_file_path)

    if ip_requests and endpoint_requests and failed_logins:
        # Find the most accessed endpoint
        most_accessed_endpoint = find_most_accessed_endpoint(endpoint_requests)

        # Filter suspicious IPs
        suspicious_ips = filter_suspicious_ips(failed_logins, threshold)

        # Display results in the terminal
        display_results(ip_requests, most_accessed_endpoint, suspicious_ips)

        # Save results to CSV
        save_results_to_csv(ip_requests, most_accessed_endpoint, suspicious_ips)


if __name__ == "__main__":
    main()


Requests per IP:
IP Address           Request Count  
-----------------------------------
192.168.1.1          7              
203.0.113.5          8              
10.0.0.2             6              
198.51.100.23        8              
192.168.1.100        5              

Most Frequently Accessed Endpoint:
Endpoint: /login (Accessed 13 times)

Suspicious Activity:
No suspicious activity detected.
Results saved to 'log_analysis_results.csv'.
