In [8]:
import pandas as pd

In [9]:
# Parse one log line into its components and return them as a tuple (or None if the line does not have exactly four comma-separated fields).
def parse_log_line(line):
    """Split one comma-separated log line into its four fields.

    Args:
        line: A single log record of the form
            ``timestamp,ip,status_code,endpoint``.

    Returns:
        A ``(timestamp, ip, status_code, endpoint)`` tuple of strings with
        surrounding whitespace removed from every field, or ``None`` when the
        line does not contain exactly four comma-separated fields.
    """
    # Strip each field so "a, b" and "a,b" produce the same values; without
    # this, " 10.0.0.1" and "10.0.0.1" would be treated as different IPs.
    fields = tuple(field.strip() for field in line.split(','))
    if len(fields) != 4:
        return None
    return fields



In [10]:
# Count the unique IP addresses in the parsed logs and return the total.
def get_unique_visitors(logs):
    """Return how many distinct IP addresses appear in ``logs``.

    Args:
        logs: Iterable of parsed entries; the IP is read from index 1 of
            each entry. Falsy entries (e.g. ``None``) are ignored.

    Returns:
        The number of distinct values found at index 1.
    """
    return len({record[1] for record in logs if record})

In [11]:
# Find the most-accessed endpoints and return a list of (endpoint, count) tuples.
def get_popular_endpoints(logs, top_n=5):
    """Rank endpoints by how often they occur in ``logs``.

    Args:
        logs: Iterable of parsed entries; the endpoint is read from index 3
            of each entry. Falsy entries are skipped.
        top_n: Upper slice bound applied to the ranked list.

    Returns:
        List of ``(endpoint, count)`` tuples ordered by descending count.
    """
    hits = {}
    for record in logs:
        if not record:
            continue
        path = record[3]
        if path in hits:
            hits[path] += 1
        else:
            hits[path] = 1

    # Stable in-place sort: highest count first.
    ranking = list(hits.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking[:top_n]

In [12]:
# Calculate the percentage of entries with a 4xx/5xx status code and return it as a float.
def get_error_rate(logs):
    """Compute the share of entries whose status code is 4xx or 5xx.

    Args:
        logs: Iterable of parsed entries; the status code is read from
            index 2 and converted with ``int``. Falsy entries are skipped.

    Returns:
        The percentage (0-100) as a float; ``0.0`` when no entries were
        counted.

    Raises:
        ValueError: If a status code string is not a valid integer.
    """
    codes = [int(record[2]) for record in logs if record]
    if not codes:
        return 0.0
    failures = sum(1 for code in codes if 400 <= code < 600)
    return (failures / len(codes)) * 100

In [13]:
# Read a log file, parse it, and print a formatted summary report.
def generate_report(filename):
    """Parse a log file and print a formatted summary report.

    Every successfully parsed line is echoed as it is read; lines for which
    ``parse_log_line`` returns ``None`` are skipped. Once the whole file has
    been read, the unique-visitor count, the most popular endpoints and the
    error rate are printed.

    Args:
        filename: Path of the log file to read.

    Returns:
        None. All results are written to standard output.
    """
    logs = []
    print("Parsed data:timestamp, IP address, status_code")
    with open(filename, 'r') as file:
        for line in file:
            parsed_line = parse_log_line(line.strip())
            if parsed_line:
                logs.append(parsed_line)
                print(parsed_line)

    # The file is not needed after parsing, so the summary is computed and
    # printed only after the handle has been closed.
    unique_visitors = get_unique_visitors(logs)
    popular_endpoints = get_popular_endpoints(logs)
    error_rate = get_error_rate(logs)

    print("\n\n Log File Report:")
    print(f"\n\n Unique Visitors: {unique_visitors}")
    print("\n\n Popular Endpoints:")
    for endpoint, count in popular_endpoints:
        print(f"- {endpoint}: {count}")
    print(f"\n\n Error Rate: {error_rate:.2f}%")

In [14]:
# Run the report on the sample log file (the path is relative to the current working directory).
generate_report("sample-log.txt")

Parsed data:timestamp, IP address, status_code
('2024-03-18 10:15:23', '192.168.1.101', '200', '/home')
('2024-03-18 10:15:25', '192.168.1.102', '404', '/profile/user123')
('2024-03-18 10:15:30', '192.168.1.101', '200', '/about')
('2024-03-18 10:15:45', '192.168.1.103', '200', '/products')
('2024-03-18 10:16:01', '192.168.1.104', '500', '/api/data')
('2024-03-18 10:16:15', '192.168.1.102', '200', '/home')
('2024-03-18 10:16:30', '192.168.1.105', '404', '/blog/post123')
('2024-03-18 10:16:45', '192.168.1.101', '200', '/products')
('2024-03-18 10:17:00', '192.168.1.106', '200', '/home')
('2024-03-18 10:17:15', '192.168.1.107', '403', '/admin')
('2024-03-18 10:17:30', '192.168.1.102', '200', '/about')
('2024-03-18 10:17:45', '192.168.1.108', '200', '/contact')
('2024-03-18 10:18:00', '192.168.1.109', '500', '/api/users')
('2024-03-18 10:18:15', '192.168.1.101', '200', '/home')
('2024-03-18 10:18:30', '192.168.1.110', '200', '/products')
('2024-03-18 10:18:45', '192.168.1.102', '404', '/im