In [1]:
%%writefile weblogs.txt
# Date, Time, IP, Method, URL, Status, ResponseSize
2025-10-10,12:01:32,192.168.1.2,GET,/index.html,200,1024
2025-10-10,12:01:33,192.168.1.3,GET,/products.html,200,850
2025-10-10,12:01:35,192.168.1.4,GET,/contact.html,404,512
2025-10-10,12:01:38,192.168.1.5,POST,/checkout,500,128
2025-10-10,12:01:41,192.168.1.6,GET,/index.html,200,1024
2025-10-10,12:01:45,192.168.1.7,GET,/images/logo.png,200,256
2025-10-10,12:01:48,192.168.1.8,GET,/about.html,404,512
2025-10-10,12:01:53,192.168.1.9,POST,/login,403,64
2025-10-10,12:02:01,192.168.1.10,GET,/index.html,200,1024
2025-10-10,12:02:07,192.168.1.11,POST,/checkout,500,128
2025-10-10,12:02:12,192.168.1.12,GET,/contact.html,404,512
2025-10-10,12:02:15,192.168.1.13,GET,/index.html,200,1024
2025-10-10,12:02:21,192.168.1.14,GET,/products.html,200,850
2025-10-10,12:02:23,192.168.1.15,GET,/about.html,404,512
2025-10-10,12:02:29,192.168.1.16,POST,/checkout,500,128
2025-10-10,12:02:31,192.168.1.17,GET,/images/logo.png,200,256
2025-10-10,12:02:34,192.168.1.18,GET,/contact.html,404,512
2025-10-10,12:02:38,192.168.1.19,POST,/login,403,64
2025-10-10,12:02:41,192.168.1.20,GET,/index.html,200,1024
2025-10-10,12:02:47,192.168.1.21,GET,/products.html,200,850


Writing weblogs.txt


Implement the Mapper

In [2]:
def mapper(line):
    """Emit a ``(status, 1)`` pair for one web-log line.

    Args:
        line: One raw line of the comma-separated log
            (Date, Time, IP, Method, URL, Status, ResponseSize).

    Returns:
        ``[(status, 1)]`` for a data line with exactly seven fields, or ``[]``
        for a ``#`` comment line or a line with any other field count.
    """
    parts = line.strip().split(",")
    is_comment = parts[0].startswith('#')

    # Only well-formed, non-comment records contribute to the count.
    if len(parts) == 7 and not is_comment:
        # The sixth column holds the HTTP status code.
        return [(parts[5], 1)]

    return []

Shuffle Phase

In [3]:
from collections import defaultdict

def shuffle(mapped_data):
    """Group mapper output by key.

    Args:
        mapped_data: Iterable of ``(key, value)`` pairs.

    Returns:
        A ``defaultdict(list)`` mapping each key to the list of its values,
        in the order the pairs were encountered.
    """
    buckets = defaultdict(list)
    for pair in mapped_data:
        k, v = pair
        buckets[k].append(v)
    return buckets

Reducer Phase

In [4]:
def reducer(shuffled_data):
    """Collapse each key's list of values into their sum.

    Args:
        shuffled_data: Mapping of key -> list of numeric values.

    Returns:
        A new ``dict`` mapping each key to ``sum(values)``.
    """
    return {key: sum(values) for key, values in shuffled_data.items()}

Combine the Phases

In [5]:
# Map: turn every log line into zero or one (status, 1) pairs
mapped = []
with open("weblogs.txt", "r") as log_file:
    for record in log_file:
        mapped += mapper(record)

# Shuffle: collect the emitted values under their status code
grouped = shuffle(mapped)

# Reduce: add up the values for each status code
reduced = reducer(grouped)

# Report in ascending key order
for code in sorted(reduced):
    count = reduced[code]
    print(f"HTTP {code}: {count} requests")


HTTP 200: 10 requests
HTTP 403: 2 requests
HTTP 404: 5 requests
HTTP 500: 3 requests


**Requests per URL**

In [7]:
def mapper2(line):
    """Emit a ``(url, 1)`` pair for one web-log line.

    Args:
        line: One raw line of the comma-separated log
            (Date, Time, IP, Method, URL, Status, ResponseSize).

    Returns:
        ``[(url, 1)]`` for a data line with exactly seven fields, or ``[]``
        for a ``#`` comment line or a line with any other field count.
    """
    columns = line.strip().split(",")
    if len(columns) != 7:
        return []

    first, _time, _ip, _method, url, _status, _size = columns
    if first.startswith('#'):
        return []

    return [(url, 1)]

In [10]:
# Map: turn every log line into zero or one (url, 1) pairs
mapped = []
with open("weblogs.txt", "r") as log_file:
    mapped.extend(pair for record in log_file for pair in mapper2(record))

# Shuffle: collect the emitted values under their URL
grouped = shuffle(mapped)

# Reduce: add up the values for each URL
reduced = reducer(grouped)

# Report in ascending key order (the key here is the URL)
for code in sorted(reduced):
    count = reduced[code]
    print(f"url {code}: {count} requests")

url /about.html: 2 requests
url /checkout: 3 requests
url /contact.html: 3 requests
url /images/logo.png: 2 requests
url /index.html: 5 requests
url /login: 2 requests
url /products.html: 3 requests


**Total ResponseSize per status code**

In [9]:
def mapper3(line):
    """Emit a ``(status, response_size)`` pair for one web-log line.

    Args:
        line: One raw line of the comma-separated log
            (Date, Time, IP, Method, URL, Status, ResponseSize).

    Returns:
        ``[(status, response_size)]`` with ``response_size`` as an ``int``, or
        ``[]`` when the line is a ``#`` comment, does not have exactly seven
        fields, or carries a ResponseSize that is not an integer.
    """
    fields = line.strip().split(",")
    if len(fields) != 7 or fields[0].startswith('#'):
        return []

    status = fields[5]
    try:
        response_size = int(fields[6])
    except ValueError:
        # A blank or placeholder size (e.g. "" or "-") would otherwise abort
        # the whole job; skip the record like any other malformed line.
        return []

    return [(status, response_size)]

In [12]:
def reducer3(shuffled_data):
    """Total the values collected for each key.

    Performs the same per-key summation as ``reducer``.

    Args:
        shuffled_data: Mapping of key -> list of numeric values.

    Returns:
        A new ``dict`` mapping each key to the sum of its values.
    """
    totals = dict(
        (key, sum(sizes)) for key, sizes in shuffled_data.items()
    )
    return totals

In [17]:
# Map: turn every log line into zero or one (status, response_size) pairs
mapped = []
with open("weblogs.txt", "r") as log_file:
    for record in log_file:
        mapped += mapper3(record)

# Shuffle: collect the response sizes under their status code
grouped = shuffle(mapped)

# Reduce: add up the response sizes for each status code
reduced = reducer3(grouped)

# Report in ascending key order (the value here is a byte total)
for code in sorted(reduced):
    count = reduced[code]
    print(f"status {code}: {count} bytes")

status 200: 8182 bytes
status 403: 128 bytes
status 404: 2560 bytes
status 500: 384 bytes


**تصفية الاستجابات الناجحة (200) وتحليل الأخطاء فقط** — Filter out successful (200) responses and analyze errors only

In [15]:
def mapper4(line):
    """Emit a ``(status, 1)`` pair for one non-200 web-log line.

    Args:
        line: One raw line of the comma-separated log
            (Date, Time, IP, Method, URL, Status, ResponseSize).

    Returns:
        ``[(status, 1)]`` when the line has exactly seven fields, is not a
        ``#`` comment, and its status is not ``"200"``; otherwise ``[]``.
    """
    parts = line.strip().split(",")

    # Wrong field count: not a usable record.
    if len(parts) != 7:
        return []
    # Header / comment line.
    if parts[0].startswith('#'):
        return []

    code = parts[5]
    # Successful responses are filtered out; everything else is counted.
    return [] if code == "200" else [(code, 1)]

In [16]:
# Map: turn every non-200 log line into a (status, 1) pair
mapped = []
with open("weblogs.txt", "r") as log_file:
    mapped.extend(pair for record in log_file for pair in mapper4(record))

# Shuffle: collect the emitted values under their status code
grouped = shuffle(mapped)

# Reduce: add up the values for each status code
reduced = reducer(grouped)

# Report in ascending key order
for code in sorted(reduced):
    count = reduced[code]
    print(f"HTTP {code}: {count} requests")

HTTP 403: 2 requests
HTTP 404: 5 requests
HTTP 500: 3 requests
