In [1]:
import gzip

In [2]:
def extract_packets(path='htmlhexdump.txt'):
    """Read a text hex dump and return one contiguous hex string per packet.

    Each usable line is expected to split, on a double space, into at least
    three fields: an offset, the space-separated hex bytes, and a trailing
    column (presumably the ASCII rendering of a Wireshark-style dump; the
    format is inferred from the parsing logic only). Lines with fewer fields
    are ignored.

    Args:
        path: Location of the hex dump text file. Defaults to the file name
            the notebook originally hard-coded.

    Returns:
        A list of strings, one per packet, each containing that packet's hex
        digits with all whitespace removed.
    """
    allpackets = []
    chunks = []  # hex fragments of the packet currently being assembled
    with open(path, 'r') as file:
        for line in file:
            temp = line.strip().split('  ')
            if len(temp) < 3:
                continue

            offset = temp[0]
            # A row at offset zero opens a new packet. The whole field must be
            # zeros: a plain startswith('0000') would also match wider offsets
            # such as '00000010' and split a packet on every line.
            if len(offset) >= 4 and not offset.strip('0'):
                if chunks:
                    allpackets.append(''.join(chunks))
                chunks = []

            hex_digits = ''.join(temp[1].split())
            if hex_digits:
                chunks.append(hex_digits)

    if chunks:  # flush the final packet
        allpackets.append(''.join(chunks))
    return allpackets

In [3]:
def parse_packet(data):
    """Return the TCP payload of one Ethernet/IPv4/TCP frame given as hex text.

    The frame is assumed to start with an untagged 14-byte Ethernet header
    (the EtherType itself is not inspected; only the IP version nibble is).

    Args:
        data: Hex string of the whole captured frame (no whitespace required;
            ``bytes.fromhex`` rules apply).

    Returns:
        The bytes following the TCP header, limited to the length announced
        by the IPv4 header. ``b''`` is returned for frames that are too
        short, not IPv4, not TCP, or that carry an invalid header length.

    Raises:
        ValueError: If ``data`` is not valid hex (raised by ``bytes.fromhex``).
    """
    hexdata = bytes.fromhex(data)

    ethernet_header_len = 14
    min_ip_header_len = 20
    min_tcp_header_len = 20

    if len(hexdata) < ethernet_header_len + min_ip_header_len:
        return b''

    ipheader_offset = ethernet_header_len
    ipheader = hexdata[ipheader_offset: ipheader_offset + min_ip_header_len]

    if (ipheader[0] >> 4) != 4:  # Ensure IPv4
        return b''

    # IHL is expressed in 32-bit words; anything below 5 words is malformed.
    ipheader_len = (ipheader[0] & 0x0F) * 4
    if ipheader_len < min_ip_header_len:
        return b''

    protocol = ipheader[9]
    if protocol != 6:  # Not TCP
        return b''

    tcpheader_offset = ethernet_header_len + ipheader_len
    if len(hexdata) < tcpheader_offset + min_tcp_header_len:
        return b''

    # TCP data offset: upper nibble of byte 12, also in 32-bit words.
    tcplen = (hexdata[tcpheader_offset + 12] >> 4) * 4
    if tcplen < min_tcp_header_len:
        return b''

    payload_offset = tcpheader_offset + tcplen

    # Bound the payload by the IPv4 Total Length so link-layer padding on
    # short frames is not mistaken for application data. Fall back to the end
    # of the captured bytes when the field is 0 (seen in some captures, e.g.
    # with segmentation offload) or claims more than was captured.
    ip_total_len = int.from_bytes(ipheader[2:4], 'big')
    packet_end = ethernet_header_len + ip_total_len
    if ip_total_len == 0 or packet_end > len(hexdata):
        packet_end = len(hexdata)

    # An end before payload_offset yields an empty slice, i.e. no payload.
    return hexdata[payload_offset:packet_end]

In [4]:
# Load every packet from the dump, reduce each one to the bytes that follow
# its TCP header, and concatenate the results in the order they were read.
pkts = extract_packets()
httpdata = b''.join(map(parse_packet, pkts))

In [5]:
# Split the reassembled stream into HTTP headers and body at the first
# blank line (CRLF CRLF).
header_end = httpdata.find(b'\r\n\r\n')
if header_end == -1:
    # find() signals "not found" with -1; slicing with that value would
    # silently produce a truncated header block and a bogus payload.
    raise ValueError("HTTP header terminator (CRLF CRLF) not found in reassembled data")

headers = httpdata[:header_end].decode('utf-8', errors='ignore')
payload = httpdata[header_end + 4:]  # skip the 4-byte terminator itself

In [6]:
# Decompress the gzip-encoded body and report how much it expanded.
compressed_size = len(payload)  # size as carried in the capture

decompressed_payload = gzip.decompress(payload)
decompressed_size = len(decompressed_payload)

# The ratio is undefined for an empty body, so it stays None in that case.
# It is formatted separately because None rejects the ".2f" format spec and
# would otherwise raise TypeError inside the f-string.
compression_ratio = decompressed_size / compressed_size if compressed_size > 0 else None
ratio_text = f"{compression_ratio:.2f}" if compression_ratio is not None else "n/a"

print(f"Original (Compressed) Size: {compressed_size} bytes")
print(f"Decompressed Size: {decompressed_size} bytes")
print(f"Compression Ratio: {ratio_text}")

Compression Ratio: 4.92
Original (Compressed) Size: 31249 bytes
Decompressed Size: 153743 bytes
Compression Ratio: 4.92
