In [25]:
# Load the compressed file and expand it into a flat list of bit characters.
# NOTE(review): the path is absolute and machine-specific.
bit_list = []

with open('/home/dennis/Projects/ImperivmGBR/Game_original/config.ini', 'rb') as f:
    raw_bytes = f.read()

# Iterating a bytes object yields ints, so each value can be formatted directly
# as an 8-character binary string (most significant bit first), e.g. '01101001'.
for byte_value in raw_bytes:
    # extend() splits the string into its individual '0'/'1' characters.
    bit_list.extend(format(byte_value, '08b'))

In [26]:
def read_bits(num):
    """Consume the next `num` bits of the global bit stream as an unsigned int.

    Bits are taken from the module-level ``bit_list`` (a sequence of '0'/'1'
    characters) starting at the module-level cursor ``bitOffset`` and are
    interpreted most significant bit first. On success the cursor is advanced
    by ``num``.

    Args:
        num: Number of bits to consume. ``0`` is allowed and yields ``0``
            without moving the cursor.

    Returns:
        The consumed bits as an ``int``.

    Raises:
        ValueError: If ``num`` is negative.
        EOFError: If fewer than ``num`` bits remain. The cursor is left
            unchanged in that case instead of running past the data.
    """
    global bitOffset

    if num < 0:
        raise ValueError(f'Cannot read a negative number of bits: {num}')
    if num == 0:
        # int('', 2) would raise, so an empty read is defined as zero.
        return 0

    end = bitOffset + num
    if end > len(bit_list):
        # A silent short slice would yield a value built from fewer bits than
        # requested, which corrupts everything decoded afterwards.
        raise EOFError(
            f'Requested {num} bits at offset {bitOffset}, '
            f'but only {max(len(bit_list) - bitOffset, 0)} remain'
        )

    value = int(''.join(bit_list[bitOffset:end]), 2)
    bitOffset = end
    return value

def readTable(length):
    """Read a table of `length` fixed-width integers from the bit stream.

    The table is preceded by a 2-bit field; that field plus 2 is the width in
    bits of every entry that follows. All reads go through ``read_bits`` and
    therefore advance the global cursor.

    Args:
        length: Number of entries to read.

    Returns:
        A list with ``length`` integers.
    """
    entry_width = read_bits(2) + 2

    # entry_width is at least 2 here, so the zero-fill branch is only a guard.
    if entry_width > 0:
        return [read_bits(entry_width) for _ in range(length)]
    return [0] * length

In [27]:
def generate_huffman_codes(lengths):
    """Assign canonical Huffman codes to symbols from their code lengths.

    Codes are handed out in increasing numeric order: first by code length,
    then by symbol index within the same length. Symbols whose length is 0 are
    unused and receive no code.

    Args:
        lengths: Sequence where ``lengths[symbol]`` is the code length in bits
            of ``symbol`` (0 = symbol not present).

    Returns:
        Dict mapping each used symbol to its code as an ``int``. The code's bit
        width is the symbol's entry in ``lengths``.

    Raises:
        ValueError: If, after all codes are assigned, the next free code of the
            maximum length is neither zero nor a power of two.
    """
    longest = max(lengths) if lengths else 0

    # How many symbols use each code length. Index 0 stays at zero because
    # unused symbols are skipped, which the first-code recurrence relies on.
    per_length = [0] * (longest + 1)
    for n_bits in lengths:
        if n_bits > 0:
            per_length[n_bits] += 1

    # Smallest code value for each length: shift the running total left by one
    # bit each time the code gets one bit longer.
    first_code = [0] * (longest + 1)
    running = 0
    for n_bits in range(1, longest + 1):
        running = (running + per_length[n_bits - 1]) << 1
        first_code[n_bits] = running

    # Hand out consecutive codes per length, in symbol order.
    huffman_codes = {}
    for symbol, n_bits in enumerate(lengths):
        if n_bits == 0:
            continue
        huffman_codes[symbol] = first_code[n_bits]
        first_code[n_bits] += 1

    # After assignment first_code[longest] is one past the last code used.
    # NOTE(review): this only checks "zero or a power of two"; it does not
    # compare against 1 << longest, so some incomplete or over-full length
    # sets still pass. Kept as-is to preserve existing behaviour.
    after_last = first_code[longest]
    if after_last & (after_last - 1):
        raise ValueError("INVALIDFORMAT: Generated codes are not canonical")

    return huffman_codes

In [28]:
# Tables
# Maps a length symbol (256..284) to a (base length, extra-bit count) pair.
# decode_with_tree reads `extra-bit count` bits after the symbol and adds
# their value to the base length to get the number of bytes to copy.
# Symbol 285 has no entry; decode_with_tree handles it separately.
#
# NOTE(review): the ranges implied by (base, extra bits) are not contiguous.
# Lengths 6, 12, 21-22, 39-42, 75-82 and 147-162 cannot be produced, and
# symbol 283 (258 + 5 extra bits -> 258..289) overlaps both 282 (227..258)
# and 284 (259). This may be a transcription slip relative to a DEFLATE-style
# length table (3,4,5,6,7,8,9,10,11,13,...) - TODO confirm against the real
# format before relying on decoded back-references.
length_code_table = {
    256: (3,  0),
    257: (4,  0),
    258: (5,  0),
    259: (7,  0),
    260: (8,  0),
    261: (9,  0),
    262: (10,  0),
    263: (11,  0),
    264: (13, 1),
    265: (15, 1),
    266: (17, 1),
    267: (19, 1),
    268: (23, 2),
    269: (27, 2),
    270: (31, 2),
    271: (35, 2),
    272: (43, 3),
    273: (51, 3),
    274: (59, 3),
    275: (67, 3),
    276: (83, 4),
    277: (99, 4),
    278: (115, 4),
    279: (131, 4),
    280: (163, 5),
    281: (195, 5),
    282: (227, 5),
    283: (258, 5),
    284: (259, 0),
}

# Maps a distance symbol (0..29) to a pair of integers, presumably
# (base distance, extra-bit count) by analogy with length_code_table.
# The values are the same as the DEFLATE distance code table (RFC 1951).
#
# NOTE(review): nothing in the visible notebook cells reads this table;
# decode_with_tree uses the decoded distance symbol directly as the copy
# distance. The distance lengths table is also read with 0x3c (60) entries
# while this table only covers 30 symbols - TODO confirm how distances are
# really encoded.
distance_code_table = {
    0: (1, 0),
    1: (2, 0),
    2: (3, 0),
    3: (4, 0),
    4: (5, 1),
    5: (7, 1),
    6: (9, 2),
    7: (13, 2),
    8: (17, 3),
    9: (25, 3),
    10: (33, 4),
    11: (49, 4),
    12: (65, 5),
    13: (97, 5),
    14: (129, 6),
    15: (193, 6),
    16: (257, 7),
    17: (385, 7),
    18: (513, 8),
    19: (769, 8),
    20: (1025, 9),
    21: (1537, 9),
    22: (2049, 10),
    23: (3073, 10),
    24: (4097, 11),
    25: (6145, 11),
    26: (8193, 12),
    27: (12289, 12),
    28: (16385, 13),
    29: (24577, 13),
}

In [29]:
# Huffman tree stuff

class HuffmanNode:
    """Node of a binary Huffman decoding tree.

    ``left`` is followed for a '0' bit and ``right`` for a '1' bit. ``symbol``
    is ``None`` on nodes that do not terminate a code.
    """

    def __init__(self, symbol=None):
        # Children start out missing; they are attached as codes are inserted.
        self.left = self.right = None
        self.symbol = symbol

# Insert a Huffman code into the tree
def insert_huffman_code(tree_root, code, symbol):
    """Add one code to the decoding tree, creating nodes along the way.

    Args:
        tree_root: Root node of the tree to extend (modified in place).
        code: The code as a string of '0'/'1' characters, first bit first.
            Any other character is skipped.
        symbol: Value stored on the node reached after the last bit.
    """
    current = tree_root
    for bit in code:
        if bit == '0':
            branch = 'left'
        elif bit == '1':
            branch = 'right'
        else:
            continue

        child = getattr(current, branch)
        if child is None:
            child = HuffmanNode()
            setattr(current, branch, child)
        current = child

    # The node reached after consuming the whole code carries the symbol.
    current.symbol = symbol

def build_decoding_tree(huffman_codes, lengths_table):
    """Build a decoding tree from integer codes and their bit lengths.

    Args:
        huffman_codes: Dict mapping symbol -> code value (int).
        lengths_table: Indexable by symbol, giving the code's length in bits.

    Returns:
        The root ``HuffmanNode`` of the populated tree.
    """
    root = HuffmanNode()
    for symbol in huffman_codes:
        width = lengths_table[symbol]
        # Zero-pad to the full width so leading zero bits are not lost.
        insert_huffman_code(root, f'{huffman_codes[symbol]:0{width}b}', symbol)
    return root

# Decode a sequence of bits using the Huffman tree
def decode_with_tree(tree_root, distance_tree_root, bit_stream, verbose=True):
    """Decode a bit string into a list of byte values.

    Symbols are read from `tree_root` one at a time:

    * symbol < 256: a literal, appended to the output;
    * symbol == 285: ignored;
    * anything else: a length symbol. Its (base length, extra-bit count) comes
      from ``length_code_table``; the extra bits are read, then a symbol is
      read from `distance_tree_root` and that many bytes back in the output
      are copied ``base length + extra`` times (copies may overlap).

    Decoding stops early, returning what was decoded so far, when the stream
    cannot be followed any further: a bit sequence that matches no code, a
    stream that ends inside a length/distance pair, or an invalid distance.
    Each of those prints a ``[Warning]`` line. Trailing bits that do not
    complete a literal/length code are ignored silently.

    NOTE(review): the distance symbol is used directly as the copy distance;
    ``distance_code_table`` is not consulted. Confirm against the real format.

    Args:
        tree_root: Root of the literal/length decoding tree.
        distance_tree_root: Root of the distance decoding tree.
        bit_stream: Sequence of '0'/'1' characters to decode.
        verbose: When true (the default), print each extra-bits value and the
            whole decoded text after every symbol. That debug output grows
            quadratically with the input; pass ``False`` for large streams.

    Returns:
        List of decoded values (ints).

    Raises:
        KeyError: If a length symbol has no entry in ``length_code_table``.
    """
    decoded_symbols = []
    total_bits = len(bit_stream)
    i = 0
    while i < total_bits:
        symbol, i, problem = _walk_huffman_tree(tree_root, bit_stream, i)
        if problem == 'truncated':
            # Leftover bits (e.g. padding) that do not form a complete code.
            break
        if problem is not None:
            print(f"[Warning] No literal/length code matches the bits ending at index {i}")
            break

        # Literals are 0..255; everything above is a length or control symbol.
        if symbol < 256:
            decoded_symbols.append(symbol)

        elif symbol == 285:
            pass

        else:
            baseLength, extraBits = length_code_table[symbol]

            if i + extraBits > total_bits:
                print(f"[Warning] Stream ended inside the extra bits of symbol {symbol}")
                break

            offset = 0
            if extraBits > 0:
                offset = int(''.join(bit_stream[i:i + extraBits]), 2)
                if verbose:
                    print(f'offset: {offset}')
            i += extraBits

            distance, i, problem = _walk_huffman_tree(distance_tree_root, bit_stream, i)
            if problem is not None:
                print(f"[Warning] Could not decode a distance code ({problem}) at index {i}")
                break

            # Perform LZ77-style copy
            if distance == 0 or distance > len(decoded_symbols):
                print(f"[Warning] Invalid distance: {distance}")
                break
            for _ in range(baseLength + offset):
                # Index from the end on every step so overlapping copies
                # re-read bytes that this same copy has just produced.
                decoded_symbols.append(decoded_symbols[-distance])

        if verbose:
            ascii_output = ''.join(chr(value) for value in decoded_symbols if value < 256)
            print(f'{ascii_output}')

    return decoded_symbols


def _walk_huffman_tree(root, bit_stream, pos):
    """Follow bits from `pos` down `root` until a node with a symbol is reached.

    Returns ``(symbol, new_pos, problem)``. ``problem`` is ``None`` on success,
    ``'truncated'`` if the stream ended first, or ``'dead end'`` if the bits
    selected a branch that does not exist (``symbol`` is ``None`` in both
    failure cases). Characters other than '0'/'1' are consumed without moving.
    """
    node = root
    while pos < len(bit_stream):
        bit = bit_stream[pos]
        pos += 1
        if bit == '0':
            node = node.left
        elif bit == '1':
            node = node.right

        if node is None:
            return None, pos, 'dead end'
        if node.symbol is not None:
            return node.symbol, pos, None
    return None, pos, 'truncated'

In [30]:
# Debug functions
def print_distance_tree(node, prefix=""):
    """Print every symbol in the subtree under `node` with its bit path.

    Nodes are visited parent first, then the '0' branch, then the '1' branch.

    Args:
        node: Subtree root, or ``None`` for an empty subtree.
        prefix: Bit path accumulated from the tree root down to `node`.
    """
    if node is None:
        return

    if node.symbol is not None:
        print(f"Symbol: {node.symbol}, Code: {prefix}")

    for bit, child in (("0", node.left), ("1", node.right)):
        print_distance_tree(child, prefix + bit)


def bruteforce_tree_for_character(tree_root, target_char):
    """Search the tree for a character and print the bit path leading to it.

    The search is depth-first with an explicit stack and explores the '1'
    branch of a node before its '0' branch. Only the first match is reported.

    Args:
        tree_root: Root node of the tree to search.
        target_char: Single character whose code point is looked up.

    Returns:
        ``None`` in every case; the result is only printed.
    """
    wanted = ord(target_char)
    pending = [(tree_root, "")]
    while pending:
        current, bits = pending.pop()
        if current.symbol == wanted:
            print(f"Found character '{target_char}' at path: {bits}")
            return
        # Pushed left then right, so the right child is popped first.
        for child, bit in ((current.left, "0"), (current.right, "1")):
            if child:
                pending.append((child, bits + bit))

    print(f"Character '{target_char}' not found in the tree.")
    return None

def navigate_tree(root, data, continuous = False):
    """Walk the tree along `data` and print each symbol that is reached.

    Args:
        root: Root node of the decoding tree.
        data: Iterable of '0'/'1' characters. Other characters do not move the
            walk but are still included in the printed bit path.
        continuous: If false (default), stop after the first symbol. If true,
            restart from `root` after each symbol and keep going.

    Returns:
        ``None``; results are printed. If the bits select a branch that does
        not exist, a message is printed and the walk stops.
    """
    node = root
    buffer = []
    for bit in data:
        if bit == '0':
            node = node.left
        elif bit == '1':
            node = node.right

        buffer.append(bit)

        if node is None:
            # Previously this crashed with AttributeError on `None.symbol`.
            print(f'No code in the tree starts with bits: {"".join(buffer)}')
            return

        if node.symbol is not None:
            # Build the pieces outside the f-string: reusing the enclosing
            # quote character inside a replacement field is a SyntaxError
            # before Python 3.12.
            bits_seen = ''.join(buffer)
            print(f'Found symbol: {node.symbol} "{chr(node.symbol)}" with bits: {bits_seen}')

            if continuous:
                node = root
                buffer = []
            else:
                return
    
            

In [31]:
# Skip the header: start reading 18 bytes (144 bits) into the file.
# This resets the global cursor used by read_bits, so the cells below must be
# run in order after this one.
bitOffset = 18*8

# Read the lengths table: 0x11e = 286 entries, one code length per
# literal/length symbol (0..285).
lengthsTable = readTable(0x11e)

# Read the distances table: 0x3c = 60 entries, read from the position where
# the previous table ended.
distanceTable = readTable(0x3c)

# Generate the Huffman codes for the literal/length symbols
huffCodes = generate_huffman_codes(lengthsTable)

# Generate the Huffman codes for the distance symbols
huffDistances = generate_huffman_codes(distanceTable)

In [32]:
# Debug prints

# print(f'Code lengths: {lengthsTable}')
# print(f'Distance lengths: {distanceTable}')
# print(f'Huffman codes: {huffCodes}')
# print(f'Huffman distances codes: {huffDistances}')

In [None]:
# Build the Huffman decoding tree for literal/length symbols
tree_root = build_decoding_tree(huffCodes, lengthsTable)

# Build the distance tree
distance_tree_root = build_decoding_tree(huffDistances, distanceTable)

# Decode the bitstream: everything after the two tables, i.e. from the
# current position of the global cursor to the end of the file.
bit_stream = ''.join(bit_list[bitOffset:])
decoded_output = decode_with_tree(tree_root, distance_tree_root, bit_stream)

In [34]:
# Preview: render the first 70 decoded values as text, skipping any value that
# is not a single byte.
ascii_output = ''.join(map(chr, filter(lambda value: value < 256, decoded_output[:70])))
print(f'ASCII output: {ascii_output}')

ASCII output: [system]
;WindowX = 1024
;WindowX lwae768
DisableExceptionHandlerleE


In [35]:
# Testing cell: walk the literal/length tree along a hand-picked bit pattern
# and print every symbol found (continuous mode restarts after each symbol).
navigate_tree(tree_root, "1011101010", True)

Found symbol: 118 "v" with bits: 101110
