In [6]:
# from [https://rosettacode.org/wiki/LZW_compression#Python]
# Python LZW

def compress(uncompressed):
    """Encode a string as a list of integer LZW codes.

    The code table starts with the 256 single characters chr(0)..chr(255),
    each mapped to its own code point. Every time a phrase is met that is
    not yet in the table, the code of the longest known prefix is emitted
    and the new phrase is registered under the next free code (256, 257, ...).

    Characters outside chr(0)..chr(255) are not in the initial table, so
    looking them up raises KeyError.
    """
    # Initial table: one entry per single character.
    codebook = {chr(code): code for code in range(256)}

    emitted = []
    phrase = ""
    for symbol in uncompressed:
        candidate = phrase + symbol
        if candidate in codebook:
            # Still a known phrase: keep extending it.
            phrase = candidate
            continue

        # Longest known prefix ends here: emit it and learn the new phrase.
        emitted.append(codebook[phrase])
        # The table only ever grows by one fresh key at a time, so its
        # current size is exactly the next unused code.
        codebook[candidate] = len(codebook)
        phrase = symbol

    # Flush whatever phrase is still pending (nothing for empty input).
    if phrase:
        emitted.append(codebook[phrase])
    return emitted


def decompress(compressed):
    """Decode an iterable of integer LZW codes back into a string.

    Args:
        compressed: iterable of integer codes, as produced by an LZW
            encoder whose initial table maps chr(0)..chr(255) to 0..255.
            The argument is only iterated over; it is never modified.

    Returns:
        The decoded string. An empty input decodes to the empty string.

    Raises:
        ValueError: if a code cannot be valid at its position in the
            stream (this includes a first code outside 0..255).
    """
    # Build the initial dictionary: code -> single character.
    dict_size = 256
    dictionary = {i: chr(i) for i in range(dict_size)}

    codes = iter(compressed)
    try:
        first = next(codes)
    except StopIteration:
        # Nothing to decode.
        return ""

    # The first code must be a single-character code; going through the
    # dictionary (rather than chr()) rejects anything else.
    if first not in dictionary:
        raise ValueError('Bad compressed k: %s' % first)
    w = dictionary[first]

    # Collect the pieces and join once at the end, which avoids the O(N^2)
    # cost of repeated string concatenation in a loop.
    pieces = [w]
    for k in codes:
        if k in dictionary:
            entry = dictionary[k]
        elif k == dict_size:
            # The code refers to the entry that is about to be created:
            # it must be the previous phrase plus its own first character.
            entry = w + w[0]
        else:
            raise ValueError('Bad compressed k: %s' % k)
        pieces.append(entry)

        # Add w+entry[0] to the dictionary.
        dictionary[dict_size] = w + entry[0]
        dict_size += 1

        w = entry
    return "".join(pieces)


# How to use:
original_text = 'TOBEORNOTTOBEORTOBEORNOT'

compressed = compress(original_text)
print(compressed)

# Hand decompress() its own copy of the code list so that `compressed`
# is guaranteed to stay intact for any later cell that reuses it.
decompressed = decompress(list(compressed))
print(decompressed)

# Sanity check: compression followed by decompression must be lossless.
assert decompressed == original_text, "LZW round trip did not reproduce the input"

[84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263]
TOBEORNOTTOBEORTOBEORNOT


In [7]:
# Example: render an integer as a string of binary digits.
decimal_number = 2634

# format(..., 'b') produces the binary digits directly, with no '0b' prefix.
binary_representation = format(decimal_number, 'b')

# Left-pad with zeros up to a width of 10 characters.
# NOTE: zfill only pads, it never truncates. 2634 needs 12 binary digits,
# so here the string comes back unchanged and is longer than 10 characters.
binary_10_bit = binary_representation.zfill(10)

print(f"The 10-bit binary representation of {decimal_number} is: {binary_10_bit}")

The 10-bit binary representation of 2634 is: 101001001010


In [8]:
# Example: parse a string of binary digits into an integer.
binary_string = "00100000111"  # leading zeros are accepted and do not change the value

# base=2 tells int() to read the digits as a binary number.
decimal_number = int(binary_string, base=2)

print(f"The integer value of the binary string {binary_string} is: {decimal_number}")

The integer value of the binary string 00100000111 is: 263


In [9]:
# Converting between an integer code and its character, the same way the
# LZW code above builds its tables with chr().
ascii_code = 65

# chr(): integer code -> one-character string
character = chr(ascii_code)
print(f"The character corresponding to ASCII code {ascii_code} is: {character}")

# ord(): one-character string -> integer code (the inverse of chr)
encoded_byte = ord(character)
print(f"The ASCII code for '{character}' is: {encoded_byte}")

# Wrap the integer in a one-element sequence to get a bytes object holding
# exactly that one byte.
byte_representation = bytes((encoded_byte,))
print(f"The byte representation is: {byte_representation}")


The character corresponding to ASCII code 65 is: A
The ASCII code for 'A' is: 65
The byte representation is: b'A'


In [10]:
# File operation: write raw bytes to disk.

# Where the bytes will be stored.
file_path = "byte_representation.txt"

# Example payload: the three byte values 65, 66 and 67.
byte_representation = bytes((65, 66, 67))

# 'wb' = write in binary mode, so the bytes are stored exactly as given.
with open(file_path, mode='wb') as file:
    file.write(byte_representation)

print(f"Byte representation has been saved to {file_path}")


Byte representation has been saved to byte_representation.txt


In [11]:
# file operation -- read 

# Helper: turn a sequence of byte values into one long string of 0s and 1s.
def bytes_to_binary_string(byte_data):
    """Return the bits of `byte_data` as a string of '0' and '1' characters.

    Each element is formatted as a zero-padded, 8-digit binary number and
    the pieces are concatenated in order, so b'A' becomes '01000001'.
    An empty input gives an empty string.
    """
    bit_groups = []
    for octet in byte_data:
        bit_groups.append(f'{octet:08b}')
    return ''.join(bit_groups)

file_path = "byte_representation.txt"

# 'rb' = read in binary mode; read() with no argument returns the whole
# file as a single bytes object.
with open(file_path, mode='rb') as file:
    file_content = file.read()

# Show the raw bytes object.
print(f"Content of {file_path}: {file_content}")

# The same bytes rendered as 0/1 characters, eight per byte.
binary_string_custom = bytes_to_binary_string(file_content)
print(f"Binary string representation (custom function): {binary_string_custom}")

# Interpret the bytes as UTF-8 text (only meaningful if the file holds text).
decoded_content = str(file_content, 'utf-8')
print(f"Decoded content: {decoded_content}")

Content of byte_representation.txt: b'ABC'
Binary string representation (custom function): 010000010100001001000011
Decoded content: ABC


In [2]:
# A much simpler stand-in for Unix `diff`: it only reports whether two files are byte-for-byte identical

def compare_files(file_path1, file_path2, chunk_size=64 * 1024):
    """Return True if the two files have byte-for-byte identical content.

    The files are read in binary mode and compared chunk by chunk, so
    memory use stays bounded by `chunk_size` regardless of file size and
    the comparison stops at the first chunk that differs.

    Args:
        file_path1: path of the first file.
        file_path2: path of the second file.
        chunk_size: number of bytes read from each file per step
            (must be at least 1).

    Returns:
        True when both files contain exactly the same bytes, else False.

    Raises:
        ValueError: if `chunk_size` is smaller than 1.
        OSError: if either file cannot be opened or read.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    with open(file_path1, 'rb') as file1, open(file_path2, 'rb') as file2:
        while True:
            chunk1 = file1.read(chunk_size)
            chunk2 = file2.read(chunk_size)
            if chunk1 != chunk2:
                # Covers both differing bytes and one file ending early.
                return False
            if not chunk1:
                # Both files reached end-of-file together with no mismatch.
                return True

# Example usage: compare two files and report the verdict.
file1_path = "file.txt"
file2_path = "file.txt.2M"

files_match = compare_files(file1_path, file2_path)
print(
    "The content of the files is identical."
    if files_match
    else "The content of the files is different."
)

The content of the files is different.
