# Lempel-Ziv-Welch Decoder
This Python notebook contains code to decode a binary LZW-encoded message into ASCII characters. The initial dictionary used can be found [here](https://theasciicode.com.ar/).

In [33]:
import copy
import pandas as pd

## Dictionary Creation

In [34]:
def front_fill_zeros(binary_number, desired_length):
    """Left-pad ``binary_number`` with '0' characters up to ``desired_length``.

    A string that already has ``desired_length`` characters or more is
    returned unchanged.
    """
    # str.rjust never truncates, so over-long inputs pass through untouched.
    return binary_number.rjust(desired_length, '0')

# Load the ASCII lookup table and drop its 'Number' column.
df = pd.read_csv('ascii-table.csv').drop(columns=['Number'])

# One binary code for each value 0..127, every code padded to the width of the
# longest one so that all rows share the same code length.
binary_numbers = [bin(code)[2:] for code in range(128)]
max_length = len(max(binary_numbers, key=len))
binary_numbers_new = [front_fill_zeros(number, max_length) for number in binary_numbers]
df['Binary'] = binary_numbers_new

# Row 0 gets the literal text 'NULL' as its symbol.
# NOTE(review): presumably needed because read_csv parses the text "NULL" as a
# missing value -- confirm against the CSV.
df.at[0, 'symbol'] = 'NULL'

print(df)

    symbol                                        Description   Binary
0     NULL                                   (Null character)  0000000
1      SOH                                  (Start of Header)  0000001
2      STX                                    (Start of Text)  0000010
3      ETX                                      (End of Text)  0000011
4      EOT                              (End of Transmission)  0000100
..     ...                                                ...      ...
123      {                         (curly brackets or braces)  1111011
124      |  (vertical-bar, vbar, vertical line or vertical...  1111100
125      }                         (curly brackets or braces)  1111101
126      ~                               (Tilde ; swung dash)  1111110
127    DEL                                           (Delete)  1111111

[128 rows x 3 columns]


## Decoding functions

In [35]:
def get_symbol_length(dictionary: pd.DataFrame) -> int:
    """Return the current code width: the length of the first 'Binary' entry."""
    first_code = dictionary['Binary'].iloc[0]
    code_width = len(first_code)
    return code_width


def get_symbol(binary_code: str, dictionary:pd.DataFrame) -> str:
    """Look up the symbol stored for ``binary_code``.

    Args:
        binary_code: Code to look up; compared for exact string equality
            against the 'Binary' column.
        dictionary: Table with 'symbol' and 'Binary' columns.

    Returns:
        The 'symbol' value of the first row whose 'Binary' equals
        ``binary_code``.

    Raises:
        IndexError: If no row carries that code (for example a truncated
            final code, or a code of the wrong width).
    """
    matches = dictionary.loc[dictionary['Binary'] == binary_code, 'symbol']
    if matches.empty:
        # IndexError is what a failed positional lookup on the empty match
        # raises; raising it explicitly lets the message name the bad code.
        raise IndexError(f"binary code {binary_code!r} is not in the dictionary")
    return matches.iloc[0]


def update_num_bits(dictionary: pd.DataFrame) -> bool:
    """Report whether the last 'Binary' entry contains no '0' character.

    decode() treats a True result as the signal to widen every code by one
    character.
    """
    newest_code = dictionary['Binary'].iloc[-1]
    return '0' not in newest_code


def add_to_dictionary(entry: str, dictionary: pd.DataFrame) -> pd.DataFrame:
    """Append ``entry`` as a new row coded with the next binary number.

    The new code is the binary form of the current row count. When that code
    is not the same length as the last existing code, the existing codes are
    first re-padded through back_fill_zeros_dictionary(). The middle column of
    the new row is filled with "-".

    Returns:
        A DataFrame with the extra row appended and the index renumbered
        from 0.
    """
    next_code = bin(len(dictionary))[2:]  # [2:] strips the '0b' prefix
    newest_existing_code = dictionary['Binary'].iloc[-1]

    width_changed = len(next_code) != len(newest_existing_code)
    if width_changed:
        dictionary = back_fill_zeros_dictionary(dictionary)

    # Single-row frame laid out in the dictionary's own column order.
    appended = pd.DataFrame([[entry, "-", next_code]], columns=dictionary.columns)
    return pd.concat([dictionary, appended], ignore_index=True)  # type: ignore


def back_fill_zeros_dictionary(dictionary: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``dictionary`` with its codes widened by one character.

    The target width is one more than the length of the first 'Binary' entry;
    every code shorter than that is left-padded with '0'.

    The input frame is not modified. Widening it in place would leave the
    caller's initial table with wider codes after a decode, so decoding a
    second message with the same table would read its first code with the
    wrong width.

    Args:
        dictionary: Table with a non-empty 'Binary' column of code strings.

    Returns:
        A new DataFrame with the same rows, index and column order, and the
        padded 'Binary' column.
    """
    codes = dictionary['Binary'].values
    new_length = len(codes[0]) + 1
    # str.rjust pads on the left and never truncates longer codes.
    widened_codes = [code.rjust(new_length, '0') for code in codes]
    return dictionary.assign(Binary=widened_codes)

def update_last_entry(symbol: str, dictionary: pd.DataFrame) -> pd.DataFrame:
    """Append the first character of ``symbol`` to the last row's symbol.

    The change is written into ``dictionary`` itself, a progress line is
    printed, and the same frame is returned. The row is addressed by the label
    ``row count - 1``, which is the last row only for a default 0..n-1 index.
    """
    last_label = len(dictionary) - 1
    current_text = dictionary['symbol'].iloc[-1]
    extended_text = current_text + symbol[0]
    print(f"updating {current_text} to {extended_text}")
    dictionary.at[last_label, 'symbol'] = extended_text
    return dictionary


def decode(binary_message: str, dictionary: pd.DataFrame) -> str:
    """Decode a variable-width LZW bit string into text.

    Args:
        binary_message: String of '0'/'1' characters. Whitespace (such as the
            trailing newline of a text file) is ignored.
        dictionary: Initial table with 'symbol' and 'Binary' columns. It is
            copied, so the caller's frame is left untouched and the same table
            can be passed to several calls.

    Returns:
        The concatenation of the decoded symbols ("" for an empty message).

    Raises:
        IndexError: Propagated from get_symbol() when a code is not in the
            table, e.g. leftover bits shorter than the current code width.
    """
    # The helpers widen and edit the table they are given. Without a private
    # copy the caller's frame would come back with wider codes, and a second
    # decode() call on it would read the first code with the wrong width.
    dictionary = dictionary.copy()

    # Only '0' and '1' carry information, so drop spaces and newlines.
    remaining_bits = "".join(binary_message.split())

    decoded_parts = []
    first = True
    while remaining_bits:
        symbol_length = get_symbol_length(dictionary)
        symbol_code = remaining_bits[:symbol_length]
        remaining_bits = remaining_bits[symbol_length:]

        symbol = get_symbol(symbol_code, dictionary)

        # After the first code, the last row is the provisional entry added on
        # the previous pass: the previous symbol, still missing its final
        # character. If the incoming code *is* that row, the encoder used the
        # entry it had only just created, whose text is the previous symbol
        # followed by its own first character.
        # NOTE(review): "first character" is symbol[0], the same convention
        # update_last_entry() uses; multi-character symbol names in the table
        # (e.g. 'LF') are not treated as a single unit -- confirm intended.
        if not first and symbol_code == dictionary['Binary'].iloc[-1]:
            symbol = symbol + symbol[0]

        decoded_parts.append(symbol)

        # A last code with no '0' in it means the next code is one bit wider.
        if update_num_bits(dictionary):
            dictionary = back_fill_zeros_dictionary(dictionary)

        if not first:
            # Complete the provisional entry with the first character of the
            # symbol that was just decoded.
            dictionary = update_last_entry(symbol, dictionary)

        # Provisional entry for this symbol; it is completed on the next pass.
        dictionary = add_to_dictionary(symbol, dictionary)

        first = False

    return "".join(decoded_parts)


## Decoding usage

In [36]:

# binary_message = '11111110110000101100010000000000110010010000001011000100110001100000011'
# binary_message = '110000100001010010000110110111101101101011011010111010101101110011010010110001101100001011101000110100101101111011011100010000001110011011110010111001101110100011001010110110100100000011001000110010101110011011010010110011110001110011100100110010101110001011101010110100110011101011100110010000001110100011010000110010110010110100110001001101010001110011011110110011000100000011010000110000101110010011001000111011110110000101001110110000101101110011001001000111110101100011101001011001110011101100011111001000110010011011011010111001100101100000010100111011101101001101001011010010010100110001000000110111101100010011010100110010101100011'
# message = decode(binary_message, df)

# Read the encoded bit string from disk and echo it.
lzw_file_path = 'lzw_message.txt'
with open(lzw_file_path) as f:
    message = f.read()

print(message)

# Decode, then swap the table's multi-character placeholder names for the
# characters they stand for. The pairs are applied in this order.
ascii_message = decode(message, df)
for placeholder, character in (
    ("LF", "\n"),
    ('SQO', '['),
    ('SQC', ']'),
    ('RBO', '('),
    ('RBC', ')'),
):
    ascii_message = ascii_message.replace(placeholder, character)
print(f"\nFinal message: {ascii_message}")

# Persist the decoded text as UTF-8 bytes.
ascii_file_path = 'ascii_message.txt'
ascii_message = ascii_message.encode('utf-8')

with open(ascii_file_path, 'wb') as f:
    f.write(ascii_message)

1100001000010100100001101101111011011010110110101110101011011100110100101100011011000010111010001101001011011110110111000100000011100110111100101110011011101000110010101101101001000000110010001100101011100110110100101100111100011100111001001100101011100010111010101101001100111010111001100100000011101000110100001100101100101101001100010011010100011100110111101100110001000000110100001100001011100100110010001110111101100001010011101100001011011100110010010001111101011000111010010110011100111011000111110010001100100110110110101110011001011000000101001110111011010011010010110100100101001100010000001101111011000100110101001100101011000111000101101110110101001111010110010100100011100101011011001110011011011011100011010001011011011100110011100100000011011001011000001100111101001110111011001101111011011000111010101101101100110001100101010101101011010010110111001100110011011110111001001101101100010101000110010101011110100010111001011011101100011010110011100001010011001000110100110010010101101100