# Limpel-Ziv-Welch Decoder
This python note book contains code to decode a binary LZW encoded message into ASCII characters, the initial dictionary used can be found [here](https://theasciicode.com.ar/)

In [31]:
import copy
import pandas as pd

## Dictionary Creation

In [32]:
def front_fill_zeros(binary_number, desired_length):
    difference = desired_length - len(binary_number)
    # print(f"Difference {difference}")
    if difference == 0: return binary_number

    zeros = difference * '0'
    return zeros + binary_number

df = pd.read_csv('ascii-table.csv')
df = df.drop(columns=['Number'])

binary_numbers = []
for i in range(128):
    entry = bin(i)[2:]
    binary_numbers.append(entry)

max_length = max(len(number) for number in binary_numbers)
binary_numbers_new = [front_fill_zeros(i, max_length) for i in binary_numbers]
df['Binary'] = binary_numbers_new
df.at[0, 'symbol'] = 'NULL'

print(df)

    symbol                                        Description   Binary
0     NULL                                   (Null character)  0000000
1      SOH                                  (Start of Header)  0000001
2      STX                                    (Start of Text)  0000010
3      ETX                                      (End of Text)  0000011
4      EOT                              (End of Transmission)  0000100
..     ...                                                ...      ...
123      {                         (curly brackets or braces)  1111011
124      |  (vertical-bar, vbar, vertical line or vertical...  1111100
125      }                         (curly brackets or braces)  1111101
126      ~                               (Tilde ; swung dash)  1111110
127    DEL                                           (Delete)  1111111

[128 rows x 3 columns]


## Decoding fucntions

In [33]:
def get_symbol_length(dictionary: pd.DataFrame) -> int:
    return len(dictionary['Binary'].iloc[0])


def get_symbol(binary_code: str, dictionary:pd.DataFrame) -> str:
    row = dictionary.loc[dictionary['Binary'] == binary_code]
    return row['symbol'].values[0]


def update_num_bits(dictionary: pd.DataFrame) -> bool:
    last_entry = dictionary['Binary'].iloc[-1]
    # print(f"Last entry {last_entry}")
    if '0' in last_entry:
        return False
    return True


def add_to_dictionary(entry: str, dictionary: pd.DataFrame) -> pd.DataFrame:
    binary_num = bin(dictionary.shape[0])[2:] # [2:] to remove the '0b' from the start of the binary number
    previous_binary_num = dictionary['Binary'].iloc[-1]

    # print(f"New binary number {dictionary.shape[0]}")

    if len(binary_num) != len(previous_binary_num):
        # print(f"Transitioning from {len(previous_binary_num)} bits to {len(binary_num)} bits")
        dictionary = back_fill_zeros_dictionary(dictionary)

    new_row = pd.Series([entry, "-", binary_num], index=dictionary.columns)
    dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
    return dictionary


def back_fill_zeros_dictionary(dictionary: pd.DataFrame) -> pd.DataFrame:
    binaries = dictionary['Binary'].values
    new_length = len(binaries[0]) + 1
    # print(f"new length {new_length}")
    new_binaries = [front_fill_zeros(i, new_length) for i in binaries]
    dictionary['Binary'] = new_binaries
    # print(dictionary)
    return dictionary

def update_last_entry(symbol: str, dictionary: pd.DataFrame) -> pd.DataFrame:
    last_entry_symbol = dictionary['symbol'].iloc[-2]
    dictionary.at[dictionary.shape[0] - 1, 'symbol'] = last_entry_symbol + symbol
    return dictionary


def decode(binary_message: str, dictionary: pd.DataFrame) -> str:
    binary_message_copy = copy.copy(binary_message)

    ascii_message = ""
    first = True
    while binary_message_copy != "":

        symbol_length = get_symbol_length(dictionary)
        # print(f"Symbol length: {symbol_length}")
        symbol_code = binary_message_copy[:symbol_length]
        binary_message_copy = binary_message_copy.replace(symbol_code, "", 1)

        symbol = get_symbol(symbol_code, dictionary)
        # print(f"Symbol from dictionary: {symbol}")
        ascii_message = ascii_message + symbol


        if update_num_bits(dictionary):
            # print("Updating bits in dictionary")
            dictionary = back_fill_zeros_dictionary(dictionary)

        if not first:
            # print(f"updating dictionary")
            # print(dictionary)
            dictionary = update_last_entry(symbol, dictionary)
            # print(dictionary)
        
        dictionary = add_to_dictionary(symbol, dictionary)

        first = False

    return ascii_message


## Decoding usage

In [34]:

binary_message = '11111110110000101100010000000000110010010000001011000100110001100000011'
message = decode(binary_message, df)
print(f"Message {message}")

Message DELabNULLdDELabbcETX


  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
  dictionary = dictionary.append(new_row, ignore_index=True) # type: ignore
