# Compression algorithms

### Huffman

In [24]:
def huffman(data):
    """Placeholder for a Huffman coding implementation (not written yet).

    The body is only ``...``, so ``data`` is ignored and the call
    currently returns ``None``.
    """
    ...

### LZ77 / LZ78

In [25]:
def lz77(data):
    """Placeholder for an LZ77 implementation (not written yet).

    The body is only ``...``, so ``data`` is ignored and the call
    currently returns ``None``.
    """
    ...

# or, alternatively, the LZ78 variant:

def lz78(data):
    """Placeholder for an LZ78 implementation (not written yet).

    The body is only ``...``, so ``data`` is ignored and the call
    currently returns ``None``.
    """
    ...

### LZW

In [26]:
class LZW:
    """LZW codec for text.

    The initial dictionary is not fixed: it is built from the distinct
    characters of the text being encoded, in order of first occurrence, and
    has to be handed to ``decode`` together with the code sequence.
    """

    def encode(self, text: str) -> tuple[list[int], list[str]]:
        """Compress ``text`` with LZW.

        Args:
            text: The string to compress. May be empty.

        Returns:
            A pair ``(codes, alphabet)``. ``codes`` is the list of integer
            dictionary indices; ``alphabet`` lists the distinct characters of
            ``text`` in first-occurrence order, so that ``alphabet[i]`` is the
            character initially assigned code ``i``. Empty input yields
            ``([], [])``.
        """
        # dict.fromkeys keeps insertion order, giving the first-occurrence
        # alphabet without a manual counter.
        alphabet = list(dict.fromkeys(text))
        if not text:
            # Nothing to emit; the final "flush" below would otherwise look
            # up the empty string, which is never in the table.
            return [], alphabet

        table = {symbol: index for index, symbol in enumerate(alphabet)}
        output = []
        w = ''
        for letter in text:
            candidate = w + letter
            if candidate in table:
                w = candidate
            else:
                output.append(table[w])
                # Codes are dense (0..len-1), so the next free code is simply
                # the current table size; no need to scan for the maximum.
                table[candidate] = len(table)
                w = letter
        # Flush the phrase that was still being extended when input ended.
        output.append(table[w])
        return output, alphabet

    def decode(self, code: list, coding_dict: list) -> str:
        """Rebuild the text from an LZW code sequence.

        Args:
            code: Integer codes as produced by ``encode``.
            coding_dict: The alphabet list returned by ``encode``; position
                ``i`` holds the character for code ``i``.

        Returns:
            The decoded string; ``''`` when ``code`` is empty.

        Raises:
            KeyError: If the first code is not an index into ``coding_dict``.
        """
        if not code:
            return ''

        table = dict(enumerate(coding_dict))
        previous = table[code[0]]
        pieces = [previous]
        for current in code[1:]:
            if current in table:
                entry = table[current]
            else:
                # The code refers to the entry that is about to be created
                # (pattern cScSc): it must be previous + its own first char.
                entry = previous + previous[0]
            pieces.append(entry)
            # Mirror the encoder: it added (previous + first char of entry)
            # under the next free code at this point.
            table[len(table)] = previous + entry[0]
            previous = entry
        return ''.join(pieces)


In [27]:
class LZW_binary:
    """LZW codec for arbitrary bytes, seeded with all 256 single-byte values."""

    # Used by callers as the extension of the compressed file.
    name = 'lzw'

    def encode(self, data: bytes) -> tuple[list[int], dict[bytes, int]]:
        """Compress ``data`` with LZW.

        Args:
            data: The bytes to compress. May be empty.

        Returns:
            A pair ``(codes, start_dictionary)``. ``codes`` is the list of
            integer codes; ``start_dictionary`` maps each single byte
            ``bytes([i])`` to ``i`` for ``i`` in 0..255 and is what ``decode``
            expects as its ``coding_dict``. Empty input yields an empty code
            list.
        """
        start_dictionary = {bytes([i]): i for i in range(256)}
        if not data:
            # The final flush below would look up b'', which is never a key.
            return [], start_dictionary

        table = start_dictionary.copy()
        output = []
        w = b''
        for value in data:
            # Iterating bytes yields ints; turn each back into a 1-byte string
            # so it can be concatenated with the current phrase.
            byte = bytes([value])
            candidate = w + byte
            if candidate in table:
                w = candidate
            else:
                output.append(table[w])
                table[candidate] = len(table)
                w = byte
        # Flush the phrase that was still being extended when input ended.
        output.append(table[w])
        return output, start_dictionary

    def decode(self, code: list[int], coding_dict: dict[bytes, int]) -> bytes:
        """Rebuild the original bytes from an LZW code sequence.

        Args:
            code: Integer codes as produced by ``encode``.
            coding_dict: The start dictionary returned by ``encode``
                (phrase -> code); it is inverted internally and not modified.

        Returns:
            The decoded bytes; ``b''`` when ``code`` is empty.

        Raises:
            KeyError: If the first code is not a value of ``coding_dict``.
        """
        if not code:
            return b''

        table = {index: phrase for phrase, index in coding_dict.items()}
        previous = table[code[0]]
        output = bytearray(previous)
        for current in code[1:]:
            if current in table:
                entry = table[current]
            else:
                # Code not defined yet: it is the entry about to be created,
                # i.e. the previous phrase plus its own first byte.
                entry = previous + previous[0:1]
            output += entry
            # Slicing ([0:1]) keeps a bytes object; indexing would give an int.
            table[len(table)] = previous + entry[0:1]
            previous = entry
        return bytes(output)


### Deflate

In [28]:
def deflate(data):
    """Placeholder for a Deflate implementation (not written yet).

    The body is only ``...``, so ``data`` is ignored and the call
    currently returns ``None``.
    """
    ...

### Other

In [29]:
def other(data):
    """Placeholder for a further compression algorithm (not written yet).

    The body is only ``...``, so ``data`` is ignored and the call
    currently returns ``None``.
    """
    ...

In [33]:
# Codec instance passed to encoding() in the round-trip demo further down.
lzw = LZW_binary()
# The two bare string literals below are expression statements with no effect;
# presumably they are kept only as notes of sample file paths.
'/home/gllekk/all_the_code/default/DISCRETE/compression_research/img_test_1.png'
'/home/gllekk/all_the_code/default/DISCRETE/compression_research/img_test_1_after.lzw'
def encoding(path:str, compress_algorithm:object):
    """Compress the file at ``path`` and write the codes next to it.

    The output file has the same name as the input with its extension
    replaced by ``compress_algorithm.name.lower()``. Every code is stored as
    a 4-byte big-endian unsigned integer.

    Args:
        path: Path of the file to compress.
        compress_algorithm: Object exposing ``name`` (str) and
            ``encode(bytes) -> (codes, start_dict)``.

    Returns:
        ``(out_path, start_dict, file_type, compress_algorithm)`` where
        ``out_path`` is the compressed file's path and ``file_type`` is the
        original extension without the leading dot. The tuple can be
        unpacked straight into ``decoding``.

    Raises:
        OverflowError: If a code does not fit into 4 bytes.
    """
    # Local import so the function stays self-contained.
    import os

    with open(path, 'rb') as file:
        raw = file.read()
    encoded_data, encoded_dict = compress_algorithm.encode(raw)

    # splitext copes with extensions of any length ('.py', '.jpeg', ...),
    # unlike slicing off a fixed three characters.
    stem, extension = os.path.splitext(path)
    file_type = extension[1:]
    out_path = f'{stem}.{compress_algorithm.name.lower()}'

    with open(out_path, 'wb') as file:
        # One write of the joined payload instead of one write per code.
        file.write(b''.join(
            value.to_bytes(4, byteorder='big') for value in encoded_data
        ))
    return out_path, encoded_dict, file_type, compress_algorithm

def decoding(path:str, start_dict:dict, f_type:str, compress_algorithm:object):
    """Decompress a file of 4-byte big-endian codes and write the result.

    The output is written next to the input as
    ``<input name without extension>_decoded.<f_type>``.

    Args:
        path: Path of the compressed file.
        start_dict: Initial dictionary forwarded to
            ``compress_algorithm.decode``.
        f_type: Extension (without dot) to give the restored file.
        compress_algorithm: Object exposing
            ``decode(codes, start_dict) -> bytes``.

    Raises:
        ValueError: If the file size is not a multiple of 4 bytes, i.e. the
            last code is truncated.
    """
    # Local import so the function stays self-contained.
    import os

    with open(path, 'rb') as file:
        raw = file.read()
    if len(raw) % 4:
        # A short trailing chunk would otherwise be read as a bogus small code.
        raise ValueError(
            f'{path!r} is truncated: size {len(raw)} is not a multiple of 4'
        )
    encoded_data = [
        int.from_bytes(raw[offset:offset + 4], byteorder='big')
        for offset in range(0, len(raw), 4)
    ]
    decoded = compress_algorithm.decode(encoded_data, start_dict)

    # Strip whatever extension the compressed file has, rather than assuming
    # it is exactly three characters long.
    stem = os.path.splitext(path)[0]
    with open(stem + '_decoded.' + f_type, 'wb') as file:
        file.write(decoded)

# Round-trip demo: compress the sample image, then restore it from the
# compressed file. Other sample inputs that can be swapped in:
#   '/home/gllekk/all_the_code/default/DISCRETE/compression_research/lorem.txt'
#   '/home/gllekk/all_the_code/default/DISCRETE/compression_research/mixkit-fast-rocket-whoosh-1714.wav'
enc = encoding(
    '/home/gllekk/all_the_code/default/DISCRETE/compression_research/img_test_1.png',
    lzw,
)
# encoding() returns (path, start_dict, file_type, algorithm), which is exactly
# the positional argument list of decoding().
dec = decoding(*enc)