## Lempel-Ziv-Welch Encoding

In [105]:

import sys
from sys import argv
from struct import *
from timeit import default_timer as timer
import os
from csv import writer, reader

##### File encoding handler

In [106]:

def encode_file(file_to_encode):
    """Compress a text file with LZW and store the codes as 16-bit words.

    The file is read in text mode and encoded one character at a time.
    The code table is seeded with the 256 single characters ``chr(0)`` ..
    ``chr(255)``; input containing characters outside that range is not
    supported because they have no entry in the initial table.

    Args:
        file_to_encode: Path of the text file to compress. Its base name
            (text after the last ``/`` and before the first ``.``) is used
            to name the output file.

    Returns:
        The path of the written file,
        ``./Encoded_Files/<base name>_LZW_encoded``. Each code is stored as
        a big-endian unsigned 16-bit integer.
    """
    # Codes are written with the 'H' struct format, so 65535 is the largest
    # representable code and the table may hold at most 2**16 entries.
    maximum_table_size = 1 << 16

    with open(file_to_encode) as source:
        text = source.read()

    char_map = {chr(i): i for i in range(256)}
    next_code = 256
    string = ""
    encoded_msg = []

    for symbol in text:
        string_plus_symbol = string + symbol
        if string_plus_symbol in char_map:
            # Keep extending the current phrase while it is still known.
            string = string_plus_symbol
            continue

        encoded_msg.append(char_map[string])
        # Strictly less-than: assigning code 65536 would overflow the
        # 16-bit output word when that code is later emitted.
        if next_code < maximum_table_size:
            char_map[string_plus_symbol] = next_code
            next_code += 1
        string = symbol

    # Flush the phrase still pending after the last character.
    if string in char_map:
        encoded_msg.append(char_map[string])

    enc_file = "./Encoded_Files/" + file_to_encode.split('/')[-1].split('.')[0] + "_LZW_encoded"
    os.makedirs(os.path.dirname(enc_file), exist_ok=True)

    with open(enc_file, "wb") as output_file:
        # Pack every code in a single call instead of one write per code.
        output_file.write(pack('>%dH' % len(encoded_msg), *encoded_msg))

    return enc_file

##### File decoding handler

In [107]:
def decode_file(encoded_file, decode_to_file):
    """Decode an LZW file of big-endian 16-bit codes back into text.

    Args:
        encoded_file: Path of the binary file holding the codes, each stored
            as a big-endian unsigned 16-bit integer. A dangling odd byte at
            the end of the file is ignored.
        decode_to_file: Path of the text file that receives the decoded
            output. Its parent directory is created if it is missing.

    Raises:
        ValueError: If the stream contains a code that is neither in the
            table nor the next code about to be defined (corrupt input).
    """
    with open(encoded_file, "rb") as file:
        raw = file.read()

    # Only complete 2-byte words form codes; drop a trailing odd byte.
    code_count = len(raw) // 2
    encoded_msg = unpack('>%dH' % code_count, raw[:code_count * 2])

    char_map = {i: chr(i) for i in range(256)}
    next_code = 256
    # Codes above 65535 cannot appear in a 16-bit stream, so entries beyond
    # that point would never be referenced and are not stored.
    max_code = 0xFFFF
    pieces = []
    string = ""

    for code in encoded_msg:
        if code in char_map:
            entry = char_map[code]
        elif code == next_code and string:
            # The code refers to the entry being defined right now: it is
            # the previous phrase followed by its own first character.
            entry = string + string[0]
        else:
            raise ValueError(
                "invalid LZW code %d (next expected code is %d)" % (code, next_code)
            )

        pieces.append(entry)
        if string and next_code <= max_code:
            char_map[next_code] = string + entry[0]
            next_code += 1
        string = entry

    parent = os.path.dirname(decode_to_file)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(decode_to_file, "w") as output_file:
        output_file.write("".join(pieces))

In [108]:
def get_compression_ratio(original_file, encoded_file):
    """Return the on-disk sizes of both files and their compression ratio.

    Args:
        original_file: Path of the uncompressed file.
        encoded_file: Path of the compressed file.

    Returns:
        A tuple ``(og_size, enc_size, ratio)`` where the sizes are in bytes
        and ``ratio`` is ``og_size / enc_size``. When the encoded file is
        empty the division is undefined, so ``ratio`` is ``1.0`` if the
        original is empty as well (nothing to compress) and ``inf``
        otherwise.
    """
    og_size = os.path.getsize(original_file)
    enc_size = os.path.getsize(encoded_file)

    if enc_size == 0:
        # Avoid ZeroDivisionError for an empty encoded file.
        ratio = 1.0 if og_size == 0 else float("inf")
    else:
        ratio = og_size / enc_size
    return og_size, enc_size, ratio

In [109]:
def Lempel_Ziv_Welch(file_to_be_encoded):
    """Encode and decode one file with LZW, printing timings and sizes.

    The file is compressed with ``encode_file``, then restored with
    ``decode_file`` into ``./Decoded_Files/<base name>_LZW_decoded``.
    Two lines are printed: the encode and decode durations, followed by
    the original size, encoded size and compression ratio.

    Args:
        file_to_be_encoded: Path of the text file to run through the
            encode/decode cycle.
    """
    # Time the compression pass.
    tick = timer()
    compressed_path = encode_file(file_to_be_encoded)
    enc_time = timer() - tick

    # Output name: text after the last '/' and before the first '.'.
    base_name = file_to_be_encoded.split('/')[-1].split('.')[0]
    restored_path = "./Decoded_Files/" + base_name + "_LZW_decoded"

    # Time the decompression pass.
    tick = timer()
    decode_file(compressed_path, restored_path)
    dec_time = timer() - tick

    print(enc_time, dec_time)

    size_report = get_compression_ratio(file_to_be_encoded, compressed_path)
    print(*size_report)

In [110]:
# Directory that holds the input files to compress.
filepath = "./Input_Files/"

# Run the full encode/decode cycle on the sample text; the call prints the
# encode/decode timings and then the original size, encoded size and ratio.
Lempel_Ziv_Welch(filepath+"Shakespeare.txt")

0.031572200008668005 0.06552659999579191
125179 62748 1.9949480461528655
