# TP1
## Question 2: LZW

In [2]:
import numpy as np

### File loading and dictionary initialization

In [14]:
# Relative directory that holds the sample texts (resolved against the
# current working directory when a file is opened).
PATH = "../data/textes/"

# Names of the five sample texts: texte_1.txt ... texte_5.txt.
FILES = ["texte_{}.txt".format(number) for number in range(1, 6)]

def load_file(index: int) -> str:
	"""Read one of the sample texts and return its full content.

	Args:
		index: Zero-based position of the file name in ``FILES``.

	Returns:
		The whole file content as a single string.

	Raises:
		ValueError: If ``index`` is negative or not smaller than ``len(FILES)``.
	"""
	out_of_range = index < 0 or index >= len(FILES)
	if out_of_range:
		raise ValueError("Index out of range")

	file_path = PATH + FILES[index]
	# NOTE(review): no explicit encoding is given, so the platform default is
	# used -- confirm this matches how the text files were saved.
	with open(file_path, "r") as handle:
		return handle.read()

def init_dict(msg: str) -> dict[str, str]:
	"""Build the initial symbol dictionary of a message.

	Each distinct character of ``msg`` is given a rank in order of first
	appearance (0, 1, 2, ...) and mapped to the unpadded binary writing of
	that rank.

	Args:
		msg: The message whose alphabet is extracted.

	Returns:
		A dictionary mapping each distinct character to its binary code
		string, in order of first appearance.
	"""
	# dict.fromkeys keeps one entry per character, in first-seen order.
	alphabet = dict.fromkeys(msg)
	return {symbol: f"{rank:b}" for rank, symbol in enumerate(alphabet)}

### Utils

In [4]:
def dict_update_size(dict_symb: dict[str, str]):
	"""Left-pad every code of the dictionary with zeros, in place.

	All codes are padded to ``ceil(log2(len(dict_symb)))`` characters. Codes
	that are already at least that long are left unchanged (``str.zfill``
	never truncates).

	Args:
		dict_symb: Mapping from strings to binary code strings. It is
			modified in place; nothing is returned.
	"""
	n_symb = len(dict_symb)
	if n_symb == 0:
		# Nothing to pad, and log2(0) is undefined.
		return

	# For n >= 1, (n - 1).bit_length() == ceil(log2(n)). Computing it once,
	# in exact integer arithmetic, avoids a float log per dictionary entry.
	width = (n_symb - 1).bit_length()

	# Only values of existing keys are replaced, so iterating while
	# assigning is safe (the dictionary size does not change).
	for symbol, code in dict_symb.items():
		dict_symb[symbol] = code.zfill(width)

def log_size(x):
	"""Return ``ceil(log2(x))`` as computed by NumPy.

	Used to get the number of bits needed to give a distinct fixed-width
	binary code to each of ``x`` symbols.
	"""
	bits = np.log2(x)
	return np.ceil(bits)


Binary symbols: {'L': '00000', 'o': '00001', 'r': '00010', 'e': '00011', 'm': '00100', ' ': '00101', 'i': '00110', 'p': '00111', 's': '01000', 'u': '01001', 'd': '01010', 'l': '01011', 't': '01100', 'a': '01101', ',': '01110', 'c': '01111', 'n': '10000', 'g': '10001', '.': '10010', 'N': '10011', 'b': '10100', 'h': '10101', 'I': '10110', 'v': '10111'}


### Compression algorithm

In [38]:
def compress(msg: str, dict_symb: dict[str, str]) -> tuple[list[str], int]:
	"""Encode a message with a growing dictionary of binary codes.

	At each step the longest string starting at the current position that is
	already in the dictionary is emitted as its binary code. That string
	extended by the next character is then added to the dictionary with the
	next free index. When the dictionary size needs more bits than the code
	just emitted, every code is re-padded with ``dict_update_size``.

	Args:
		msg: The message to encode.
		dict_symb: Dictionary mapping strings to binary code strings. It must
			contain every single character of ``msg``. It is modified in
			place: new strings are added and codes are re-padded.

	Returns:
		A tuple ``(coded_msg, length)`` where ``coded_msg`` is the list of
		emitted binary code strings and ``length`` is the total number of
		characters (bits) over all emitted codes.

	Raises:
		KeyError: If a character of ``msg`` is not a key of ``dict_symb``.
	"""
	pos, length = 0, 0
	n_symb = len(dict_symb)
	msg_len = len(msg)
	coded_msg = []

	while pos < msg_len:
		if msg[pos] not in dict_symb:
			raise KeyError(
				f"symbol {msg[pos]!r} at position {pos} is not in the dictionary"
			)

		# Grow the match one character at a time while it stays in the dictionary.
		current = msg[pos]
		pos += 1
		while pos < msg_len and current + msg[pos] in dict_symb:
			current += msg[pos]
			pos += 1

		# Emit the code of the longest match.
		bin_code = dict_symb[current]
		coded_msg.append(bin_code)
		length += len(bin_code)

		# Register the match extended by the next unread character, if any.
		if pos < msg_len:
			dict_symb[current + msg[pos]] = f"{n_symb:b}"
			n_symb += 1

		# Widen all codes when the dictionary size needs more bits than the
		# code that was just emitted.
		if log_size(n_symb) > len(bin_code):
			dict_update_size(dict_symb)

	return coded_msg, length

In [51]:
import time
def run(index: int, verbose: bool = True):
    """Load a sample text, compress it and print the result.

    The coded message is always printed. With ``verbose`` the message, the
    initial dictionary, the length statistics and the timings are printed
    as well.

    Args:
        index: Index of the text, passed to ``load_file``.
        verbose: Whether to print the detailed report.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short intervals; time.time() is wall-clock and may jump.
    start = time.perf_counter()
    msg = load_file(index)
    loaded = time.perf_counter()
    if verbose:
        print(f"Message: {msg}")

    dict_symb = init_dict(msg)
    n_symbols = len(dict_symb)
    # Bits per symbol of the fixed-width baseline. A one-symbol alphabet is
    # still coded as "0" (one bit), and an empty message has no symbols;
    # both cases would otherwise give a zero or undefined baseline.
    bits_per_symbol = max(1, int(log_size(n_symbols))) if n_symbols else 0
    initial_length = bits_per_symbol * len(msg)
    dict_update_size(dict_symb)
    dict_loaded = time.perf_counter()
    if verbose:
        print(f"Binary symbols: {dict_symb}")
        print(f"Number of symbols: {len(dict_symb)}")
        print(f"Symbols: {list(dict_symb.keys())}")
        print(f"Binary symbols: {list(dict_symb.values())}")
        print(f"Initial length: {initial_length}")

    coded_msg, length = compress(msg, dict_symb)
    compressed = time.perf_counter()
    print(f"Coded message: {coded_msg}")
    print(f"Or : {''.join(coded_msg)}")

    if verbose:
        print("")
        print(f"Length = {length}")
        print(f"Original length = {initial_length}")
        if initial_length > 0 and length > 0:
            print(f"Compression rate = {100 - length/initial_length*100:.2f}%")
            print(f"Compression factor = {initial_length/length:.2f}")
        else:
            # Empty message: the ratios are undefined.
            print("Compression rate = n/a")
            print("Compression factor = n/a")

        print("")
        print(f"Loading time: {(loaded-start)*10**3:.2f}ms")
        print(f"Dictionary loading time: {(dict_loaded-loaded)*10**3:.2f}ms")
        print(f"Compression time: {(compressed-dict_loaded)*10**3:.2f}ms")
        print(f"Total time: {(compressed-start)*10**3:.2f}ms")
# Compress the text at index 4 (texte_5.txt) with the verbose report enabled.
run(4)


Message: Aliquam placerat dictum lectus vel pellentesque. Vestibulum semper dui lacus, sed commodo dolor maximus a. Praesent convallis in quam in posuere. Ut eleifend tortor a laoreet imperdiet. Sed pharetra risus sit amet velit cursus semper. Nam congue ac lectus sit amet laoreet. Integer quis lacinia turpis. Maecenas erat orci, porttitor ac ligula dapibus, interdum fringilla nunc. Proin auctor, arcu ac fermentum ultricies, erat justo laoreet nunc, sit amet lacinia lorem elit vel dolor. Aenean fermentum, mi sed iaculis laoreet, mauris nibh accumsan mauris, imperdiet pretium dui enim sit amet massa. Quisque sit amet tristique eros, eget lacinia tellus. Nam quis risus maximus, laoreet felis sit amet, efficitur neque. In pharetra urna at erat facilisis lobortis a eu orci.Donec suscipit, mi et porttitor ullamcorper, dui nisi mollis tortor, a facilisis elit enim ut metus. Cras sed dui sit amet metus sodales pretium at vitae magna. Donec ut efficitur leo, sit amet malesuada arcu. Mauris viv