In [None]:
!pip install gradio

Collecting gradio
  Using cached gradio-3.36.1-py3-none-any.whl (19.8 MB)
Collecting aiofiles (from gradio)
  Using cached aiofiles-23.1.0-py3-none-any.whl (14 kB)
Collecting fastapi (from gradio)
  Using cached fastapi-0.100.0-py3-none-any.whl (65 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.3.0.tar.gz (4.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client>=0.2.7 (from gradio)
  Using cached gradio_client-0.2.7-py3-none-any.whl (288 kB)
Collecting httpx (from gradio)
  Using cached httpx-0.24.1-py3-none-any.whl (75 kB)
Collecting huggingface-hub>=0.14.0 (from gradio)
  Using cached huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
Collecting mdit-py-plugins<=0.3.3 (from gradio)
  Using cached mdit_py_plugins-0.3.3-py3-none-any.whl (50 kB)
Collecting orjson (from gradio)
  Downloading orjson-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.7/138.7 kB[

In [None]:
import gradio as gr
import time
import tempfile

# **Run Length Encoding**

In [None]:
def encode_rle(data):
    """Run-length encode the string ``data``.

    Runs of three or more identical characters are written as
    ``<count><char>``; runs of one or two characters are copied unchanged.

    NOTE(review): digits that occur in ``data`` are emitted verbatim, so the
    output cannot distinguish them from run counts when it is decoded again.

    Returns:
        The encoded string (empty when ``data`` is empty).
    """
    pieces = []
    total = len(data)
    start = 0
    while start < total:
        symbol = data[start]
        # Advance `end` to the first position that no longer repeats `symbol`.
        end = start + 1
        while end < total and data[end] == symbol:
            end += 1
        run = end - start
        # Compress only runs of at least 3 characters; shorter runs are
        # written out character by character.
        pieces.append(str(run) + symbol if run > 2 else symbol * run)
        start = end
    return ''.join(pieces)


def decode_rle(data):
    """Expand a run-length encoded string.

    Consecutive digits are collected into a repeat count that applies to the
    next non-digit character; a character with no preceding digits is copied
    once. Digits left over at the end of ``data`` are discarded.

    Returns:
        The expanded string.
    """
    pieces = []
    pending_digits = ''
    for ch in data:
        if ch.isdigit():
            pending_digits += ch
            continue
        repeat = int(pending_digits) if pending_digits else 1
        pieces.append(ch * repeat)
        pending_digits = ''
    return ''.join(pieces)

**Input a string to encode or decode**

In [None]:
def rle_interface_str(input_string, mode):
    """Gradio callback: run-length encode or decode a string and time it.

    Args:
        input_string: Text to process.
        mode: "Encode" to compress; any other value decodes.

    Returns:
        A ``(result, execution_time)`` tuple, with the time in seconds.
    """
    transform = encode_rle if mode == "Encode" else decode_rle
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can report 0.0 for fast calls.
    start_time = time.perf_counter()
    result = transform(input_string)
    execution_time = time.perf_counter() - start_time
    return result, execution_time


# String-based RLE demo. Components come from the top-level gradio namespace;
# the `gr.inputs` / `gr.outputs` namespaces are deprecated and emit warnings.
iface = gr.Interface(
    fn=rle_interface_str,
    inputs=[
        gr.Textbox(lines=3, label="Input String"),
        gr.Radio(["Encode", "Decode"], label="Mode"),
    ],
    outputs=[
        gr.Textbox(label="Output"),
        gr.Textbox(label="Execution time"),
    ],
    title="Run-Length Encoding Algorithm",
)

iface.launch()


  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



**Input a file to encode or decode**

In [None]:
def rle_interface_file(input_file, mode):
    """Gradio callback: run-length encode or decode an uploaded text file.

    Args:
        input_file: Uploaded file object; its ``name`` attribute is the path
            that gets read.
        mode: "Encode" to compress; any other value decodes.

    Returns:
        A ``(output_path, execution_time)`` tuple: the path of a temporary
        file holding the result, and the processing time in seconds.
    """
    # Read as UTF-8 explicitly so it matches the UTF-8 bytes written below,
    # instead of depending on the platform's default encoding.
    with open(input_file.name, "r", encoding="utf-8") as file:
        input_string = file.read()

    transform = encode_rle if mode == "Encode" else decode_rle
    # Time only the transformation, not the file I/O.
    start_time = time.perf_counter()
    result = transform(input_string)
    execution_time = time.perf_counter() - start_time

    # delete=False keeps the file on disk after the handle is closed, because
    # the returned path is consumed after this function returns.
    with tempfile.NamedTemporaryFile(delete=False) as output_file:
        output_file.write(result.encode())
    return output_file.name, execution_time


# File-based RLE demo. Components come from the top-level gradio namespace;
# the `gr.inputs` / `gr.outputs` namespaces are deprecated and emit warnings.
iface = gr.Interface(
    fn=rle_interface_file,
    inputs=[
        gr.File(label="Input File"),
        gr.Radio(["Encode", "Decode"], label="Mode"),
    ],
    outputs=[
        gr.File(label="Output File"),
        gr.Textbox(label="Execution time"),
    ],
    title="Run-Length Encoding Algorithm",
)

iface.launch()


  gr.inputs.File(label="Input File"),
  gr.inputs.File(label="Input File"),
  gr.inputs.File(label="Input File"),
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")
  gr.outputs.File(label="Output File"),
  gr.outputs.Textbox(label="Execution time")],


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



# **Lempel Ziv Welch**

In [None]:
import base64
import string
import random
import ast

In [None]:
def lzw_encode(txt):
    """Compress ``txt`` with LZW and return the list of integer codes.

    The dictionary is seeded with the 256 single characters ``chr(0)`` to
    ``chr(255)``; each new phrase receives the next free code starting at 256.

    Args:
        txt: Text to compress. Every character must have a code point
            below 256.

    Returns:
        A list of integer codes; ``[]`` for empty input.

    Raises:
        ValueError: If ``txt`` contains a character outside the initial
            256-entry dictionary.
    """
    if not txt:
        # Nothing to emit (indexing txt[0] would raise IndexError).
        return []

    # Phrase -> code map: constant-time membership and lookup, instead of
    # scanning a list for every character.
    dictionary = {chr(code): code for code in range(256)}
    next_code = 256
    compressed = []

    def emit(phrase):
        try:
            compressed.append(dictionary[phrase])
        except KeyError:
            raise ValueError(
                f"character {phrase!r} is outside the initial 256-entry dictionary"
            ) from None

    s = txt[0]
    for c in txt[1:]:
        candidate = s + c
        if candidate in dictionary:
            # Known phrase: keep extending it.
            s = candidate
        else:
            # Unknown phrase: emit the code of the longest known prefix,
            # register the new phrase, and restart from the current character.
            emit(s)
            dictionary[candidate] = next_code
            next_code += 1
            s = c
    emit(s)
    return compressed

def lzw_decode(compressed):
    """Rebuild the original text from a sequence of LZW codes.

    Args:
        compressed: Iterable of integer codes, as produced by ``lzw_encode``.

    Returns:
        The decoded string; ``''`` for an empty sequence.

    Raises:
        ValueError: If a code is neither in the dictionary nor the single
            code that is about to be defined, i.e. the stream is corrupt.
    """
    next_code = 256
    dictionary = {code: chr(code) for code in range(next_code)}
    previous = ''
    pieces = []
    for k in compressed:
        if k in dictionary:
            entry = dictionary[k]
        elif previous and k == next_code:
            # The code refers to the phrase being defined in this very step:
            # it must be the previous phrase plus its own first character.
            entry = previous + previous[0]
        else:
            raise ValueError(f"invalid LZW code: {k!r}")
        pieces.append(entry)
        if previous:
            # Mirror the encoder: previous phrase + first character of the
            # current one becomes the next dictionary entry.
            dictionary[next_code] = previous + entry[0]
            next_code += 1
        previous = entry
    return ''.join(pieces)

**Input a string to encode or decode**

In [None]:
def lzw_interface_str(input_string, mode):
    """Gradio callback: LZW encode or decode a string and time it.

    Args:
        input_string: In "Encode" mode, the text to compress. Otherwise the
            textual form of a Python list of codes (e.g. ``"[65, 66, 256]"``),
            which is parsed with ``ast.literal_eval``.
        mode: "Encode" to compress; any other value decodes.

    Returns:
        A ``(result, execution_time)`` tuple. ``result`` is a list of codes
        when encoding and a string when decoding; the time is in seconds and,
        when decoding, includes parsing the input.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can report 0.0 for fast calls.
    start_time = time.perf_counter()
    if mode == "Encode":
        result = lzw_encode(input_string)
    else:
        result = lzw_decode(ast.literal_eval(input_string))
    execution_time = time.perf_counter() - start_time
    return result, execution_time


# String-based LZW demo. Components come from the top-level gradio namespace;
# the `gr.inputs` / `gr.outputs` namespaces are deprecated and emit warnings.
iface = gr.Interface(
    fn=lzw_interface_str,
    inputs=[
        gr.Textbox(lines=3, label="Input String"),
        gr.Radio(["Encode", "Decode"], label="Mode"),
    ],
    outputs=[
        gr.Textbox(label="Output"),
        gr.Textbox(label="Execution time"),
    ],
    title="LZW Algorithms",
)

iface.launch()

  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")
  gr.outputs.Textbox(label="Output"),
  gr.outputs.Textbox(label="Execution time")],


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



**Input a file to encode or decode**

In [None]:
def lzw_interface_file(input_file, mode):
    """Gradio callback: LZW encode or decode an uploaded text file.

    Args:
        input_file: Uploaded file object; its ``name`` attribute is the path
            that gets read.
        mode: "Encode" to compress the file's text; any other value parses the
            file's content as a Python list of codes (``ast.literal_eval``)
            and decodes it.

    Returns:
        A ``(output_path, execution_time)`` tuple: the path of a temporary
        file holding ``str(result)``, and the processing time in seconds.
    """
    # Read as UTF-8 explicitly so it matches the UTF-8 bytes written below,
    # instead of depending on the platform's default encoding.
    with open(input_file.name, "r", encoding="utf-8") as file:
        input_string = file.read()

    # Time the transformation (including parsing when decoding), not file I/O.
    start_time = time.perf_counter()
    if mode == "Encode":
        result = lzw_encode(input_string)
    else:
        result = lzw_decode(ast.literal_eval(input_string))
    execution_time = time.perf_counter() - start_time

    # delete=False keeps the file on disk after the handle is closed, because
    # the returned path is consumed after this function returns.
    with tempfile.NamedTemporaryFile(delete=False) as output_file:
        # str() turns the encoder's list of codes into text; for decoded
        # output it leaves the string unchanged.
        output_file.write(str(result).encode())

    return output_file.name, execution_time


# File-based LZW demo. Components come from the top-level gradio namespace;
# the `gr.inputs` / `gr.outputs` namespaces are deprecated and emit warnings.
iface = gr.Interface(
    fn=lzw_interface_file,
    inputs=[
        gr.File(label="Input File"),
        gr.Radio(["Encode", "Decode"], label="Mode"),
    ],
    outputs=[
        gr.File(label="Output File"),
        gr.Textbox(label="Execution time"),
    ],
    title="LZW Algorithm",
)

iface.launch()


  gr.inputs.File(label="Input File"),
  gr.inputs.File(label="Input File"),
  gr.inputs.File(label="Input File"),
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")
  gr.inputs.Radio(["Encode", "Decode"], label="Mode")
  gr.outputs.File(label="Output File"),
  gr.outputs.Textbox(label="Execution time")],


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



# **Huffman**

In [None]:
import pandas as pd

In [None]:
# A Huffman Tree Node
class Node:
    """One node of a Huffman tree.

    Attributes are set directly from the constructor arguments:
        prob:   weight of the node (stored as given).
        symbol: symbol, or concatenation of symbols, the node stands for.
        left:   left child, or None.
        right:  right child, or None.
        code:   edge label leading to this node; starts as '' and is assigned
                0 or 1 while the tree is built.
    """

    def __init__(self, prob, symbol, left=None, right=None):
        # Payload of the node.
        self.prob, self.symbol = prob, symbol
        # Children; both stay None for a leaf.
        self.left, self.right = left, right
        # Tree direction (0/1); empty until the node is merged under a parent.
        self.code = ''

In [None]:
# Module-level symbol -> code table that Calculate_Codes fills in.
codes = {}

In [None]:
""" A helper function to print the codes of symbols by traveling Huffman Tree"""
def Calculate_Codes(node, val='', _table=None):
    """Walk the Huffman tree under ``node`` and return a symbol -> code dict.

    Each leaf's code is the concatenation of the ``code`` attributes on the
    path from ``node`` down to that leaf, prefixed by ``val``.

    Args:
        node: Root of the (sub)tree to walk.
        val: Code prefix accumulated so far.
        _table: Internal accumulator used by the recursion; callers should
            leave it unset.

    Returns:
        A dict built for this call only. Previously the function added to the
        module-level ``codes`` dict without clearing it, so symbols from
        earlier inputs leaked into later results. The module-level ``codes``
        is still refreshed with the latest table for code that reads it.
    """
    is_root_call = _table is None
    if is_root_call:
        _table = {}

    # Huffman code for the current node.
    newVal = val + str(node.code)

    if node.left:
        Calculate_Codes(node.left, newVal, _table)
    if node.right:
        Calculate_Codes(node.right, newVal, _table)

    if not node.left and not node.right:
        # Leaf: the accumulated path is this symbol's code.
        _table[node.symbol] = newVal

    if is_root_call:
        # Keep the module-level table in sync with the latest tree.
        codes.clear()
        codes.update(_table)
    return _table

In [None]:
""" A helper function to calculate the probabilities of symbols in given data"""
def Calculate_Probability(data):
    """Count how often each element occurs in ``data``.

    Despite the name, the values are raw occurrence counts, not normalised
    probabilities.

    Returns:
        A dict mapping each distinct element to its count, in order of first
        appearance.
    """
    counts = {}
    for item in data:
        counts[item] = counts.get(item, 0) + 1
    return counts

In [None]:
""" A helper function to obtain the encoded output"""
def Output_Encoded(data, coding):
    """Encode ``data`` by concatenating the code of each of its elements.

    Args:
        data: Sequence of symbols to encode.
        coding: Mapping from symbol to its code; every symbol in ``data``
            must be a key.

    Returns:
        The codes joined into a single string, in input order.
    """
    return ''.join(str(coding[symbol]) for symbol in data)

In [None]:
""" A helper function to calculate the space difference between compressed and non compressed data"""
def Total_Gain(data, coding):
    """Print the size of ``data`` in bits before and after Huffman coding.

    The uncompressed size is taken as 8 bits per element of ``data``. The
    compressed size sums, for every symbol in ``coding``, its number of
    occurrences in ``data`` times the length of its code.
    """
    bits_before = 8 * len(data)
    bits_after = sum(
        data.count(symbol) * len(code) for symbol, code in coding.items()
    )
    print("Space usage before compression (in bits):", bits_before)
    print("Space usage after compression (in bits):", bits_after)

In [None]:
def Huffman_Encoding(data):
    """Build a Huffman tree for ``data`` and encode it.

    Prints the symbols, their counts, the resulting code table and the
    before/after sizes as a side effect.

    Args:
        data: Sequence of symbols (typically a string) to encode.

    Returns:
        A ``(encoded_output, huffman_encoding, tree)`` tuple: the bit string,
        the symbol -> code dict, and the root node of the tree. For empty
        ``data`` the result is ``('', {}, None)``.
    """
    if not data:
        # No symbols means no tree (indexing nodes[0] would raise IndexError).
        return '', {}, None

    symbol_with_probs = Calculate_Probability(data)
    symbols = symbol_with_probs.keys()
    probabilities = symbol_with_probs.values()
    print("symbols: ", symbols)
    print("probabilities: ", probabilities)

    # One leaf node per distinct symbol.
    nodes = [Node(count, symbol) for symbol, count in symbol_with_probs.items()]

    if len(nodes) == 1:
        # A single distinct symbol would otherwise get the empty code and
        # encode to '' regardless of length. Hang the leaf under a parent as
        # its left child so it receives the one-bit code '0'.
        only = nodes[0]
        only.code = 0
        nodes = [Node(only.prob, only.symbol, left=only)]

    while len(nodes) > 1:
        # Sort ascending by weight and merge the two lightest nodes.
        nodes = sorted(nodes, key=lambda x: x.prob)
        right = nodes[0]
        left = nodes[1]

        left.code = 0
        right.code = 1

        newNode = Node(left.prob + right.prob, left.symbol + right.symbol, left, right)

        # Replace the two merged nodes with their parent.
        nodes = nodes[2:] + [newNode]

    huffman_encoding = Calculate_Codes(nodes[0])
    print("symbols with codes", huffman_encoding)
    Total_Gain(data, huffman_encoding)
    encoded_output = Output_Encoded(data, huffman_encoding)
    return encoded_output, huffman_encoding, nodes[0]

In [None]:
def Huffman_Decoding(encoded_data, huffman_tree):
    """Decode a bit string by walking the Huffman tree.

    Args:
        encoded_data: Iterable of characters; '1' moves to the right child,
            '0' to the left child, any other character leaves the position
            unchanged.
        huffman_tree: Root node of the tree (objects exposing ``left``,
            ``right`` and ``symbol``), or None for an empty encoding.

    Returns:
        The decoded symbols joined into one string.

    Raises:
        ValueError: If the bits lead to a missing child, or if there is data
            to decode but no tree.
    """
    if huffman_tree is None:
        if encoded_data:
            raise ValueError("cannot decode data without a Huffman tree")
        return ''

    tree_head = huffman_tree
    node = tree_head
    decoded_output = []
    for x in encoded_data:
        if x == '1':
            node = node.right
        elif x == '0':
            node = node.left
        if node is None:
            raise ValueError("encoded data does not match the Huffman tree")
        # Explicit leaf test (replaces detecting leaves via AttributeError).
        if node.left is None and node.right is None:
            decoded_output.append(node.symbol)
            node = tree_head

    return ''.join(str(item) for item in decoded_output)

In [None]:
""" First Test """
# Round-trip smoke test: encode a short string, decode it, and check that the
# original text comes back.
data = "avvvavava"
print(data)
encodingg, coding, tree = Huffman_Encoding(data)
print("Encoded output", encodingg)
decoded = Huffman_Decoding(encodingg, tree)
print("Decoded Output", decoded)
assert decoded == data, "Huffman round trip did not reproduce the input"

avvvavava
symbols:  dict_keys(['a', 'v'])
probabilities:  dict_values([4, 5])
symbols with codes {'v': '0', 'a': '1'}
Space usage before compression (in bits): 72
Space usage after compression (in bits): 9
Encoded output 100010101
Encoded output 100010101
Decoded Output avvvavava


In [None]:
def draw_table(input_dict):
    """Render ``input_dict`` as an HTML table.

    Each key becomes a row label and its value goes into a single column
    named "Value".

    Returns:
        The HTML markup produced by ``DataFrame.to_html()``.
    """
    return pd.DataFrame.from_dict(
        input_dict, orient='index', columns=["Value"]
    ).to_html()

**Input a string to Encode and Decode**

In [None]:
def Huffman_interface_str(input_string, mode=None):
    """Gradio callback: Huffman-encode a string, decode it again, time both.

    Args:
        input_string: Text to encode.
        mode: Unused. It now defaults to None because the interface wired to
            this function supplies only the text input, so a required second
            argument made every call fail with TypeError.

    Returns:
        ``(encoded, encode_time, decoded, decode_time)`` with times in seconds.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can report 0.0 for fast calls.
    start_time = time.perf_counter()
    result, coding, tree = Huffman_Encoding(input_string)
    encode_execution_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    result_decode = Huffman_Decoding(result, tree)
    decode_execution_time = time.perf_counter() - start_time
    return result, encode_execution_time, result_decode, decode_execution_time

# String-based Huffman demo. Components come from the top-level gradio
# namespace; the `gr.inputs` / `gr.outputs` namespaces are deprecated and emit
# warnings.
# NOTE(review): this cell never calls iface.launch(), and `iface` is rebound
# by the file-based Huffman interface further down, so this UI is not served
# when the notebook runs top to bottom. Confirm whether that is intended.
iface = gr.Interface(
    fn=Huffman_interface_str,
    inputs=[
        gr.Textbox(lines=3, label="Input String"),
    ],
    outputs=[
        gr.Textbox(label="Output Encode"),
        gr.Textbox(label="Encode execution time"),
        gr.Textbox(label="Output Decode"),
        gr.Textbox(label="Decode Execution time"),
    ],
    title="Huffman Coding Algorithms",
)

  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.inputs.Textbox(lines=3, label="Input String"),
  gr.outputs.Textbox(label="Output Encode"),
  gr.outputs.Textbox(label="Encode execution time"),
  gr.outputs.Textbox(label="Output Decode"),
  gr.outputs.Textbox(label="Decode Execution time")],


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



**Input a file to Encode and Decode**

In [None]:
def Huffman_interface_file(input_file):
    """Gradio callback: Huffman-encode an uploaded text file and decode it back.

    Args:
        input_file: Uploaded file object; its ``name`` attribute is the path
            that gets read.

    Returns:
        ``(encoded_path, encode_time, decoded_path, decode_time)``: paths of
        two temporary files holding the bit string and the decoded text, and
        the processing times in seconds.
    """
    # Read as UTF-8 explicitly so it matches the UTF-8 bytes written below,
    # instead of depending on the platform's default encoding.
    with open(input_file.name, "r", encoding="utf-8") as file:
        input_string = file.read()

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations.
    start_time = time.perf_counter()
    result, coding, tree = Huffman_Encoding(input_string)
    encode_execution_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    result_decode = Huffman_Decoding(result, tree)
    decode_execution_time = time.perf_counter() - start_time

    # delete=False keeps the files on disk after the handles are closed,
    # because the returned paths are consumed after this function returns.
    with tempfile.NamedTemporaryFile(delete=False) as output_file_encode:
        output_file_encode.write(result.encode())

    with tempfile.NamedTemporaryFile(delete=False) as output_file_decode:
        output_file_decode.write(result_decode.encode())
    return output_file_encode.name, encode_execution_time, output_file_decode.name, decode_execution_time


# File-based Huffman demo. Components come from the top-level gradio
# namespace; the `gr.inputs` / `gr.outputs` namespaces are deprecated and emit
# warnings.
iface = gr.Interface(
    fn=Huffman_interface_file,
    inputs=[
        gr.File(label="Input File"),
    ],
    outputs=[
        gr.File(label="Output Encode"),
        gr.Textbox(label="Encode execution time"),
        gr.File(label="Output Decode"),
        gr.Textbox(label="Decode Execution time"),
    ],
    title="Huffman Coding Algorithms",
)

  gr.inputs.File(label="Input File")
  gr.inputs.File(label="Input File")
  gr.inputs.File(label="Input File")
  gr.outputs.File(label="Output Encode"),
  gr.outputs.Textbox(label="Encode execution time"),
  gr.outputs.File(label="Output Decode"),
  gr.outputs.Textbox(label="Decode Execution time")],


In [None]:
# Launch whichever interface `iface` is currently bound to. When the notebook
# is run top to bottom that is the file-based Huffman interface, the last
# `gr.Interface` assigned above.
if __name__ == "__main__":
  iface.launch()

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>