### Huffman


In [1]:
import csv
import json
import os
import time
from collections import Counter

import openpyxl
from docx import Document
from PyPDF2 import PdfReader

class Node:
    """A node of the Huffman tree.

    Leaves carry a character in ``char``; internal nodes are created with
    ``char=None`` and get their children assigned after construction.
    """

    def __init__(self, char=None, freq=0):
        # Symbol and its weight (occurrence count, or the sum for merged nodes).
        self.char, self.freq = char, freq
        # Children start unset; the tree builder links them when merging.
        self.left = self.right = None

# Module-level work list of Huffman nodes: rebuilt by calculate_frequencies()
# and reduced to a single root node by build_huffman_tree().
nodes = []

def calculate_frequencies(word):
    """Rebuild the module-level ``nodes`` list with one leaf per distinct symbol.

    Each leaf is a ``Node(char, freq)`` where ``freq`` is the number of times
    ``char`` occurs in ``word``. Leaves appear in order of first occurrence.

    Args:
        word: The text whose characters are counted.

    Returns:
        None. The result is stored in the global ``nodes`` list, which is
        replaced (any previous content is discarded).
    """
    global nodes
    # Counter makes a single pass over the text and keeps first-seen order,
    # instead of re-scanning the whole text once per distinct character.
    nodes = [Node(char, freq) for char, freq in Counter(word).items()]

def build_huffman_tree():
    """Collapse the module-level ``nodes`` list into a single Huffman tree.

    While more than one node remains, the two lowest-frequency nodes are
    removed, joined under a new internal node (``char`` is None) whose
    frequency is their sum, and that node is appended back to ``nodes``.

    Returns:
        The root node of the tree, or None when ``nodes`` is empty (for
        example after calculate_frequencies() was given empty text).
    """
    if not nodes:
        # Nothing to build; previously this fell through to nodes[0] and
        # raised IndexError.
        return None

    while len(nodes) > 1:
        # Stable sort keeps the existing relative order of equal frequencies,
        # so the resulting tree shape is deterministic.
        nodes.sort(key=lambda x: x.freq)
        left, right = nodes[:2]
        del nodes[:2]

        merged = Node(freq=left.freq + right.freq)
        merged.left = left
        merged.right = right

        nodes.append(merged)

    return nodes[0]

def generate_huffman_codes(node, current_code, codes):
    """Fill ``codes`` with the bit string assigned to every symbol in the tree.

    Walks the tree depth-first, appending '0' for a left edge and '1' for a
    right edge.

    Args:
        node: Current tree node (needs ``char``, ``left`` and ``right``
            attributes), or None, in which case nothing happens.
        current_code: Bit string accumulated on the path from the root.
        codes: Dict mutated in place, mapping symbol -> bit string.

    Returns:
        None. Results are written into ``codes``.
    """
    if node is None:
        return

    if node.char is not None:
        # A tree made of a single leaf is reached with an empty path; give
        # that lone symbol the code '0' so the encoded stream is not empty.
        codes[node.char] = current_code or '0'

    generate_huffman_codes(node.left, current_code + '0', codes)
    generate_huffman_codes(node.right, current_code + '1', codes)

def huffman_encoding(word):
    """Return the Huffman code table (symbol -> bit string) for ``word``.

    Counts symbol frequencies, builds the tree from them, then walks the tree
    to collect the code of every symbol.
    """
    calculate_frequencies(word)
    code_table = {}
    # The tree is built from the node list prepared by the call above.
    generate_huffman_codes(build_huffman_tree(), '', code_table)
    return code_table

def read_text_from_file(file_path):
    """Return the textual content of a supported file as a single string.

    The format is chosen from the path suffix, compared case-insensitively:
    ``.txt``, ``.docx``, ``.pdf``, ``.csv``, ``.json`` and ``.xlsx``.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The file text. CSV rows are rendered as ``"a | b\\n"`` lines and
        JSON is re-serialized with an indent of 4.

    Raises:
        ValueError: If the suffix is not one of the supported formats.
    """
    # Only used for suffix matching; the original path is what gets opened.
    lowered_path = file_path.lower()

    if lowered_path.endswith(".txt"):
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    elif lowered_path.endswith(".docx"):
        doc = Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)
    elif lowered_path.endswith(".pdf"):
        reader = PdfReader(file_path)
        # NOTE(review): `or ""` guards against extract_text() yielding None
        # for a page without text; harmless if it always returns a str.
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    elif lowered_path.endswith(".csv"):
        # newline="" lets the csv module handle line endings itself.
        with open(file_path, "r", encoding="utf-8", newline="") as file:
            # Join the columns of each row with a separator, one row per line.
            return "".join(" | ".join(row) + "\n" for row in csv.reader(file))
    elif lowered_path.endswith(".json"):
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
        # Convert the JSON object back into a formatted string.
        return json.dumps(data, indent=4)
    elif lowered_path.endswith(".xlsx"):
        return read_xlsx(file_path)
    else:
        raise ValueError("Format file tidak didukung.")

def read_xlsx(file_path):
    """Render the active sheet of an .xlsx workbook as text.

    Every row becomes one line: the ``str()`` of each cell value joined with
    ``" | "`` and terminated by a newline.
    """
    workbook = openpyxl.load_workbook(file_path)
    active_sheet = workbook.active
    rendered_rows = []
    for cells in active_sheet.iter_rows(values_only=True):
        rendered_rows.append(" | ".join(map(str, cells)) + "\n")
    return "".join(rendered_rows)

def save_compressed_file(encoded_word, output_path):
    """Write a string of '0'/'1' characters to ``output_path`` as packed bytes.

    The bit string is right-padded with '0' up to a multiple of 8 bits before
    being converted to bytes (big-endian).

    Args:
        encoded_word: Bit string to store; may be empty.
        output_path: Destination path, opened in binary write mode.

    Returns:
        int: Number of '0' padding bits appended (0-7). It is 0 when the
        input length is already a multiple of 8, including empty input.
    """
    # (-n) % 8 is the distance to the next multiple of 8, and 0 when already
    # aligned (the previous formula reported 8 in that case).
    padding_length = (-len(encoded_word)) % 8
    padded_bits = encoded_word + '0' * padding_length

    if padded_bits:
        # to_bytes with an explicit length keeps leading zero bits.
        encoded_bytes = int(padded_bits, 2).to_bytes(len(padded_bits) // 8, byteorder="big")
    else:
        # int('', 2) would raise ValueError; an empty stream is an empty file.
        encoded_bytes = b""

    with open(output_path, "wb") as file:
        file.write(encoded_bytes)

    return padding_length

def compress_and_save(file_path):
    """Huffman-encode a file, write the packed bits, and record the result.

    Steps:
      1. Read the file text and build its Huffman code table.
      2. Write the encoded bits to ``<name>_compressed.bin`` next to the input.
      3. Add an entry to ``compressed_files.json`` in the current working
         directory, keyed by the encoded bit string, holding the original
         text, the code table, both sizes and the padding length.
      4. Print the elapsed time, the sizes and the output path.

    Args:
        file_path: Path of the file to compress.

    Returns:
        None.
    """
    start_time = time.time()  # Start time
    original_text = read_text_from_file(file_path)
    codes = huffman_encoding(original_text)
    encoded_word = ''.join(codes[char] for char in original_text)
    original_size = os.path.getsize(file_path)
    # Round up: a trailing partial byte still occupies a full byte on disk.
    compressed_size = (len(encoded_word) + 7) // 8

    file_name, _ = os.path.splitext(file_path)
    output_path = f"{file_name}_compressed.bin"

    padding_length = save_compressed_file(encoded_word, output_path)

    # Load the existing registry, if any, so earlier entries are preserved.
    compressed_data = {}
    if os.path.exists("compressed_files.json"):
        with open("compressed_files.json", "r") as f:
            compressed_data = json.load(f)

    compressed_data[encoded_word] = {
        "original_text": original_text,
        "huffman_codes": codes,
        "original_size": original_size,
        "compressed_size": compressed_size,
        "padding_length": padding_length
    }

    with open("compressed_files.json", "w") as f:
        json.dump(compressed_data, f, indent=4)

    end_time = time.time()  # End time
    compression_time = end_time - start_time  # Compression duration

    print(f"Teks berhasil dikompres dalam {compression_time:.2f} detik")
    print(f"Ukuran asli: {original_size} bytes, Ukuran setelah kompresi: {compressed_size} bytes")
    print(f"File terkompresi berhasil disimpan di {output_path}")

def read_compressed_file(input_path):
    """Read a binary file and return its content as a string of '0'/'1'.

    Args:
        input_path: Path of the file to read in binary mode.

    Returns:
        str: Eight characters per byte, most significant bit first; the empty
        string for an empty file.
    """
    with open(input_path, "rb") as file:
        encoded_bytes = file.read()

    if not encoded_bytes:
        # bin(0) is '0b0', which would otherwise produce a phantom '0' bit.
        return ""

    encoded_bits = bin(int.from_bytes(encoded_bytes, byteorder="big"))[2:]
    # bin() drops leading zeros; restore them to the full byte width.
    return encoded_bits.zfill(len(encoded_bytes) * 8)

def decompress_compressed_file(input_path):
    """Look up the original text of a compressed file in the JSON registry.

    The bits read from ``input_path`` are compared with every entry of
    ``compressed_files.json`` (keys are unpadded encoded bit strings). An
    entry matches when the file content equals its key followed by the
    entry's zero padding.

    Args:
        input_path: Path of the ``.bin`` file to decompress.

    Returns:
        str | None: The stored original text of the matching entry, or None
        when the registry file is missing or no entry matches (a message is
        printed in both cases).
    """
    start_time = time.time()  # Start time
    encoded_bits = read_compressed_file(input_path)

    try:
        with open("compressed_files.json", "r") as f:
            compressed_data = json.load(f)
    except FileNotFoundError:
        print("File 'compressed_files.json' tidak ditemukan. Harap lakukan kompresi terlebih dahulu.")
        return None

    for encoded_data, details in compressed_data.items():
        padding_length = details.get("padding_length")
        if padding_length is None:
            # Padding only depends on the key length, so it can be derived.
            padding_length = (-len(encoded_data)) % 8

        # The registry key is the unpadded bit string, so the padding must be
        # added to it rather than trimmed from it. Trimming dropped real bits
        # and could reduce a short key to '', which is a prefix of any file.
        # `% 8` also normalizes entries that recorded 8 for "no padding".
        expected_bits = encoded_data + '0' * (padding_length % 8)

        if encoded_bits == expected_bits:
            original_text = details["original_text"]
            end_time = time.time()  # End time
            decompression_time = end_time - start_time  # Decompression duration
            print(f"Dekompresi selesai dalam {decompression_time:.2f} detik")
            return original_text

    print("Data encoded tidak ditemukan dalam file 'compressed_files.json'.")
    return None

def save_decompressed_file(original_text, input_path):
    """Write ``original_text`` next to ``input_path`` as a UTF-8 text file.

    The output name is the input path without its extension, followed by
    ``_decompressed.txt``. The destination path is printed afterwards.
    """
    base_name = os.path.splitext(input_path)[0]
    output_path = base_name + "_decompressed.txt"
    with open(output_path, "w", encoding="utf-8") as target:
        target.write(original_text)
    print(f"Hasil dekompresi telah disimpan ke {output_path}")

def main():
    """Interactive entry point: ask for an operation and run it.

    Choice '1' compresses a file given by path; choice '2' decompresses a
    previously compressed file and saves the recovered text when one is
    found. Any other input prints an error message.
    """
    menu_prompt = "Pilih operasi:\n1. Kompres file\n2. Dekompres dari file terkompresi\nMasukkan pilihan (1 atau 2): "
    choice = input(menu_prompt)

    if choice == '1':
        compress_and_save(input("Masukkan path file yang ingin dikompres: "))
        return

    if choice != '2':
        print("Pilihan tidak valid. Silakan pilih 1 atau 2.")
        return

    input_path = input("Masukkan path file terkompresi: ")
    recovered_text = decompress_compressed_file(input_path)
    # Nothing is written when the lookup failed (None) or the text is empty.
    if recovered_text:
        save_decompressed_file(recovered_text, input_path)

# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Teks berhasil dikompres dalam 0.25 detik
Ukuran asli: 1823 bytes, Ukuran setelah kompresi: 1405 bytes
File terkompresi berhasil disimpan di D:\Projek DAA Baru\Dummy_compressed.bin
