<a href="https://colab.research.google.com/github/ericyoc/win_entropy_packing/blob/main/calc_win_entropy_packing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#!pip install pefile

In [3]:
import math
import pefile
import subprocess

In [5]:
#!apt-get install upx-ucl

In [6]:
def calculate_entropy(data):
    """Return the Shannon entropy of ``data`` in bits per byte.

    Args:
        data: A sized iterable of integers in the range 0-255, such as a
            ``bytes`` object.

    Returns:
        The entropy of the byte distribution: 0 when every byte is the same
        value, up to 8 when all 256 byte values are equally frequent.
        Empty input yields the integer 0.
    """
    # Histogram: how many times each of the 256 possible byte values occurs.
    histogram = [0] * 256
    for value in data:
        histogram[value] += 1

    result = 0
    for occurrences in histogram:
        # Byte values that never occur contribute nothing (and log2(0) is
        # undefined), so skip them.
        if not occurrences > 0:
            continue
        p = float(occurrences) / len(data)
        result -= p * math.log2(p)

    return result

In [7]:
def get_entropy_level(entropy):
    """Map an entropy value onto a coarse "High" / "Medium" / "Low" label.

    Args:
        entropy: Entropy value to classify (bits per byte).

    Returns:
        "High" when ``entropy`` is strictly above 6.5, "Medium" when it is
        strictly above 5.0, and "Low" otherwise.
    """
    # Checked from the highest cut-off down; the first threshold that the
    # value strictly exceeds decides the label.
    cutoffs = ((6.5, "High"), (5.0, "Medium"))
    for threshold, label in cutoffs:
        if entropy > threshold:
            return label
    return "Low"

In [8]:
def is_packed(pe):
    """Heuristically decide whether a parsed PE file looks packed.

    Two checks are applied in order:

    1. A substring search for well-known packer names in the bytes returned
       by ``pe.get_data()``.
    2. A per-section entropy check: any section whose byte entropy is above
       6.5 counts as packed.

    Args:
        pe: A parsed PE object exposing ``get_data()`` and ``sections``
            (each section exposing ``get_data()``), e.g. ``pefile.PE``.

    Returns:
        True if either check fires, otherwise False.
    """
    # Check for common packer signatures
    packers = ["UPX", "ASPack", "PECompact", "Themida", "VMProtect"]

    # Fetch the data once instead of once per packer name; the call does not
    # depend on the loop variable.
    # NOTE(review): with default arguments pefile may return only part of the
    # file (possibly just the header region) -- confirm this is the intended
    # search scope.
    image_data = pe.get_data()
    if any(packer.encode() in image_data for packer in packers):
        return True

    # Check for high entropy in sections
    return any(
        calculate_entropy(section.get_data()) > 6.5
        for section in pe.sections
    )


In [9]:
def pack_with_upx(file_path):
    """Run ``upx --best`` on ``file_path`` and report the outcome on stdout.

    No output path is passed to UPX, so the command operates on the file at
    ``file_path`` itself. Failures are reported with a printed message rather
    than raised.

    Args:
        file_path: Path of the executable to hand to UPX.

    Returns:
        None.
    """
    command = ["upx", "--best", file_path]
    try:
        # check=True turns a non-zero UPX exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # Raised when the "upx" program itself cannot be located.
        print("UPX-UCL executable not found. Make sure UPX-UCL is installed and in the system PATH.")
    except subprocess.CalledProcessError:
        print("Failed to pack the file with UPX-UCL.")
    else:
        print("File packed successfully with UPX-UCL.")

In [10]:
def analyze_file(file_path):
    """Print an entropy / packing report for the PE file at ``file_path``.

    The report contains the architecture (from the PE file header), the
    whole-file byte entropy and its level, the ``is_packed`` verdict, the
    section names, and a short textual explanation.

    All errors are reported with a printed message; nothing is raised to the
    caller.

    Args:
        file_path: Path of the PE file to analyze.

    Returns:
        None.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            # Machine field of the PE file header: 0x14c and 0x8664 are the
            # values this report maps to 32-bit and 64-bit respectively.
            if pe.FILE_HEADER.Machine == 0x14c:
                arch = "32-bit"
            elif pe.FILE_HEADER.Machine == 0x8664:
                arch = "64-bit"
            else:
                arch = "Unknown"

            # Read the raw file through a context manager so the handle is
            # closed deterministically instead of being leaked.
            with open(file_path, "rb") as handle:
                data = handle.read()
            entropy = calculate_entropy(data)
            entropy_level = get_entropy_level(entropy)
            packed = is_packed(pe)

            print(f"File: {file_path}")
            print(f"Architecture: {arch}")
            print(f"Entropy: {entropy:.4f}")
            print(f"Entropy Level: {entropy_level}")
            print(f"Packed: {'Yes' if packed else 'No'}")

            print("Section Names:")
            for section in pe.sections:
                # Section names are fixed-width byte fields padded with NULs.
                section_name = section.Name.decode(errors='ignore').strip('\x00')
                print(f"- {section_name}")

            # Explanation of entropy levels and packed files
            print("\nExplanation:")
            if entropy_level == "Low":
                print("Low entropy indicates that the file has a more predictable and less random distribution of bytes.")
                print("This is common for executables that contain plain text, code, or data without much obfuscation.")
            elif entropy_level == "Medium":
                print("Medium entropy suggests that the file has a moderate level of randomness in its byte distribution.")
                print("This can be observed in executables with a mix of plain text, code, and compressed or encrypted data.")
            else:  # High entropy
                print("High entropy indicates a highly random and unpredictable distribution of bytes in the file.")
                print("This is often associated with encrypted, compressed, or obfuscated data, which is common in packed executables.")

            if packed:
                print("\nThe file is detected as packed, but the entropy may be lower compared to the original file.")
                print("This is because packers like UPX-UCL compress the executable, resulting in a more uniform byte distribution.")
                print("The compressed data, although appearing more structured, can still have a high entropy due to the packing process.")
        finally:
            # Release the parsed file so it is not left open while other
            # tools (e.g. the packer) rewrite the same path.
            pe.close()

    except pefile.PEFormatError:
        print("Invalid PE file format.")
    except FileNotFoundError:
        print("File not found.")
    except Exception as e:
        # Deliberate catch-all: this is a report helper and must not abort
        # the surrounding run; the message is surfaced to the user instead.
        print(f"An error occurred: {str(e)}")

In [11]:
def main(file_path="/content/calc.exe", packed_path=None):
    """Analyze a PE file, pack it with UPX, and analyze the packed result.

    Args:
        file_path: Path of the executable to analyze. Defaults to the sample
            used by this notebook.
        packed_path: Optional destination for the packed executable. When
            given, ``file_path`` is first copied there and the copy is
            packed, leaving the input untouched so the demo can be re-run.
            When None (the default), ``file_path`` itself is handed to the
            packer, so a second run starts from the already-packed file.

    Returns:
        None.
    """
    print("Original File:")
    analyze_file(file_path)

    target_path = file_path
    if packed_path is not None:
        # Local import: only needed for the optional copy step.
        import shutil

        try:
            shutil.copy2(file_path, packed_path)
        except OSError as exc:
            print(f"Could not copy {file_path} to {packed_path}: {exc}")
            return
        target_path = packed_path

    print("\nPacking the file with UPX-UCL...")
    pack_with_upx(target_path)

    print("\nPacked File:")
    analyze_file(target_path)

In [12]:
# Entry point: run the analyze -> pack -> analyze demo when this code is
# executed directly (as opposed to being imported as a module).
if __name__ == "__main__":
    main()

Original File:
File: /content/calc.exe
Architecture: 64-bit
Entropy: 2.9665
Entropy Level: Low
Packed: No
Section Names:
- .text
- .rdata
- .data
- .pdata
- .rsrc
- .reloc

Explanation:
Low entropy indicates that the file has a more predictable and less random distribution of bytes.
This is common for executables that contain plain text, code, or data without much obfuscation.

Packing the file with UPX-UCL...
File packed successfully with UPX-UCL.

Packed File:
File: /content/calc.exe
Architecture: 64-bit
Entropy: 4.3683
Entropy Level: Low
Packed: Yes
Section Names:
- UPX0
- UPX1
- .rsrc

Explanation:
Low entropy indicates that the file has a more predictable and less random distribution of bytes.
This is common for executables that contain plain text, code, or data without much obfuscation.

The file is detected as packed, but the entropy may be lower compared to the original file.
This is because packers like UPX-UCL compress the executable, resulting in a more uniform byte distribu