In [None]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
import pandas as pd
import copy

import torch
import torchtext

from torch.utils.data import DataLoader

from sklearn.model_selection import train_test_split

from pathlib import Path
from utils.preprocessing_utils import load_dataset, apply_scalers_to_dataframe

from prototypes.torch_apk_analysis_model import (
    get_best_available_device,
    evaluate_model_on_test_set,
    collate_fn,
    NNHyperparams,
    ApkAnalysisDataset,
)

from prototypes.torch_apk_analysis_model_io import (
    load_apk_analysis_model_from_version,
)

torchtext.disable_torchtext_deprecation_warning()



In [None]:
SEQUENCE_COLS = [
    "activities_list",
    "services_list",
    "receivers_list",
    "permissions_list",
    "api_calls_list",
]

CHAR_COLS = ["fuzzy_hash"]
VECTOR_COLS = ["opcode_counts"]
SCALAR_COLS = ["file_size"]
VECTOR_DIMS = {"opcode_counts": 768}

PROJECT_ROOT = Path(__file__).parent.parent.parent
PATH_TO_DATASET_DIR = PROJECT_ROOT / "dataset"
PATH_TO_SAVE_NN_MODEL = PROJECT_ROOT / "model_artifacts" / "nn_models"
PATH_TO_SAVE_ML_MODEL = PROJECT_ROOT / "model_artifacts" / "ml_models"

# Load dataset
df, vocab_dict = load_dataset(
    PATH_TO_DATASET_DIR,
    SEQUENCE_COLS,
    CHAR_COLS,
    VECTOR_COLS,
    SCALAR_COLS,
    VECTOR_DIMS,
    load_fresh=False,
    sample_size=None,
)

df, df_test = train_test_split(
    df, test_size=0.1, random_state=42, stratify=df["is_malware"]
)

device = get_best_available_device()

Loading last preprocessed dataset...
Using CUDA device: NVIDIA GeForce RTX 4070 SUPER



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\david\miniconda3\envs\python311\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\david\miniconda3\envs\python311\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\david\miniconda3\envs\python311\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start(

In [None]:
nn_hyperparams = NNHyperparams(
    batch_size=64,
    max_learning_rate=6e-3,
    epochs=20,
    early_stopping=True,
    patience=5,
    optimizer="adamw",
    weight_decay=8e-4,
    embedding_dim=64,
    hidden_dims=[128],
    dropout=0.5,
    seq_pooling="mean",
    n_classes=2,
    label_col="is_malware",
    dataloader_num_workers=2,
    dataloader_pin_memory=True,
    dataloader_persistent_workers=True,
    grad_scaler_max_norm=1.0,
)

model, vocab_dict, used_scalers, metadata = load_apk_analysis_model_from_version(base_dir=PATH_TO_SAVE_NN_MODEL)

Using CUDA device: NVIDIA GeForce RTX 4070 SUPER
Loading latest model version: 20250622_175541
Scalers loaded from ./model_artifacts/nn_models\20250622_175541\scalers.joblib
Model loaded from ./model_artifacts/nn_models\20250622_175541


In [5]:
# Dynamic quantization
model.to("cpu")
model.eval()

modules_to_quantize = {torch.nn.Linear, torch.nn.GRU}

quantized_model_dynamic = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec=modules_to_quantize,
    dtype=torch.qint8,
)

print("Dynamic quantization complete.")

Dynamic quantization complete.


In [None]:
# Static quantization
model.to("cpu")
model.eval()

model_to_quantize = copy.deepcopy(model)
model_to_quantize.eval()

print("Setting up quantization configurations for each layer type...")
default_qconfig = torch.quantization.get_default_qconfig("fbgemm")

model_to_quantize.classifier.qconfig = default_qconfig
model_to_quantize.quant.qconfig = default_qconfig
model_to_quantize.dequant.qconfig = default_qconfig

print("Quantization configurations assigned.")
print("Preparing model for static quantization...")

quantized_model_static = torch.quantization.prepare(model_to_quantize, inplace=False)
quantized_model_static.eval()

print("\nCalibrating the model...")

df_calibration = df.sample(n=128, random_state=42)
apply_scalers_to_dataframe(
    df_calibration, SCALAR_COLS, VECTOR_COLS, used_scalers, fit_scalers=False
)

calibration_dataset = ApkAnalysisDataset(
    df=df_calibration,
    sequence_cols=SEQUENCE_COLS,
    scalar_cols=SCALAR_COLS,
    char_cols=CHAR_COLS,
    vector_cols=VECTOR_COLS,
    label_col="is_malware",
)

calibration_loader = DataLoader(
    calibration_dataset,
    batch_size=nn_hyperparams.batch_size,
    collate_fn=collate_fn,
    shuffle=False,
)

with torch.no_grad():
    for i, (
        seq_feats,
        char_feats,
        vector_feats,
        scalars,
        labels,
    ) in enumerate(calibration_loader):
        seq_feats = {k: v.to("cpu") for k, v in seq_feats.items()}
        char_feats = {k: v.to("cpu") for k, v in char_feats.items()}
        vector_feats = {k: v.to("cpu") for k, v in vector_feats.items()}
        scalars = {k: v.to("cpu") for k, v in scalars.items()}
        labels = labels.to("cpu")
        
        quantized_model_static(seq_feats, char_feats, vector_feats, scalars)
        print(f"  Calibration batch {i + 1}/{len(calibration_loader)}")
print("Calibration complete.")

print("\nConverting the model...")
quantized_model_static = torch.quantization.convert(
    quantized_model_static, inplace=False
)
quantized_model_static.eval()
print("Static quantization complete.")

Setting up quantization configurations for each layer type...
Quantization configurations assigned.
Preparing model for static quantization...

Calibrating the model...




  Calibration batch 1/2
  Calibration batch 2/2
Calibration complete.

Converting the model...
Static quantization complete.


In [7]:
print("\nEvaluating base model on CPU...")
base_results = evaluate_model_on_test_set(
    model=model,
    df_test=df_test,
    scalers=used_scalers,
    sequence_cols=SEQUENCE_COLS,
    scalar_cols=SCALAR_COLS,
    char_cols=CHAR_COLS,
    vector_cols=VECTOR_COLS,
    hyperparams=nn_hyperparams,
    device=torch.device("cpu"),
)

print("\nEvaluating base model on GPU...")
gpu_base_results = evaluate_model_on_test_set(
    model=model,
    df_test=df_test,
    scalers=used_scalers,
    sequence_cols=SEQUENCE_COLS,
    scalar_cols=SCALAR_COLS,
    char_cols=CHAR_COLS,
    vector_cols=VECTOR_COLS,
    hyperparams=nn_hyperparams,
    device=device,
)

print("\nEvaluating dynamic quantized model on CPU...")
dynamic_q_results = evaluate_model_on_test_set(
    model=quantized_model_dynamic,
    df_test=df_test,
    scalers=used_scalers,
    sequence_cols=SEQUENCE_COLS,
    scalar_cols=SCALAR_COLS,
    char_cols=CHAR_COLS,
    vector_cols=VECTOR_COLS,
    hyperparams=nn_hyperparams,
    device=torch.device("cpu"),
)

print("\nEvaluating static quantized model on CPU...")
static_q_results = evaluate_model_on_test_set(
    model=quantized_model_static,
    df_test=df_test,
    scalers=used_scalers,
    sequence_cols=SEQUENCE_COLS,
    scalar_cols=SCALAR_COLS,
    char_cols=CHAR_COLS,
    vector_cols=VECTOR_COLS,
    hyperparams=nn_hyperparams,
    device=torch.device("cpu"),
)


Evaluating base model on CPU...
--- Evaluating on Test Set ---

--- Test Set Evaluation Metrics ---
  Inference Time: 8.07 seconds
  Accuracy: 0.9760
  Precision binary: 0.9817
  Recall binary: 0.9699
  F1 binary: 0.9758
  Precision weighted: 0.9760
  Recall weighted: 0.9760
  F1 weighted: 0.9760
  Confusion Matrix:
[[982  18]
 [ 30 968]]
  Inference time: 8.0653
  Roc auc: 0.9962
  Pr auc: 0.9967
---------------------------------

Evaluating base model on GPU...
--- Evaluating on Test Set ---

--- Test Set Evaluation Metrics ---
  Inference Time: 6.96 seconds
  Accuracy: 0.9760
  Precision binary: 0.9817
  Recall binary: 0.9699
  F1 binary: 0.9758
  Precision weighted: 0.9760
  Recall weighted: 0.9760
  F1 weighted: 0.9760
  Confusion Matrix:
[[982  18]
 [ 30 968]]
  Inference time: 6.9587
  Roc auc: 0.9952
  Pr auc: 0.9939
---------------------------------

Evaluating dynamic quantized model on CPU...
--- Evaluating on Test Set ---

--- Test Set Evaluation Metrics ---
  Inference Ti

In [8]:
# To get model size (example):
import os
import tempfile

try:
    # Save the original (unquantized) model for comparison
    fd, original_path = tempfile.mkstemp(suffix=".pth")
    os.close(fd)
    torch.save(model.state_dict(), original_path)
    original_model_size_kb = os.path.getsize(original_path) / 1024
    os.remove(original_path)
    print(f"Original (FP32) Model Size: {original_model_size_kb:.2f} KB")

    # Save the staticly quantized model object
    fd, original_path = tempfile.mkstemp(suffix=".pth")
    os.close(fd)
    torch.save(quantized_model_static.state_dict(), original_path)
    static_model_size_kb = os.path.getsize(original_path) / 1024
    os.remove(original_path)
    print(f"Statically Quantized (INT8) Model Size: {static_model_size_kb:.2f} KB")

    # Save the quantized model object
    fd, quantized_path = tempfile.mkstemp(suffix=".pth")
    os.close(fd)
    torch.save(quantized_model_dynamic, quantized_path)  # Save the whole model object
    dynamic_model_size_kb = os.path.getsize(quantized_path) / 1024
    os.remove(quantized_path)
    print(f"Dynamic Quantized (INT8) Model Size: {dynamic_model_size_kb:.2f} KB")

    # Calculate reduction
    if original_model_size_kb > 0:
        dynamic_reduction = (1 - (dynamic_model_size_kb / original_model_size_kb)) * 100
        static_reduction = (1 - (static_model_size_kb/ original_model_size_kb)) * 100
        print(f"Size Reduction (Dynamic Quantization): {dynamic_reduction:.2f}%")
        print(f"Size Reduction (Static Quantization): {static_reduction:.2f}%")

except Exception as e:
    print(f"Could not get model size: {e}")

Original (FP32) Model Size: 1034891.11 KB
Statically Quantized (INT8) Model Size: 1034727.38 KB
Dynamic Quantized (INT8) Model Size: 1034364.68 KB
Size Reduction (Dynamic Quantization): 0.05%
Size Reduction (Static Quantization): 0.02%
