This notebook compares the performance of an AutoGluon-generated model with both a normal PyTorch model and a Quantization-Aware Training (QAT) PyTorch model.
It performs the following steps:
1. Load the dataset (train, test, and validation sets).
2. Load the AutoGluon predictor.
3. Load the replicated PyTorch models: the normal model, the pre-QAT model, and the QAT model.
4. Evaluate and compare the accuracy and log loss of all models.
5. Compare the complexity (number of parameters) and size of all models.
6. Visualize the results with plots.

Make sure to update the paths to the models and dataset accordingly.


Import necessary libraries

In [33]:
import os
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from autogluon.tabular import TabularPredictor, TabularDataset
import matplotlib.pyplot as plt
from torch.quantization import QuantStub, DeQuantStub, prepare_qat, convert

Paths (update them if your models or dataset live elsewhere)

In [19]:
# Dataset root: load_data() expects train.csv, test.csv and validation.csv in here.
# Change this if your copy of the dataset lives elsewhere.
data_dir = './datasets/CICIDS2017/balanced_binary'
# Directory of the saved AutoGluon predictor. Change this if yours is stored elsewhere.
predictor_path = './datasets/CICIDS2017/balanced_binary/automl_search'



Helper functions

In [42]:
def load_data(data_dir):
    """Read the train/test/validation CSV splits and integer-encode their labels.

    Each split is read from ``data_dir`` (``train.csv``, ``test.csv``,
    ``validation.csv``), its ``ID`` column is dropped, and its ``Label`` column
    is replaced by integer codes. The encoder is fitted on the training labels
    only and then applied to all three splits.

    Args:
        data_dir: Directory containing the three CSV files.

    Returns:
        tuple: ``(train_data, test_data, val_data)`` as pandas DataFrames.
    """
    frames = []
    for file_name in ('train.csv', 'test.csv', 'validation.csv'):
        frames.append(pd.read_csv(os.path.join(data_dir, file_name)))

    # The ID column is only a row identifier, not a feature.
    train_frame, test_frame, val_frame = (
        frame.drop(columns=['ID']) for frame in frames
    )

    # Fit on the training labels, then reuse the same mapping for every split.
    encoder = LabelEncoder()
    encoder.fit(train_frame['Label'])
    for frame in (train_frame, test_frame, val_frame):
        frame['Label'] = encoder.transform(frame['Label'])

    return train_frame, test_frame, val_frame

def get_model_architecture(predictor, input_feature_size):
    """Extract the layer layout of the predictor's best model.

    The best model is loaded through the predictor's trainer and the direct
    children of its ``model`` attribute are walked. Children that are
    ``nn.Sequential`` containers are flattened one level, so the result is a
    flat list of layers in definition order.

    Args:
        predictor: Loaded AutoGluon predictor. It must expose either the
            ``model_best`` property or the older ``get_model_best()`` method,
            plus ``_trainer.load_model(name)``.
        input_feature_size: Number of input features; returned unchanged.

    Returns:
        tuple: ``(architecture, best_model, input_feature_size)`` where
        ``architecture`` is a list of ``(layer_type, layer_instance)`` pairs.
    """
    # get_model_best() is the deprecated accessor in recent AutoGluon releases.
    # Prefer the `model_best` property and fall back to the method only on
    # older versions that do not have it.
    if hasattr(predictor, 'model_best'):
        best_model_name = predictor.model_best
    else:
        best_model_name = predictor.get_model_best()
    # NOTE(review): `_trainer` is a private AutoGluon attribute and may change
    # between releases.
    best_model = predictor._trainer.load_model(best_model_name)

    architecture = []
    for _, layer in best_model.model.named_children():
        # Flatten Sequential containers one level; keep other modules as-is.
        sub_layers = layer if isinstance(layer, nn.Sequential) else [layer]
        architecture.extend((type(sub_layer), sub_layer) for sub_layer in sub_layers)

    return architecture, best_model, input_feature_size

class AutoReplicatedNN(nn.Module):
    """Freshly initialised network mirroring a list of template layers.

    ``architecture`` is a sequence of ``(layer_type, layer_instance)`` pairs.
    Only the layer types and a few hyper-parameters are taken from the
    templates (``out_features`` for Linear, ``p`` for Dropout, ``dim`` for
    Softmax); no weights are copied. Input widths are tracked from
    ``input_feature_size`` through successive Linear layers, and BatchNorm1d
    layers are sized to the current width.

    Raises:
        ValueError: If a layer type other than BatchNorm1d, Linear, ReLU,
            Dropout or Softmax is encountered.
    """

    def __init__(self, architecture, input_feature_size):
        super().__init__()
        width = input_feature_size
        rebuilt = []
        for layer_type, template in architecture:
            if layer_type == nn.Linear:
                replica = nn.Linear(width, template.out_features)
                # Later layers consume this layer's output width.
                width = template.out_features
            elif layer_type == nn.BatchNorm1d:
                replica = nn.BatchNorm1d(width)
            elif layer_type == nn.ReLU:
                replica = nn.ReLU()
            elif layer_type == nn.Dropout:
                replica = nn.Dropout(p=template.p)
            elif layer_type == nn.Softmax:
                replica = nn.Softmax(dim=template.dim)
            else:
                raise ValueError(f"Unhandled layer type: {layer_type}")
            rebuilt.append(replica)
        # The attribute name is part of the state-dict keys ("main_block.<i>.*").
        self.main_block = nn.Sequential(*rebuilt)

    def forward(self, x):
        """Run ``x`` through the replicated layer stack."""
        stacked_layers = self.main_block
        return stacked_layers(x)

class QATWrapper(nn.Module):
    """Sandwich a model between a QuantStub and a DeQuantStub.

    The forward pass applies ``quant``, then the wrapped ``model``, then
    ``dequant``. The sub-module names are part of the state-dict keys
    (e.g. ``model.<...>``).
    """

    def __init__(self, model):
        super().__init__()
        # Registration order (quant, model, dequant) is kept deliberately.
        self.quant = QuantStub()
        self.model = model
        self.dequant = DeQuantStub()

    def forward(self, x):
        """Return ``dequant(model(quant(x)))``."""
        return self.dequant(self.model(self.quant(x)))
def evaluate_quantized_model(model, X_test_tensor, y_test_tensor):
    """Return the classification accuracy of a (quantized) model on a test set.

    The model is put in eval mode and run once over the whole feature tensor
    without gradient tracking. The arg-max class of each output row is then
    compared with the expected label.

    The float features are passed to the model unchanged. An earlier version
    round-tripped them through ``quantize_per_tensor(scale=1.0, zero_point=0,
    dtype=quint8)`` first, which rounds every value to an integer in [0, 255]
    and so wipes out fractional and negative features before the model sees
    them. Any input quantization is expected to happen inside ``model``
    (e.g. via a quant stub at its entry).

    Args:
        model: Torch module mapping the feature tensor to per-class scores
            with one row per sample.
        X_test_tensor: Float feature tensor, passed to ``model`` as-is.
        y_test_tensor: Expected class indices, one per sample (a tensor or
            anything ``torch.as_tensor`` accepts).

    Returns:
        float: Fraction of samples predicted correctly; NaN if there are none.

    Raises:
        ValueError: If the number of labels differs from the number of
            predictions.
    """
    model.eval()
    with torch.no_grad():
        scores = model(X_test_tensor)
        predicted = scores.argmax(dim=1)

    # Compare on the prediction's device so no tensor-to-NumPy round trip is needed.
    expected = torch.as_tensor(y_test_tensor, device=predicted.device).reshape(-1)
    if predicted.shape != expected.shape:
        raise ValueError(
            f"Found {expected.numel()} labels for {predicted.numel()} predictions"
        )
    if expected.numel() == 0:
        return float('nan')
    return (predicted == expected).sum().item() / expected.numel()

def evaluate_model(model, X_test_tensor, y_test_tensor):
    """Return the classification accuracy of ``model`` on a test set.

    The model is put in eval mode and run once over the whole feature tensor
    without gradient tracking. The arg-max class of each output row is then
    compared with the expected label.

    Args:
        model: Torch module mapping the feature tensor to per-class scores
            with one row per sample.
        X_test_tensor: Feature tensor, passed to ``model`` as-is.
        y_test_tensor: Expected class indices, one per sample (a tensor or
            anything ``torch.as_tensor`` accepts).

    Returns:
        float: Fraction of samples predicted correctly; NaN if there are none.

    Raises:
        ValueError: If the number of labels differs from the number of
            predictions.
    """
    model.eval()
    with torch.no_grad():
        scores = model(X_test_tensor)
        predicted = scores.argmax(dim=1)

    # Accuracy is computed in torch on the prediction's device, so the tensors
    # never need to be converted to NumPy (which fails for non-CPU tensors).
    expected = torch.as_tensor(y_test_tensor, device=predicted.device).reshape(-1)
    if predicted.shape != expected.shape:
        raise ValueError(
            f"Found {expected.numel()} labels for {predicted.numel()} predictions"
        )
    if expected.numel() == 0:
        return float('nan')
    return (predicted == expected).sum().item() / expected.numel()

Load data

In [22]:
# Read the three CSV splits from `data_dir`; load_data() drops the ID column and
# integer-encodes the Label column of each split.
train_data, test_data, val_data = load_data(data_dir)

# The splits stay plain pandas DataFrames here. Only the test split is wrapped
# in a TabularDataset, and that happens later, right before the AutoGluon evaluation.

Loading the AutoGluon predictor

In [23]:
# Restore the saved AutoGluon predictor from the directory configured in `predictor_path`.
predictor = TabularPredictor.load(predictor_path)



Found 1 mismatches between original and current metadata:
	INFO: AutoGluon Python micro version mismatch (original=3.10.14, current=3.10.12)


Getting model architecture

In [24]:
# Every column except the target ('Label') counts as a model input.
input_feature_size = len(train_data.columns.drop('Label'))

# Pull the flat layer list and the loaded best model out of the predictor.
# The feature count is passed through and returned unchanged.
architecture, best_model, input_feature_size = get_model_architecture(
    predictor,
    input_feature_size,
)

  best_model_name = predictor.get_model_best()


Preparing test data

In [25]:
# Split the test frame into a label vector and a feature matrix (NumPy arrays).
y_test = test_data['Label'].to_numpy()
X_test = test_data.drop(columns=['Label']).to_numpy()

# Tensors consumed by the PyTorch evaluation helpers:
# float32 features and int64 class indices.
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

Loading models and evaluating accuracy

In [44]:
# Locations of the saved models; adjust to your environment.
model_paths = {
    "AutoGluon": predictor_path,
    "Normal PyTorch": './datasets/CICIDS2017/balanced_binary/compressed_models/normal_pytorch_model.pth',  # Update with your normal model path
    "Pre-QAT PyTorch": './datasets/CICIDS2017/balanced_binary/compressed_models/pre_qat_pytorch_model.pth',  # Update with your pre-QAT model path
    "QAT PyTorch": './datasets/CICIDS2017/balanced_binary/compressed_models/qat_pytorch_model.pth'  # Update with your QAT model path
}

# Test-set accuracy per model name, filled in below.
accuracies = {}

# Evaluate AutoGluon model
test_data_tab = TabularDataset(test_data)
accuracies["AutoGluon"] = predictor.evaluate(test_data_tab)['accuracy']

# Evaluate Normal PyTorch model.
# map_location='cpu' lets checkpoints saved on another device load on a CPU-only
# machine; the test tensors built earlier are on the CPU as well.
normal_model = AutoReplicatedNN(architecture, input_feature_size)
normal_model.load_state_dict(torch.load(model_paths["Normal PyTorch"], map_location='cpu'))
# Reuse the tensors prepared in the "Preparing test data" cell so every model
# is scored on exactly the same inputs.
accuracies["Normal PyTorch"] = evaluate_model(normal_model, X_test_tensor, y_test_tensor)

# Evaluate Pre-QAT PyTorch model (evaluate_model switches the model to eval mode itself)
pre_qat_model = AutoReplicatedNN(architecture, input_feature_size)
pre_qat_model.load_state_dict(torch.load(model_paths["Pre-QAT PyTorch"], map_location='cpu'))
accuracies["Pre-QAT PyTorch"] = evaluate_model(pre_qat_model, X_test_tensor, y_test_tensor)

# Evaluate QAT PyTorch model
qat_model_base = AutoReplicatedNN(architecture, input_feature_size)
qat_model = QATWrapper(qat_model_base)
# strict=False tolerates key mismatches, so report them instead of silently
# evaluating a partially initialised network.
qat_load_report = qat_model.load_state_dict(
    torch.load(model_paths["QAT PyTorch"], map_location='cpu'),
    strict=False,
)
if qat_load_report.missing_keys or qat_load_report.unexpected_keys:
    print(
        "WARNING: QAT checkpoint does not match the rebuilt module; "
        f"missing keys: {list(qat_load_report.missing_keys)}; "
        f"unexpected keys: {list(qat_load_report.unexpected_keys)}. "
        "The reported QAT accuracy may not reflect the trained weights."
    )
# NOTE(review): no qconfig is assigned and prepare_qat() is never called on this
# wrapper, so convert() presumably leaves the float modules in place. The module
# should probably be prepared exactly as it was when the checkpoint was produced
# (same qconfig, prepare_qat, then load/convert) - TODO confirm against the
# training notebook.
qat_model = convert(qat_model)
qat_model.eval()
accuracies["QAT PyTorch"] = evaluate_quantized_model(qat_model, X_test_tensor, y_test_tensor)



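Printing accuracies

Note: the dataset directory is named `balanced_binary`, so the test set is presumably class-balanced. On such a set an accuracy of exactly 0.5000 is what a model gets by always predicting the same class, so a value like that more likely points to a weight-loading or input-preprocessing problem than to a genuine result.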

In [46]:
# One line per model, accuracy rounded to four decimals.
print("Model Accuracies:")
for model_name in accuracies:
    accuracy = accuracies[model_name]
    print(f"{model_name}: {accuracy:.4f}")

Model Accuracies:
AutoGluon: 0.9925
Normal PyTorch: 0.9788
Pre-QAT PyTorch: 0.5000
QAT PyTorch: 0.5000
