# Lab 4 Bonus

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# data paths
data_X_path = "cda_lab4_data/X.npy"
data_y_path = "cda_lab4_data/y.npy"
feature_names_path = "cda_lab4_data/feature_names.csv"

In [4]:
# load dataset
X = np.load(data_X_path)
print("X dimensions: ", X.shape)
y = np.load(data_y_path)
print("y dimensions: ", y.shape)

# load feature names
feature_names = pd.read_csv(feature_names_path, header=None)
print("Feature names: ", feature_names.head())

X dimensions:  (30396, 22761)
y dimensions:  (30396,)
Feature names:       0                               1
0  NaN                    feature_name
1  0.0        kernel32.dll:SetFileTime
2  1.0    kernel32.dll:CompareFileTime
3  2.0        kernel32.dll:SearchPathW
4  3.0  kernel32.dll:GetShortPathNameW


In [5]:
# split data into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("Train size: ", len(y_train))
print("Train malwares: ", np.sum(y_train == 1))
print()
print("Test size: ", len(y_test))
print("Test malwares: ", np.sum(y_test == 1))

Train size:  24316
Train malwares:  12168

Test size:  6080
Test malwares:  3042


In [6]:
# Define a simple neural network model
class SimpleModel(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(input_size, num_classes)
    
    def forward(self, x):
        return self.fc(x)

## Training Loop

In [7]:
def accuracy(outputs, targets):
    # Calculate accuracy
    _, predicted = torch.max(outputs, 1)
    total = targets.size(0)
    correct = (predicted == targets).sum().item()
    return total, correct

In [8]:
# Train step function
def train_step(model, data_loader, loss_fn, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Forward pass
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
        total_loss += loss.item()

    accuracy = correct / total
    avg_loss = total_loss / len(data_loader)
    return avg_loss, accuracy

In [38]:
# Evaluate function
def evaluate(model, data_loader, loss_fn, device='cpu'):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            # Forward pass
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            
            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
            total_loss += loss.item()

    accuracy = correct / total
    avg_loss = total_loss / len(data_loader)
    return avg_loss, accuracy

In [14]:
# Train loop function
def train_loop(model, train_loader, test_loader, loss_fn, optimizer, epochs, device='cpu'):
    for epoch in range(epochs):
        train_loss, train_accuracy = train_step(model, train_loader, loss_fn, optimizer, device)
        test_loss, test_accuracy = evaluate(model, test_loader, loss_fn, device)
        
        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')
        print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')
        print('-----------------------------------')

In [33]:
# Example usage
input_size = X.shape[1]  # Number of features
num_classes = 2  # Number of classes
model = SimpleModel(input_size, num_classes).to('cpu')
batch_size = 1000

# Cross entropy loss function
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


# Convert to PyTorch datasets
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float), torch.tensor(y_train, dtype=torch.long))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# For simplicity, using the same dataset for testing
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float), torch.tensor(y_test, dtype=torch.long))
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [35]:
train_loop(model, train_loader, test_loader, loss_fn, optimizer, epochs=2)

Epoch 1/2:
Train Loss: 0.1719, Train Accuracy: 0.9207
Test Loss: 0.1657, Test Accuracy: 0.9215
-----------------------------------
Epoch 2/2:
Train Loss: 0.1707, Train Accuracy: 0.9217
Test Loss: 0.1744, Test Accuracy: 0.9209
-----------------------------------


In [75]:
def grams_topk_variant(b, model, loss_fn, target_labels, k_init=8, device='cpu'):
    # Convert numpy arrays to torch tensors
    b = torch.tensor(b, dtype=torch.float32, requires_grad=True).to(device)
    orig_x = b.clone().detach()
    best_x = b.clone().detach()
    target_labels = torch.tensor(target_labels, dtype=torch.long).to(device)
    k = k_init
    
    def compute_loss(x):
        model_output = model(x)
        return loss_fn(model_output, target_labels)
    
    while k > 0.5:
        print("k: ", k)
        # Compute loss
        loss = compute_loss(b)
        print('loss: ', loss)
        
        # Zero the gradients
        model.zero_grad()
        
        # Compute gradients
        loss.backward()
        grad = b.grad.data
        print('grad: ', grad)
        
        # Compute the sign of the gradient
        sign = grad.sign()
        print('sign: ', sign)
        
        # Adjust the gradient
        adjusted_grad = torch.abs(grad - orig_x * grad)
        
        # Get the top-k elements
        topk_indices = torch.topk(adjusted_grad, int(k)).indices
        
        # Update x with top-k elements and their signs
        x_new = b.clone().detach()
        x_new[topk_indices] = x_new[topk_indices] + sign[topk_indices]
        
        # Compute new loss
        new_loss = compute_loss(x_new)
        print('new_loss: ', new_loss)
        
        # Compute loss for the best observed x
        best_loss = compute_loss(best_x)
        print('best_loss: ', best_loss)
        
        # Update best_x if new loss is better
        if new_loss.item() > best_loss.item():
            best_x[topk_indices] = x_new[topk_indices]
            b = torch.tensor(x_new, requires_grad=True).to(device)
            k *= 2
        else:
            k /= 2
    
    return best_x

In [76]:
evaluate(model, test_loader, loss_fn=loss_fn)

(0.1744221800139972, 0.9208881578947369)

In [77]:
evaluate(model, train_loader, loss_fn=loss_fn)

(0.1758149480819702, 0.9217798980095411)

In [78]:
b = X_train[y_train == 1][0]
b = torch.tensor(b, dtype=torch.float32).to('cpu')
print('b:', b.shape)
target_labels = y_train[y_train == 1][0]

best_x = grams_topk_variant(b, model, loss_fn, target_labels)
print('original x:', model(b))
print("Best x:", model(best_x))

b: torch.Size([22761])
k:  8
loss:  tensor(0.0032, grad_fn=<NllLossBackward0>)
grad:  tensor([ 1.0346e-04, -2.0407e-04,  5.9553e-04,  ..., -7.2176e-06,
        -5.4561e-06, -1.8637e-05])
sign:  tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
new_loss:  tensor(4.6735, grad_fn=<NllLossBackward0>)
best_loss:  tensor(0.0032, grad_fn=<NllLossBackward0>)
k:  16
loss:  tensor(4.6735, grad_fn=<NllLossBackward0>)
grad:  tensor([ 0.0319, -0.0630,  0.1838,  ..., -0.0022, -0.0017, -0.0058])
sign:  tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
new_loss:  tensor(23.1323, grad_fn=<NllLossBackward0>)
best_loss:  tensor(4.6735, grad_fn=<NllLossBackward0>)
k:  32
loss:  tensor(23.1323, grad_fn=<NllLossBackward0>)
grad:  tensor([ 0.0322, -0.0636,  0.1856,  ..., -0.0022, -0.0017, -0.0058])
sign:  tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
new_loss:  tensor(56.5191, grad_fn=<NllLossBackward0>)
best_loss:  tensor(23.1323, grad_fn=<NllLossBackward0>)
k:  64
loss:  tensor(56.5191, grad_fn=<NllLossBackward0>)
gr

  b = torch.tensor(b, dtype=torch.float32, requires_grad=True).to(device)
  b = torch.tensor(x_new, requires_grad=True).to(device)


RuntimeError: selected index k out of range

In [59]:
best_xs = []
for i in range(10):
    b = X_train[y_train == 1][i]
    target_labels = y_train[y_train == 1][i]
    best_x = grams_topk_variant(b, model, loss_fn, target_labels)
    best_xs.append(best_x)

k:  8
loss:  tensor(0.0032, grad_fn=<NllLossBackward0>)
grad:  tensor([ 1.0346e-04, -2.0407e-04,  5.9553e-04,  ..., -7.2176e-06,
        -5.4561e-06, -1.8637e-05])
sign:  tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
k:  4.0
loss:  tensor(0.0032, grad_fn=<NllLossBackward0>)
grad:  tensor([ 2.0692e-04, -4.0813e-04,  1.1911e-03,  ..., -1.4435e-05,
        -1.0912e-05, -3.7275e-05])
sign:  tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
k:  2.0
loss:  tensor(0.0032, grad_fn=<NllLossBackward0>)
grad:  tensor([ 3.1039e-04, -6.1220e-04,  1.7866e-03,  ..., -2.1653e-05,
        -1.6368e-05, -5.5912e-05])
sign:  tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
k:  1.0
loss:  tensor(0.0032, grad_fn=<NllLossBackward0>)
grad:  tensor([ 4.1385e-04, -8.1627e-04,  2.3821e-03,  ..., -2.8871e-05,
        -2.1824e-05, -7.4549e-05])
sign:  tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
k:  8
loss:  tensor(0.0004, grad_fn=<NllLossBackward0>)
grad:  tensor([ 1.2706e-05, -2.5062e-05,  7.3137e-05,  ..., -8.8632e-07,
 

In [56]:
target_labels = y_train[y_train == 1][:10]
evaluate_model(model, best_xs, target_labels)

1.0

In [57]:
best_xs = X_train[y_train == 1][:10]
target_labels = y_train[y_train == 1][:10]
evaluate_model(model, best_xs, target_labels)

1.0