In [1]:
import torch
from torch import nn, optim
import numpy as np

import sys
sys.path.append("./")
from pretrain import EmberMLP

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

import pickle
import logging
import random
import time

def set_seed(seed_value=1763):
    """Set seed for reproducibility."""
    logging.debug(f" [*] {time.ctime()}: Using random seed for all libraries: {seed_value}")
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

EMBER_FEATURE_DIM = 2381
BATCH_SIZE = 1024

RANDOM_SEED = 1991
set_seed(RANDOM_SEED)

# Training w/ Ember features

In [2]:
X_train = np.load(r"vectorize_output_1657871489\X_ember_trainset.npy")
y_train = np.load(r"vectorize_output_1657871489\y_ember_trainset.npy")
X_val = np.load(r"vectorize_output_1657871489\X_ember_valset.npy").astype(np.int8)
y_val = np.load(r"vectorize_output_1657871489\y_ember_valset.npy").astype(np.int8)

In [3]:
print(X_train.shape)
print(X_val.shape)

(74, 2381)
(31, 2381)


In [4]:
net = EmberMLP()
net(torch.Tensor(X_train))[0:4]

tensor([[295754.8125],
        [ 39226.1250],
        [ 51071.0000],
        [ 81800.4609]], grad_fn=<SliceBackward0>)

In [5]:
net = EmberMLP()
rr = net.get_representations(torch.Tensor(X_train))
rr.shape

torch.Size([74, 128])

# Training 

In [94]:
def train(model, device, train_loader, optimizer, loss_function, epoch_id, verbosity_batches=100):
    model.train()

    train_metrics = []
    train_loss = []
    now = time.time()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device).reshape(-1,1)
        
        optimizer.zero_grad()
        logits = model(data)
        
        loss = loss_function(logits, target)
        train_loss.append(loss.item())
        
        loss.backward() # derivatives
        optimizer.step() # parameter update

        preds = torch.argmax(logits, dim=1).flatten()
        
        accuracy = (preds == target).cpu().numpy().mean() * 100
        f1 = f1_score(target, preds)
        rocauc = roc_auc_score(target, preds)
        train_metrics.append([accuracy, f1, rocauc])
        
        if batch_idx % verbosity_batches == 0:
            logging.warning(" [*] {}: Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.2f} | Elapsed: {:.2f}s".format(
                time.ctime(), epoch_id, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item(), np.mean([x[0] for x in train_metrics]), time.time()-now))
            now = time.time()

    return train_loss, np.array(train_metrics).mean(axis=0).reshape(-1,3), logits, target

def evaluate(model, device, val_loader, loss_function):
    model.eval()

    val_metrics = []
    val_loss = []

    # For each batch in our validation set...
    for data, target in val_loader:
        data, target = data.to(device), target.to(device).reshape(-1,1)
        
        with torch.no_grad():
            logits = model(data)
        
        loss = loss_function(logits, target)
        val_loss.append(loss.item())

        preds = torch.argmax(logits, dim=1).flatten()

        accuracy = (preds == target).cpu().numpy().mean() * 100
        f1 = f1_score(target, preds)
        roc_auc = roc_auc_score(target, preds)
        val_metrics.append([accuracy, f1, roc_auc])
        
    return val_loss, np.array(val_metrics).mean(axis=0).reshape(-1,3)

def dump_results(model, train_losses, train_metrics, val_losses, val_metrics, duration, args, epoch):
    prefix = f"ep{epoch}-optim_{args.optimizer}-lr{args.learning_rate}-l2reg{args.l2}-dr{args.dropout}"
    prefix += f"_arr-ed{args.embedding_dim}-kb{args.keep_bytes}-pl{args.padding_length}"
    prefix += f"_model-conv{args.num_filters}-bn_c{args.batch_norm_conv}_f{args.batch_norm_ffnn}-ffnn{'_'.join([str(x) for x in args.hidden_layers])}"
    
    model_file = f"{prefix}-model.torch"
    torch.save(model.state_dict(), model_file)

    with open(f"{prefix}-train_losses.pickle", "wb") as f:
        pickle.dump(train_losses, f)
    
    with open(f"{prefix}-val_losses.pickle", "wb") as f:
        pickle.dump(val_losses, f)
    
    # in form [train_acc, train_f1]
    np.save(f"{prefix}-train_metrics.pickle", train_metrics)

    # in form [val_acc, val_f1]
    np.save(f"{prefix}-val_metrics.pickle", val_metrics)

    with open(f"{prefix}-duration.pickle", "wb") as f:
        pickle.dump(duration, f)

    dumpstring = f"""
     [!] {time.ctime()}: Dumped results:
            model: {model_file}
            train loss list: {prefix}-train_losses.pickle
            validation loss list: {prefix}-val_losses.pickle
            train metrics : {prefix}-train_metrics.pickle
            validation metrics : {prefix}-train_metrics.pickle
            duration: {prefix}-duration.pickle"""
    logging.warning(dumpstring)


In [108]:
model = EmberMLP()
aa = [x for x in model.parameters()]
aa[0]

Parameter containing:
tensor([[-1.8264e-02, -9.3230e-03,  1.6173e-02,  ..., -1.1918e-02,
          3.2062e-03,  4.9199e-03],
        [-4.5862e-03,  1.1049e-02, -1.4949e-02,  ...,  2.0208e-02,
          2.5320e-03, -3.8756e-04],
        [ 1.4658e-02,  1.3672e-02, -3.7788e-03,  ...,  1.4896e-02,
          3.5919e-03, -9.2345e-03],
        ...,
        [ 5.9809e-03,  6.0326e-04, -6.9606e-04,  ..., -1.8532e-02,
         -9.8626e-03,  1.2406e-02],
        [-1.1200e-02,  5.7746e-03,  1.9879e-02,  ...,  1.5798e-02,
          8.7864e-03, -5.8316e-03],
        [-1.6916e-02,  1.4591e-02,  7.5539e-03,  ...,  1.9885e-03,
         -2.0210e-05,  1.9130e-02]], requires_grad=True)

In [111]:
train_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train)),
    batch_size = BATCH_SIZE, shuffle=True)

val_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val)),
    batch_size = BATCH_SIZE, shuffle=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#model = EmberMLP()
model.to(device)

optimizer = optim.AdamW(model.parameters(), lr=0.001)

loss_function = nn.CrossEntropyLoss()

train_losses = []
train_metrics = []
val_losses = []
val_metrics = []
duration = []

try:
    for epoch in range(1, 5):
        epoch_start_time = time.time()

        logging.warning(f" [*] {time.ctime()}: Started epoch: {epoch}")

        train_loss, train_m, logits, target = train(model, device, train_loader, optimizer, loss_function, epoch, 100)
        train_losses.extend(train_loss)
        train_metrics.append(train_m)

        # After the completion of each training epoch, measure the model's performance on validation set.
        val_loss, val_m = evaluate(model, device, val_loader, loss_function)
        val_losses.extend(val_loss)
        val_metrics.append(val_m)

        # Print performance over the entire training data
        time_elapsed = time.time() - epoch_start_time
        duration.append(time_elapsed)
        logging.warning(f" [*] {time.ctime()}: {epoch:^7} | Tr.loss: {np.mean(train_loss):^12.6f} | Tr.acc.: {np.mean([x[0] for x in train_m]):^9.2f} | Val.loss: {np.mean(val_loss):^10.6f} | Val.acc.: {np.mean([x[0] for x in val_m]):^9.2f} | {time_elapsed:^9.2f}")
    #dump_results(net, train_losses, np.vstack(train_metrics), val_losses, np.vstack(val_metrics), duration, args, epoch)
except KeyboardInterrupt as ex:
    print("interrupted")
    #dump_results(net, train_losses, train_metrics, val_losses, val_metrics, duration, args, epoch)




In [112]:
bb = [x for x in model.parameters()]
print(aa == bb)
bb[0]

True


Parameter containing:
tensor([[-1.7475e-02, -8.9198e-03,  1.5474e-02,  ..., -1.1403e-02,
          3.0675e-03,  4.7071e-03],
        [-4.3879e-03,  1.0571e-02, -1.4303e-02,  ...,  1.9334e-02,
          2.4225e-03, -3.7081e-04],
        [ 1.4024e-02,  1.3080e-02, -3.6154e-03,  ...,  1.4252e-02,
          3.4365e-03, -8.8352e-03],
        ...,
        [ 5.7223e-03,  5.7717e-04, -6.6597e-04,  ..., -1.7730e-02,
         -9.4362e-03,  1.1870e-02],
        [-1.0715e-02,  5.5249e-03,  1.9020e-02,  ...,  1.5115e-02,
          8.4065e-03, -5.5794e-03],
        [-1.6184e-02,  1.3960e-02,  7.2272e-03,  ...,  1.9025e-03,
         -1.9336e-05,  1.8303e-02]], requires_grad=True)