# DEVNET


## includes


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import regularizers, backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, Callback
from tensorflow.keras.optimizers import AdamW
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.datasets import load_svmlight_file
from joblib import Memory
import pandas as pd

# helper data

In [None]:
# joblib on-disk cache rooted at ./dataset/svm_data; used by the @mem.cache decorator below
mem = Memory("./dataset/svm_data", verbose=0)

def dataLoading(path):
    """Read a CSV file and split it into features and labels.

    The ``class`` column is returned as the label vector; every other
    column, in file order, forms the feature matrix.

    Returns:
        (x, labels): feature array and label array.
    """
    frame = pd.read_csv(path)
    target_column = 'class'
    features = frame.drop(columns=[target_column]).values
    return features, frame[target_column].values

@mem.cache
def get_data_from_svmlight_file(path):
    """Load an svmlight/libsvm file and return ``(dense_features, targets)``.

    The result is cached on disk through ``mem``.
    """
    sparse_features, targets = load_svmlight_file(path)
    dense_features = sparse_features.toarray()
    return dense_features, targets


## callbacks for auc-pr maximization


In [None]:
# track aupr on a validation set
# designed for anomaly detection tasks, where AUPR is the more informative metric
# aupr stored in the logs after each epoch
class AUC_Callback(Callback):
    """Keras callback that scores the model on a held-out set after every epoch.

    The average precision (AUPR) of the predictions on ``x_val`` against
    ``y_val`` is written to ``logs['val_aupr']`` so that later callbacks in
    the list can monitor it.
    """

    def __init__(self, x_val, y_val):
        super().__init__()
        self.x_val, self.y_val = x_val, y_val

    def on_epoch_end(self, epoch, logs=None):
        scores = self.model.predict(self.x_val, verbose=0)
        # collapse a trailing singleton axis into a flat score vector
        if scores.shape[-1] == 1:
            scores = scores.flatten()
        aupr = average_precision_score(self.y_val, scores)
        if logs is None:
            return
        logs['val_aupr'] = aupr


## deviation loss definition and network architecture


In [None]:
# create a deviation-based loss that pushes normal samples close to the reference distribution
# and forces anomalies to deviate by at least the specified margin
def create_deviation_loss(margin=5.0, ref_size=5000):
    """Build the deviation loss closure.

    A fixed sample of ``ref_size`` standard-normal draws serves as the
    reference distribution. Predictions are turned into z-scores against
    that sample; label-0 rows are penalised by ``|z|`` and label-1 rows by
    ``max(margin - z, 0)``.

    Returns:
        A ``deviation_loss(y_true, y_pred)`` function returning the mean loss.
    """
    reference = K.variable(np.random.normal(size=ref_size), dtype='float32')

    def deviation_loss(y_true, y_pred):
        labels = K.cast(y_true, 'float32')
        ref_mean = K.mean(reference)
        # epsilon keeps the division safe if the sample std is ~0
        ref_std = K.std(reference) + K.epsilon()
        z_score = (y_pred - ref_mean) / ref_std
        normal_term = (1 - labels) * K.abs(z_score)
        anomaly_term = labels * K.abs(K.maximum(margin - z_score, 0.0))
        return K.mean(normal_term + anomaly_term)

    return deviation_loss


# define a deep deviation network with three hidden layers for complex data
def dev_network_d(input_shape):
    """Deep scorer: three ReLU hidden layers (1000, 250, 20 units), each
    L2-regularised and followed by batch normalisation, then one linear output."""
    inputs = Input(shape=input_shape)
    hidden = inputs
    for units in (1000, 250, 20):
        hidden = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(hidden)
        hidden = BatchNormalization()(hidden)
    score = Dense(1, activation='linear')(hidden)
    return Model(inputs, score)

# define a shallow deviation network with one hidden layer for simpler data
def dev_network_s(input_shape):
    """Shallow scorer: one 20-unit ReLU hidden layer (L2-regularised, batch
    normalised) followed by a single linear output unit."""
    inputs = Input(shape=input_shape)
    hidden_layer = Dense(20, activation='relu', kernel_regularizer=regularizers.l2(1e-4))
    hidden = BatchNormalization()(hidden_layer(inputs))
    score = Dense(1, activation='linear')(hidden)
    return Model(inputs, score)

# define a linear deviation network without hidden layers for baseline comparison
def dev_network_linear(input_shape):
    """Linear scorer: the input is mapped straight to one linear output unit."""
    inputs = Input(shape=input_shape)
    output_layer = Dense(1, activation='linear')
    return Model(inputs, output_layer(inputs))

# assemble the deviation network of specified depth, compile with AdamW optimizer and deviation loss
def deviation_network(input_shape, depth, lr, wd, margin):
    """Build and compile a deviation network.

    Args:
        input_shape: shape tuple passed to the network's ``Input`` layer.
        depth: 1 selects the linear net, 2 the shallow net, 4 the deep net.
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        margin: margin passed to ``create_deviation_loss``.

    Raises:
        ValueError: if ``depth`` is not 1, 2 or 4.
    """
    if depth == 1:
        build = dev_network_linear
    elif depth == 2:
        build = dev_network_s
    elif depth == 4:
        build = dev_network_d
    else:
        raise ValueError("Unsupported network depth")

    network = build(input_shape)
    network.compile(
        loss=create_deviation_loss(margin),
        optimizer=AdamW(learning_rate=lr, weight_decay=wd),
    ) if False else None
    adamw = AdamW(learning_rate=lr, weight_decay=wd)
    network.compile(loss=create_deviation_loss(margin), optimizer=adamw)
    return network


## data preprocessing

In [None]:
# this generator makes batches with equal mix of normal and outlier samples
# it picks half batch from outliers with replacement and half from inliers without replacement
# then shuffles and yields the data and labels indicating which are outliers
def batch_generator_sup(x, out_idx, in_idx, batch_size, rng):
    """Yield endless class-balanced ``(features, labels)`` batches.

    Each batch holds ``max(1, batch_size // 2)`` outlier rows drawn with
    replacement and the remaining rows drawn from the inliers. Inliers are
    drawn without replacement when enough are available; if the inlier pool
    is smaller than the number requested, they are drawn with replacement
    instead of raising.

    Args:
        x: array indexable by an integer index array.
        out_idx: row indices of outliers in ``x``.
        in_idx: row indices of inliers in ``x``.
        batch_size: total number of rows per batch.
        rng: object providing ``choice`` and ``shuffle`` (e.g. ``np.random``
            or a ``np.random.RandomState``).

    Yields:
        ``(x[batch_idx], labels)`` where ``labels`` is float32, 1.0 for rows
        whose index appears in ``out_idx`` and 0.0 otherwise.
    """
    n_out_batch = max(1, batch_size // 2)
    n_in_batch = batch_size - n_out_batch
    # Drawing without replacement is impossible when the pool is too small;
    # fall back to replacement only in that case so normal runs are unchanged.
    in_replace = n_in_batch > len(in_idx)
    while True:
        out_samples = rng.choice(out_idx, n_out_batch, replace=True)
        in_samples = rng.choice(in_idx, n_in_batch, replace=in_replace)
        batch_idx = np.concatenate([in_samples, out_samples])
        rng.shuffle(batch_idx)
        labels = np.isin(batch_idx, out_idx).astype(np.float32)
        yield x[batch_idx], labels

# create synthetic samples by copying seed rows and overwriting 5% of their features with values from other seed rows
def inject_noise(seed, n_out, random_seed):
    """Create ``n_out`` synthetic rows by feature swapping.

    Each synthetic row starts as a copy of a randomly chosen row of ``seed``;
    ``int(dim * 0.05)`` randomly chosen columns (drawn with replacement) are
    then overwritten with the values of a second randomly chosen row.

    Args:
        seed: 2-D array of source rows.
        n_out: number of synthetic rows to produce.
        random_seed: seed for the private ``RandomState``.

    Returns:
        Array of shape ``(n_out, seed.shape[1])``.
    """
    swap_ratio = 0.05
    generator = np.random.RandomState(random_seed)
    n_rows, n_features = seed.shape
    swaps_per_row = int(n_features * swap_ratio)

    # draw order (base rows, donor rows, columns) fixes the output for a given seed
    base_rows = generator.choice(n_rows, size=n_out, replace=True)
    donor_rows = generator.choice(n_rows, size=n_out, replace=True)
    swap_cols = generator.choice(n_features, size=(n_out, swaps_per_row), replace=True)

    synthetic = seed[base_rows].copy()
    target_rows = np.arange(n_out)[:, np.newaxis]
    synthetic[target_rows, swap_cols] = seed[donor_rows[:, np.newaxis], swap_cols]
    return synthetic


## train + test


In [None]:
# full training and testing pipeline for DevNet over every CSV file in the input folder
# load the data, make a stratified train/val/test split, scale the features, add synthetic noisy samples, then fit the model and record the scores
# callbacks stop training early, reduce the learning rate on plateau, and save the best model according to validation AUPR
def run_devnet(config):
    """Train and evaluate a deviation network on every CSV in ``config.input_path``.

    For each dataset: make a stratified 70/15/15 train/validation/test split,
    standardise with statistics fitted on the training split, keep at most
    ``config.known_outliers`` labelled outliers, append noise-injected rows
    (built from the kept outliers) labelled as normal, fit the model while
    monitoring validation AUPR, and score the test split. A per-dataset
    summary is written to ``all_dataset_results6``.

    Outlier subsampling and batch sampling are driven by a generator seeded
    with ``config.random_seed``. Network weight initialisation and the loss's
    reference sample are not seeded here.

    Args:
        config: object exposing ``input_path``, ``random_seed``,
            ``known_outliers``, ``cont_rate``, ``network_depth``, ``lr``,
            ``weight_decay``, ``margin``, ``batch_size`` and ``epochs``.
    """
    all_results = []
    scaler = StandardScaler()

    # os.listdir gives no ordering guarantee; sort so datasets run in a stable order
    for fname in sorted(os.listdir(config.input_path)):
        if not fname.endswith('.csv'):
            continue
        name = fname.rsplit('.', 1)[0]
        x, y = dataLoading(os.path.join(config.input_path, fname))

        # Seeded per dataset so sampling honours config.random_seed and does not
        # depend on which datasets were processed before this one.
        rng = np.random.RandomState(config.random_seed)

        x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.3, stratify=y, random_state=config.random_seed)
        x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=config.random_seed)

        # fit the scaler on the training split only, then reuse it for val/test
        x_train = scaler.fit_transform(x_train)
        x_val = scaler.transform(x_val)
        x_test = scaler.transform(x_test)

        out_idx = np.where(y_train == 1)[0]
        in_idx = np.where(y_train == 0)[0]
        if len(out_idx) > config.known_outliers:
            # discard surplus labelled outliers so only known_outliers remain
            drop = rng.choice(out_idx, len(out_idx) - config.known_outliers, replace=False)
            keep = np.setdiff1d(np.arange(len(y_train)), drop)
            x_train, y_train = x_train[keep], y_train[keep]
            out_idx = np.where(y_train == 1)[0]
            in_idx = np.where(y_train == 0)[0]

        # number of noisy rows so they make up cont_rate of the label-0 pool
        n_noise = int(len(in_idx) * config.cont_rate / (1 - config.cont_rate))
        synth = inject_noise(x_train[out_idx], n_noise, config.random_seed)
        x_train = np.vstack([x_train, synth])
        y_train = np.concatenate([y_train, np.zeros(n_noise)])
        # rows are appended at the end, so out_idx stays valid; refresh in_idx only
        in_idx = np.where(y_train == 0)[0]

        model = deviation_network(
            input_shape=(x_train.shape[1],),
            depth=config.network_depth,
            lr=config.lr,
            wd=config.weight_decay,
            margin=config.margin
        )

        ckpt = ModelCheckpoint(f"./model/devnet_{name}.keras", save_best_only=True, monitor='val_aupr', mode='max')
        es_cb = EarlyStopping(monitor='val_aupr', mode='max', patience=10, restore_best_weights=True)
        lr_cb = ReduceLROnPlateau(monitor='val_aupr', mode='max', factor=0.5, patience=5, min_lr=1e-6)
        auc_cb = AUC_Callback(x_val, y_val)

        steps = max(1, len(in_idx) // config.batch_size)
        model.fit(
            batch_generator_sup(x_train, out_idx, in_idx, config.batch_size, rng),
            steps_per_epoch=steps,
            epochs=config.epochs,
            validation_data=(x_val, y_val),
            # auc_cb must run first: it writes the 'val_aupr' entry the others monitor
            callbacks=[auc_cb, ckpt, lr_cb, es_cb],
            verbose=0
        )

        # flatten the (n, 1) prediction array into the 1-D score vector the metrics expect
        y_score = model.predict(x_test).ravel()
        roc = roc_auc_score(y_test, y_score)
        aupr = average_precision_score(y_test, y_score)
        print(f"{name}: ROC AUC={roc:.4f}, AUPR={aupr:.4f}")
        all_results.append({'dataset': name, 'roc': roc, 'aupr': aupr})

    results_df = pd.DataFrame(all_results)
    output_csv_filename = "all_dataset_results6"
    results_df.to_csv(output_csv_filename, index=False)
    print("Summary results saved to", output_csv_filename)


## function call using configuration set as the following


In [8]:
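# network_depth: choose network size: 1=linear, 2=shallow, 4=deep deviation network
# known_outliers: number of labelled outliers to be kept in the training set
# cont_rate: contamination rate, i.e. the share of injected noisy samples among the samples labelled normal
# lr: learning rate
# weight_decay: weight decay passed to the AdamW optimizer
# margin: how far outliers must deviate from normal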
if __name__ == "__main__":
    class Config:
        """Empty attribute container for the run settings."""

    # run settings: data folder, network choice, contamination rate, batch size,
    # optimizer hyper-parameters, loss margin and random seed
    cfg = Config()
    vars(cfg).update(
        input_path='./dataset/',
        network_depth=2,
        known_outliers=30,
        cont_rate=0.02,
        batch_size=512,
        epochs=60,
        lr=2e-3,
        weight_decay=2e-4,
        margin=5.0,
        random_seed=42,
    )

    run_devnet(cfg)


annthyroid_21feat_normalised: ROC AUC=0.9618, AUPR=0.8574
bank-additional-full_normalised: ROC AUC=0.7964, AUPR=0.4064
celeba_baldvsnonbald_normalised: ROC AUC=0.8843, AUPR=0.1987
census-income-full-mixed-binarized: ROC AUC=0.7042, AUPR=0.2434
creditcardfraud_normalised: ROC AUC=0.9347, AUPR=0.6714
KDD2014_donors_10feat_nomissing_normalised: ROC AUC=1.0000, AUPR=1.0000
UNSW_NB15_traintest_backdoor: ROC AUC=0.9654, AUPR=0.9151
Summary results saved to all_dataset_results6


# USING DIFFERENT PARAMETERS AND A FEW CHANGES IN THE MAIN CODE


In [None]:
import os
import numpy as np
import tensorflow as tf
from scipy.sparse import csc_matrix
from tensorflow.keras import regularizers, backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, Callback
from tensorflow.keras.optimizers import AdamW
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.datasets import load_svmlight_file
from joblib import Memory
import pandas as pd

# joblib on-disk cache rooted at ./dataset/svm_data; used by the @mem.cache decorator below
mem = Memory("./dataset/svm_data", verbose=0)

def dataLoading(path):
    """Read a CSV file and split it into features and labels.

    The ``class`` column is returned as the label vector; every other
    column, in file order, forms the feature matrix.

    Returns:
        (x, labels): feature array and label array.
    """
    frame = pd.read_csv(path)
    target_column = 'class'
    features = frame.drop(columns=[target_column]).values
    return features, frame[target_column].values

@mem.cache
def get_data_from_svmlight_file(path):
    """Load an svmlight/libsvm file and return ``(dense_features, targets)``.

    The result is cached on disk through ``mem``.
    """
    sparse_features, targets = load_svmlight_file(path)
    dense_features = sparse_features.toarray()
    return dense_features, targets

class AUC_Callback(Callback):
    """Keras callback that scores the model on a held-out set after every epoch.

    The average precision (AUPR) of the predictions on ``x_val`` against
    ``y_val`` is written to ``logs['val_aupr']`` so that later callbacks in
    the list can monitor it.
    """

    def __init__(self, x_val, y_val):
        super().__init__()
        self.x_val, self.y_val = x_val, y_val

    def on_epoch_end(self, epoch, logs=None):
        scores = self.model.predict(self.x_val, verbose=0)
        # collapse a trailing singleton axis into a flat score vector
        if scores.shape[-1] == 1:
            scores = scores.flatten()
        aupr = average_precision_score(self.y_val, scores)
        if logs is None:
            return
        logs['val_aupr'] = aupr


# loss func

def create_deviation_loss(margin=5.0, ref_size=5000):
    """Build the deviation loss closure.

    A fixed sample of ``ref_size`` standard-normal draws serves as the
    reference distribution. Predictions are turned into z-scores against
    that sample; label-0 rows are penalised by ``|z|`` and label-1 rows by
    ``max(margin - z, 0)``.

    Returns:
        A ``deviation_loss(y_true, y_pred)`` function returning the mean loss.
    """
    reference = K.variable(np.random.normal(size=ref_size), dtype='float32')

    def deviation_loss(y_true, y_pred):
        # labels may arrive as ints; the arithmetic below needs float32
        labels = K.cast(y_true, 'float32')
        ref_mean = K.mean(reference)
        # epsilon keeps the division safe if the sample std is ~0
        ref_std = K.std(reference) + K.epsilon()
        z_score = (y_pred - ref_mean) / ref_std
        normal_term = (1 - labels) * K.abs(z_score)
        anomaly_term = labels * K.abs(K.maximum(margin - z_score, 0.0))
        return K.mean(normal_term + anomaly_term)

    return deviation_loss


# multiple architectures for testing
def dev_network_d(input_shape):
    """Deep scorer: three ReLU hidden layers (1000, 250, 20 units), each
    L2-regularised and followed by batch normalisation, then one linear output."""
    inputs = Input(shape=input_shape)
    hidden = inputs
    for units in (1000, 250, 20):
        hidden = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(hidden)
        hidden = BatchNormalization()(hidden)
    score = Dense(1, activation='linear')(hidden)
    return Model(inputs, score)

def dev_network_s(input_shape):
    """Shallow scorer: one 20-unit ReLU hidden layer (L2-regularised, batch
    normalised) followed by a single linear output unit."""
    inputs = Input(shape=input_shape)
    hidden_layer = Dense(20, activation='relu', kernel_regularizer=regularizers.l2(1e-4))
    hidden = BatchNormalization()(hidden_layer(inputs))
    score = Dense(1, activation='linear')(hidden)
    return Model(inputs, score)

def dev_network_linear(input_shape):
    """Linear scorer: the input is mapped straight to one linear output unit."""
    inputs = Input(shape=input_shape)
    output_layer = Dense(1, activation='linear')
    return Model(inputs, output_layer(inputs))

def deviation_network(input_shape, depth, lr, wd, margin):
    """Build and compile a deviation network.

    Args:
        input_shape: shape tuple passed to the network's ``Input`` layer.
        depth: 1 selects the linear net, 2 the shallow net, 4 the deep net.
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        margin: margin passed to ``create_deviation_loss``.

    Raises:
        ValueError: if ``depth`` is not 1, 2 or 4.
    """
    if depth == 1:
        build = dev_network_linear
    elif depth == 2:
        build = dev_network_s
    elif depth == 4:
        build = dev_network_d
    else:
        raise ValueError("Unsupported network depth")

    network = build(input_shape)
    adamw = AdamW(learning_rate=lr, weight_decay=wd)
    network.compile(loss=create_deviation_loss(margin), optimizer=adamw)
    return network

# batch generator that oversamples the outliers
def batch_generator_sup(x, out_idx, in_idx, batch_size, rng):
    """Yield endless class-balanced ``(features, labels)`` batches.

    Each batch holds ``max(1, batch_size // 2)`` outlier rows drawn with
    replacement and the remaining rows drawn from the inliers. Inliers are
    drawn without replacement when enough are available; if the inlier pool
    is smaller than the number requested, they are drawn with replacement
    instead of raising.

    Args:
        x: array indexable by an integer index array.
        out_idx: row indices of outliers in ``x``.
        in_idx: row indices of inliers in ``x``.
        batch_size: total number of rows per batch.
        rng: object providing ``choice`` and ``shuffle`` (e.g. ``np.random``
            or a ``np.random.RandomState``).

    Yields:
        ``(x[batch_idx], labels)`` where ``labels`` is float32, 1.0 for rows
        whose index appears in ``out_idx`` and 0.0 otherwise.
    """
    n_out_batch = max(1, batch_size // 2)
    n_in_batch = batch_size - n_out_batch
    # Drawing without replacement is impossible when the pool is too small;
    # fall back to replacement only in that case so normal runs are unchanged.
    in_replace = n_in_batch > len(in_idx)
    while True:
        out_samples = rng.choice(out_idx, n_out_batch, replace=True)
        in_samples = rng.choice(in_idx, n_in_batch, replace=in_replace)
        batch_idx = np.concatenate([in_samples, out_samples])
        rng.shuffle(batch_idx)
        labels = np.isin(batch_idx, out_idx).astype(np.float32)
        yield x[batch_idx], labels

# vectorized noise injection
def inject_noise(seed, n_out, random_seed):
    """Create ``n_out`` synthetic rows by feature swapping.

    Each synthetic row starts as a copy of a randomly chosen row of ``seed``;
    ``int(dim * 0.05)`` randomly chosen columns (drawn with replacement) are
    then overwritten with the values of a second randomly chosen row.

    Args:
        seed: 2-D array of source rows.
        n_out: number of synthetic rows to produce.
        random_seed: seed for the private ``RandomState``.

    Returns:
        Array of shape ``(n_out, seed.shape[1])``.
    """
    swap_ratio = 0.05
    generator = np.random.RandomState(random_seed)
    n_rows, n_features = seed.shape
    swaps_per_row = int(n_features * swap_ratio)

    # draw order (base rows, donor rows, columns) fixes the output for a given seed
    base_rows = generator.choice(n_rows, size=n_out, replace=True)
    donor_rows = generator.choice(n_rows, size=n_out, replace=True)
    swap_cols = generator.choice(n_features, size=(n_out, swaps_per_row), replace=True)

    synthetic = seed[base_rows].copy()
    target_rows = np.arange(n_out)[:, np.newaxis]
    synthetic[target_rows, swap_cols] = seed[donor_rows[:, np.newaxis], swap_cols]
    return synthetic

# network training and testing

def run_devnet(config):
    """Train and evaluate a deviation network on every CSV in ``config.input_path``.

    For each dataset: make a stratified 70/15/15 train/validation/test split,
    standardise with statistics fitted on the training split, keep at most
    ``config.known_outliers`` labelled outliers, append synthetic outliers
    (``config.synth_factor`` times the kept outliers) and synthetic inliers
    (sized from ``config.cont_rate``), fit the model while monitoring
    validation AUPR, and score the test split. A per-dataset summary is
    written to ``all_dataset_results6``.

    Outlier subsampling and batch sampling are driven by a generator seeded
    with ``config.random_seed``. Network weight initialisation and the loss's
    reference sample are not seeded here.

    Args:
        config: object exposing ``input_path``, ``random_seed``,
            ``known_outliers``, ``synth_factor``, ``cont_rate``,
            ``network_depth``, ``lr``, ``weight_decay``, ``margin``,
            ``batch_size`` and ``epochs``.
    """
    all_results = []
    scaler = StandardScaler()

    # os.listdir gives no ordering guarantee; sort so datasets run in a stable order
    for fname in sorted(os.listdir(config.input_path)):
        if not fname.endswith('.csv'):
            continue
        name = fname.rsplit('.', 1)[0]
        x, y = dataLoading(os.path.join(config.input_path, fname))

        # Seeded per dataset so sampling honours config.random_seed and does not
        # depend on which datasets were processed before this one.
        rng = np.random.RandomState(config.random_seed)

        x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.3, stratify=y, random_state=config.random_seed)
        x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=config.random_seed)

        # fit the scaler on the training split only, then reuse it for val/test
        x_train = scaler.fit_transform(x_train)
        x_val = scaler.transform(x_val)
        x_test = scaler.transform(x_test)

        out_idx = np.where(y_train == 1)[0]
        in_idx = np.where(y_train == 0)[0]

        if len(out_idx) > config.known_outliers:
            # discard surplus labelled outliers so only known_outliers remain
            drop = rng.choice(out_idx, len(out_idx) - config.known_outliers, replace=False)
            keep = np.setdiff1d(np.arange(len(y_train)), drop)
            x_train, y_train = x_train[keep], y_train[keep]
            out_idx = np.where(y_train == 1)[0]
            in_idx = np.where(y_train == 0)[0]

        if len(out_idx) > 0:
            # augment the labelled outliers with feature-swapped copies, labelled 1
            n_synth_outliers = int(len(out_idx) * config.synth_factor)
            synth_outliers = inject_noise(x_train[out_idx], n_synth_outliers, config.random_seed)

            x_train = np.vstack([x_train, synth_outliers])
            y_train = np.concatenate([y_train, np.ones(n_synth_outliers)])

        # in_idx still refers to the original rows: synthetic rows are appended at the end
        n_synth_inliers = int(len(in_idx) * config.cont_rate / (1 - config.cont_rate))
        if n_synth_inliers > 0:
            synth_inliers = inject_noise(x_train[in_idx], n_synth_inliers, config.random_seed)
            x_train = np.vstack([x_train, synth_inliers])
            y_train = np.concatenate([y_train, np.zeros(n_synth_inliers)])

        # refresh both index sets so they include the synthetic rows
        out_idx = np.where(y_train == 1)[0]
        in_idx = np.where(y_train == 0)[0]
        model = deviation_network(
            input_shape=(x_train.shape[1],),
            depth=config.network_depth,
            lr=config.lr,
            wd=config.weight_decay,
            margin=config.margin
        )
        ckpt = ModelCheckpoint(f"./model/devnet_{name}.keras", save_best_only=True, monitor='val_aupr', mode='max')
        es_cb = EarlyStopping(monitor='val_aupr', mode='max', patience=10, restore_best_weights=True)
        lr_cb = ReduceLROnPlateau(monitor='val_aupr', mode='max', factor=0.5, patience=5, min_lr=1e-6)
        auc_cb = AUC_Callback(x_val, y_val)

        steps = max(1, len(in_idx) // config.batch_size)
        model.fit(
            batch_generator_sup(x_train, out_idx, in_idx, config.batch_size, rng),
            steps_per_epoch=steps,
            epochs=config.epochs,
            validation_data=(x_val, y_val),
            # auc_cb must run first: it writes the 'val_aupr' entry the others monitor
            callbacks=[auc_cb, ckpt, lr_cb, es_cb],
            verbose=0
        )

        # eval: flatten the (n, 1) prediction array into the 1-D score vector the metrics expect
        y_score = model.predict(x_test).ravel()
        roc = roc_auc_score(y_test, y_score)
        aupr = average_precision_score(y_test, y_score)
        print(f"{name}: ROC AUC={roc:.4f}, AUPR={aupr:.4f}")
        all_results.append({'dataset': name, 'roc': roc, 'aupr': aupr})

    # results
    results_df = pd.DataFrame(all_results)
    output_csv_filename = "all_dataset_results6"
    results_df.to_csv(output_csv_filename, index=False)
    print("Summary results saved to", output_csv_filename)

if __name__ == "__main__":
    class Config:
        """Empty attribute container for the run settings."""

    cfg = Config()
    vars(cfg).update(
        input_path='./dataset/',
        network_depth=2,
        known_outliers=30,
        cont_rate=0.02,
        synth_factor=0.5,  # generate synthetic outliers equal to 50% of the kept outliers
        batch_size=512,
        epochs=60,
        lr=2e-3,
        weight_decay=2e-4,
        margin=5.0,
        random_seed=42,
    )

    run_devnet(cfg)

annthyroid_21feat_normalised: ROC AUC=0.9886, AUPR=0.9235
bank-additional-full_normalised: ROC AUC=0.7992, AUPR=0.4185
celeba_baldvsnonbald_normalised: ROC AUC=0.8532, AUPR=0.2112
census-income-full-mixed-binarized: ROC AUC=0.7065, AUPR=0.2583
creditcardfraud_normalised: ROC AUC=0.9703, AUPR=0.6953
KDD2014_donors_10feat_nomissing_normalised: ROC AUC=1.0000, AUPR=0.9994
UNSW_NB15_traintest_backdoor: ROC AUC=0.9690, AUPR=0.9086
Summary results saved to all_dataset_results6
