In [1]:
import numpy as np
import os
import torch
import train_network as train_network
import utils as utils
import adult_network as adult_network
import dataloader as dataloader
import copy
import attack as attack

In [2]:
# Passed to utils.subsample and to train_network.train / train_network.test.
SUBSAMPLE_SIZE = 2500
# Number of subsets requested from utils.subsample and number of model-info
# records created per iteration (named 'model_0' ... 'model_<SETS-1>').
SETS = 4
# Passed to dataloader.get_loaders.
BATCH_SIZE = 8
# Upper bound of the iteration loops; iteration i uses the directory '/iter_<i>'.
NUM_ITER = 1

In [3]:
# Parent directory of the notebook's current working directory.
ROOT_DIR = os.path.dirname(os.getcwd())
# Base directory under which the per-iteration '/iter_<i>' folders are created.
EXPERIMENT_DIR = f"{ROOT_DIR}/experiments/adult"
# Not referenced by any other cell visible in this notebook.
MODEL_PATH = f"{ROOT_DIR}/saved_models"

In [4]:
def create_models(in_features, num=4):
    """Build ``num`` separate ``AdultNetwork`` instances.

    Args:
        in_features: Forwarded as the first argument of
            ``adult_network.AdultNetwork``.
        num: Number of models to construct. Defaults to 4.

    Returns:
        A list with ``num`` newly constructed models, all built from the same
        ``layer_sizes`` list.
    """
    layer_sizes = [20, 20, 20, 20, 2]
    # The model count is driven by `num`, so callers can request any number
    # of models (including zero) instead of always getting four.
    return [
        adult_network.AdultNetwork(in_features, layer_sizes)
        for _ in range(num)
    ]

In [5]:
def train_models(models, dataloaders, model_infos, log_tensorboard=False):
    """Train every model and record the results in its info record.

    For the model at position ``k`` the first element of ``dataloaders[k]``
    is used as the training loader. ``train_network.train`` is expected to
    return a ``(model, losses, accuracies)`` triple; the losses, accuracies
    and the returned model's ``state_dict()`` are stored in ``model_infos[k]``
    under ``'training_losses'``, ``'training_accuracies'`` and
    ``'model_params'``.

    Args:
        models: Iterable of models to train.
        dataloaders: Indexable collection; entry ``k`` holds the loaders of
            model ``k`` with the training loader at index 0.
        model_infos: Indexable collection of info records, updated in place.
        log_tensorboard: Forwarded unchanged to ``train_network.train``.

    Returns:
        The same ``model_infos`` object that was passed in.
    """
    for position, network in enumerate(models):
        loader_pair = dataloaders[position]
        record = model_infos[position]
        outcome = train_network.train(
            network,
            loader_pair[0],
            position,
            SUBSAMPLE_SIZE,
            log_tensorboard,
        )
        trained_network, epoch_losses, epoch_accuracies = outcome
        record['training_losses'] = epoch_losses
        record['training_accuracies'] = epoch_accuracies
        record['model_params'] = trained_network.state_dict()

    return model_infos

In [6]:
def test_models(models, dataloaders, model_infos):
    losses_accuracies = []
    for idx, model in enumerate(models):
        test_loader = dataloaders[idx][1]
        info = model_infos[idx]
        test_loss, test_accuracy = train_network.test(
                model,
                info['model_params'],
                idx,
                test_loader,
                SUBSAMPLE_SIZE
                )
        info['test_loss'] = test_loss
        info['test_accuracy'] = test_accuracy

    return model_infos

In [7]:
def get_model_attributions(models, dataloaders, model_infos):
    """Compute train and test attributions for every model.

    ``models`` and ``dataloaders`` are walked in lockstep (stopping at the
    shorter of the two). For each pair, ``train_network.get_attributions`` is
    called once with the training loader (index 0) and once with the test
    loader (index 1); the results are stored in the matching info record under
    ``'train_attributions'`` and ``'test_attributions'``.

    Args:
        models: Iterable of models.
        dataloaders: Iterable of per-model loader collections, with the train
            loader at index 0 and the test loader at index 1.
        model_infos: Indexable collection of info records, updated in place.
            Each record must already contain ``'model_params'``.

    Returns:
        The same ``model_infos`` object that was passed in.
    """
    # The loop variable is named `loaders` so it does not shadow the imported
    # `dataloader` module inside this function.
    for idx, (model, loaders) in enumerate(zip(models, dataloaders)):
        train_loader = loaders[0]
        test_loader = loaders[1]
        info = model_infos[idx]

        info['train_attributions'] = train_network.get_attributions(
            model,
            info['model_params'],
            idx,
            train_loader,
        )
        info['test_attributions'] = train_network.get_attributions(
            model,
            info['model_params'],
            idx,
            test_loader,
        )

    return model_infos

In [8]:
def populate_train_test_sets(subsets, info, model_infos):
    """Attach a normalized train/test pair to every model info record.

    Record ``i`` trains on ``subsets[i]`` and is tested on the next subset,
    wrapping around to ``subsets[0]`` after the last one. Both sets are passed
    through ``dataloader.normalize`` and stored under ``'train_set'`` and
    ``'test_set'``.

    Args:
        subsets: Sized, indexable collection of data subsets; must hold at
            least as many entries as ``model_infos``.
        info: Unused. Kept only so existing callers keep working.
        model_infos: Iterable of info records, updated in place.

    Returns:
        The same ``model_infos`` object that was passed in.
    """
    subset_count = len(subsets)
    # The loop variable is deliberately not called `info`, so the parameter of
    # that name is not silently rebound.
    for i, model_info in enumerate(model_infos):
        train = subsets[i]
        # The modulo makes the last record wrap around to the first subset.
        test = subsets[(i + 1) % subset_count]
        train_normalize, test_normalize = dataloader.normalize(train, test)

        model_info['train_set'] = train_normalize
        model_info['test_set'] = test_normalize

    return model_infos

def init_model_infos(info, model_infos, sets):
    """Append ``sets`` independent copies of ``info`` to ``model_infos``.

    Every copy is a deep copy of the template whose ``'model_name'`` entry is
    set to ``'model_<k>'`` for ``k`` in ``0 .. sets - 1``. The template itself
    is left untouched.

    Args:
        info: Template record to copy.
        model_infos: List that receives the new records (modified in place).
        sets: Number of records to append.

    Returns:
        The same ``model_infos`` list that was passed in.
    """
    def _named_copy(index):
        # Deep copy so that records never share nested objects with the template.
        record = copy.deepcopy(info)
        record['model_name'] = 'model_' + str(index)
        return record

    model_infos.extend(_named_copy(index) for index in range(sets))
    return model_infos

def save_model_infos(model_infos, new_dir_name, i):
    """Write every info record to its own ``.npy`` file.

    Each record is wrapped with ``np.array`` and handed to ``np.save``; the
    target is ``EXPERIMENT_DIR + new_dir_name + '/' + <model_name>``.

    Args:
        model_infos: Iterable of records, each with a ``'model_name'`` entry.
        new_dir_name: Directory suffix appended to ``EXPERIMENT_DIR``
            (e.g. ``'/iter_0'``).
        i: Unused by this function.
    """
    for record in model_infos:
        file_stem = record['model_name']
        target = EXPERIMENT_DIR + new_dir_name + '/' + file_stem
        np.save(target, np.array(record))

def create_new_dir(name):
    """Ensure the directory ``EXPERIMENT_DIR + name`` exists.

    Args:
        name: Path suffix appended to ``EXPERIMENT_DIR``, e.g. ``'/iter_0'``
            or ``'/iter_0/plots'``.
    """
    # makedirs with exist_ok=True creates any missing parent directories and
    # does not fail if the directory already exists, so there is no
    # check-then-create race.
    os.makedirs(EXPERIMENT_DIR + name, exist_ok=True)

In [9]:
def run(retrain=False):
    """Train, test and attribute a set of models for every iteration.

    When ``retrain`` is false the function does nothing. Otherwise, for each
    ``i`` in ``range(NUM_ITER)`` it:

    1. creates the directory ``'/iter_<i>'`` under ``EXPERIMENT_DIR``,
    2. builds ``SETS`` fresh info records and the models,
    3. subsamples the data and stores normalized train/test sets per record,
    4. trains, tests and computes attributions,

    saving the info records after each stage.

    Args:
        retrain: Whether to run the pipeline at all. Defaults to ``False``.

    Returns:
        None.
    """
    if not retrain:
        return

    # Template for one model's record; every field is filled in by a later stage.
    info = {
            'model_name': None,
            'model_params': None,
            'train_set': None,
            'test_set': None,
            'training_losses': None,
            'training_accuracies': None,
            'test_loss': None,
            'test_accuracy': None,
            'train_attributions': None,
            'test_attributions': None
            }

    X, Y = dataloader.process_data()
    in_features = X.shape[1]

    for i in range(NUM_ITER):
        new_dir_name = '/iter_' + str(i)
        create_new_dir(new_dir_name)

        # Start from an empty list on every iteration: records carried over
        # from a previous iteration would outnumber the subsets below.
        model_infos = init_model_infos(info, [], SETS)

        models = create_models(in_features, SETS)

        subsets = utils.subsample(X, Y, SUBSAMPLE_SIZE, SETS)
        model_infos = populate_train_test_sets(subsets, info, model_infos)

        # Persist the data split before the training stage starts.
        save_model_infos(model_infos, new_dir_name, i)

        models_train_test = [
            (m_info['train_set'], m_info['test_set']) for m_info in model_infos
        ]
        dataloaders = dataloader.get_loaders(models_train_test, BATCH_SIZE)

        model_infos = train_models(
                models,
                dataloaders,
                model_infos,
                log_tensorboard=True
                )
        save_model_infos(model_infos, new_dir_name, i)

        model_infos = test_models(models, dataloaders, model_infos)
        save_model_infos(model_infos, new_dir_name, i)

        model_infos = get_model_attributions(models, dataloaders, model_infos)
        save_model_infos(model_infos, new_dir_name, i)

    # The function returns only after all NUM_ITER iterations have completed.
    return

In [10]:
# retrain=False: run() only does work inside its `if retrain:` branch, so this call is a no-op.
run(False)

In [None]:
# Load the processed dataset; only the feature count (X.shape[1]) is used below.
X, Y = dataloader.process_data()
in_features = X.shape[1]
# Callback that writes the given model infos into the '/iter_0' directory.
save_model_fn = lambda model_infos: save_model_infos(model_infos, '/iter_0', 0)
# attack.run lives in the attack module (not shown here); presumably it calls
# save_model_fn to checkpoint intermediate results — TODO confirm.
model_infos = attack.run(0, in_features, save_model_fn)
# Persist the infos returned by attack.run.
save_model_infos(model_infos, '/iter_0', 0)

epoch: 0, iter: 0, loss: 0.6957281827926636
epoch: 0, iter: 150, loss: 0.6732441782951355
epoch: 0, iter: 300, loss: 0.6870831251144409
epoch: 0, iter: 450, loss: 0.7057802081108093
epoch: 0, iter: 600, loss: 0.697930097579956
epoch: 0, iter: 750, loss: 0.6910896897315979
epoch: 0, iter: 900, loss: 0.6863833665847778
epoch: 0, iter: 1050, loss: 0.6765116453170776
epoch: 0, iter: 1200, loss: 0.7075406908988953
epoch: 0, iter: 1350, loss: 0.6761417984962463
epoch: 0, iter: 1500, loss: 0.6970409154891968
Finish epoch 0, time elapsed 36.0098979473114
epoch: 1, iter: 0, loss: 0.6841506361961365
epoch: 1, iter: 150, loss: 0.6920298933982849
epoch: 1, iter: 300, loss: 0.7068306803703308
epoch: 1, iter: 450, loss: 0.6856793761253357
epoch: 1, iter: 600, loss: 0.7275334000587463
epoch: 1, iter: 750, loss: 0.7008898854255676
epoch: 1, iter: 900, loss: 0.6827049255371094
epoch: 1, iter: 1050, loss: 0.7009462118148804
epoch: 1, iter: 1200, loss: 0.715234637260437
epoch: 1, iter: 1350, loss: 0.7064

epoch: 14, iter: 1350, loss: 0.680939793586731
epoch: 14, iter: 1500, loss: 0.676382303237915
Finish epoch 14, time elapsed 297.3283669948578
epoch: 15, iter: 0, loss: 0.6586213707923889
epoch: 15, iter: 150, loss: 0.6989959478378296
epoch: 15, iter: 300, loss: 0.6864619851112366
epoch: 15, iter: 450, loss: 0.7065724730491638
epoch: 15, iter: 600, loss: 0.6970138549804688
epoch: 15, iter: 750, loss: 0.6940413117408752
epoch: 15, iter: 900, loss: 0.6928299069404602
epoch: 15, iter: 1050, loss: 0.6836569309234619
epoch: 15, iter: 1200, loss: 0.6891104578971863
epoch: 15, iter: 1350, loss: 0.7027093172073364
epoch: 15, iter: 1500, loss: 0.6946178674697876
Finish epoch 15, time elapsed 365.11209654808044
epoch: 16, iter: 0, loss: 0.7036877870559692
epoch: 16, iter: 150, loss: 0.6688421964645386
epoch: 16, iter: 300, loss: 0.7361324429512024
epoch: 16, iter: 450, loss: 0.6726380586624146
epoch: 16, iter: 600, loss: 0.6796327829360962
epoch: 16, iter: 750, loss: 0.7070803046226501
epoch: 16,

In [None]:
# Load the saved training curves of every model for each iteration and plot them.
for i in range(NUM_ITER):
    iter_dir = '/iter_' + str(i)
    train_losses = []
    train_accuracies = []

    # One saved info file per model: 'model_0' ... 'model_<SETS-1>'.
    for j in range(SETS):
        path = EXPERIMENT_DIR + iter_dir + '/model_' + str(j) + '.npy'
        # NOTE: allow_pickle=True unpickles the file content. Only load files
        # written by save_model_infos, never untrusted .npy files.
        model_info = np.load(path, allow_pickle=True)

        # The file holds a 0-d object array wrapping the info dict; unwrap it once.
        info_dict = model_info.item()
        losses = info_dict.get('training_losses')
        accu = info_dict.get('training_accuracies')

        train_losses.append(losses)
        train_accuracies.append(accu)

    plt_dir = iter_dir + '/plots'
    create_new_dir(plt_dir)
    utils.plot_losses(train_losses, EXPERIMENT_DIR + plt_dir)
    utils.plot_accuracies(train_accuracies, EXPERIMENT_DIR + plt_dir)

In [None]:
# Print the saved test loss and accuracy of every model, once per iteration.
print("Test Loss/ Accuracies")
for i in range(NUM_ITER):
    iter_dir = '/iter_' + str(i)
    test_losses = []
    test_acc = []

    # One saved info file per model: 'model_0' ... 'model_<SETS-1>'.
    for j in range(SETS):
        path = EXPERIMENT_DIR + iter_dir + '/model_' + str(j) + '.npy'
        # NOTE: allow_pickle=True unpickles the file content. Only load files
        # written by save_model_infos, never untrusted .npy files.
        model_info = np.load(path, allow_pickle=True)

        # The file holds a 0-d object array wrapping the info dict; unwrap it once.
        info_dict = model_info.item()
        losses = info_dict.get('test_loss')
        acc = info_dict.get('test_accuracy')
        test_losses.append(np.array([losses])[0])
        test_acc.append(np.array([acc])[0])

    # Report inside the loop so every iteration is printed, and so nothing is
    # referenced before assignment when NUM_ITER is 0.
    print(f'Losses {test_losses}')
    print(f'Accuracies {test_acc}')