In [None]:
# Adjustable Privacy - create_obfuscated_dataset_and_train_on.ipynb
# - Uses an obfuscator to obfuscate the dataset and create a new one, and then trains a model (strong adversary or utilizer) on the obfuscated (categorical) dataset to infer a specific feature.
# - Uses Categorical dataset UCI-Adult (private attr: gender, utility attr: income).
# - Load a specific trained obfuscator model (from google drive).
# - Saves trained models after each epoch number (to google drive and locally).
# - It can stop and resume training.
# - Draws loss and accuracy plots and saves them (to google drive).
# - For adversary test on obfuscated testset, and for utilizer test on original dataset.
# - Also it can load models and draw plots (from google drive).
# - Also it loads the weak adversary, evaluates it on the obfuscated testset and reports its accuracy.
# - You can manage notebook parameters in parser block

In [None]:
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms, models
import numpy as np
from collections import OrderedDict
import time
from torch.utils.data import random_split
from math import floor
import torchvision.utils as vutils
import torch.nn.parallel
import torch.backends.cudnn as cudnn
from tqdm import tqdm
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
import itertools
import random
import shutil
from zipfile import ZipFile
import os
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import argparse

In [None]:
# Parser
# All notebook parameters are declared here and parsed from `command_string`
# (edit that string to change a run; there is no real command line in a notebook).
parser = argparse.ArgumentParser(description='Adjustable Privacy - Use an obfuscator to obfuscate dataset and create a new one. And then train a machine (srtong adversary or utilizer) on obfuscated (categorical) dataset to infer a specific feature. '
                                 + 'Uses Categorical dataset UCI-Adult (private attr: gender, utility attr: income). '
                                 + 'Load a specific trained obfuscator model (from google drive). '
                                 + 'Saves trained models after each epoch number (to google drive and locally). '
                                 + 'It can stop and resume training.'
                                 + 'Draws loss and accuracy plots and saves them (to google drive and locally). '
                                 + 'Also it can load models and draw plots (from google drive). '
                                 + 'For adversary test on obfuscated testset, and for utilizer test on original dataset. '
                                 + 'Also it loads the weak adversary and evaluate it on obfuscated testset and reports its accuracy. ')

# The default is the *string* 'False': downstream code compares this value with
# the string 'False', so a boolean default would be read as "resume".
parser.add_argument('--resume', default = 'False', help = 'Accepts "True" or "False". ')
parser.add_argument('--last_epoch', type=int, default = 0, help = 'In case of resuming training use last saved epoch number and in case of loading a model, set to model number.')
# `choices` rejects a mistyped role here instead of failing later with an undefined flag.
parser.add_argument('--adversary_or_utilizer', type=str, default = 'utilizer', choices = ['utilizer', 'adversary'], help = 'This model should train on obfuscated dataset as "utilizer" or "adversary"')
parser.add_argument('--save_path', type=str, required=True, help = 'Full path on your google drive to save model and plots. And also load from it. Like "drive/MyDrive/adjustable-privacy/Models/adv-or-utl/utl-uci-model0-g3000-f20/"')
parser.add_argument('--epoch_numbers', type=int, default = 20, help = 'Number of epochs to train model. (when you want load a model, it should set to that model number)')
parser.add_argument('--dataset_path', type=str, default = "", help = 'Full path on your google drive to adult.csv. Like "drive/MyDrive/adjustable-privacy/Datasets/"')
parser.add_argument('--model_number', type=int, required=True, help = 'The epoch number of desired obfuscator model (model number)')
parser.add_argument('--load_path', type=str, required=True, help = 'Full path on your google drive to load model from. Like "drive/MyDrive/adjustable-privacy/Models/categorical-Obfuscator/"')
parser.add_argument('--use_g', required=True, help = 'Accepts "True" or "False". Activate g function or not.')
parser.add_argument('--lambda_v', type=int, default = -50, help = 'Value of lambda (only when use_g=True)')
parser.add_argument('--noise', type=int, default = 0, help = 'Value of noise coefficient.')
parser.add_argument('--weak_adversary_model_path', type=str, required=True, help = 'Full path on your google drive to load weak adversary model from. Like "drive/MyDrive/adjustable-privacy/Models/categorical-Gender/"')
parser.add_argument('--weak_adversary_model_number', type=int, required=True, help = 'Weak Adversary model number')

command_string = "--resume False" \
" --last_epoch 0" \
" --adversary_or_utilizer utilizer" \
" --save_path drive/MyDrive/adjustable-privacy/Models/adv-or-utl/utl-uci-model169-g50-f100/" \
" --epoch_numbers 40" \
" --dataset_path drive/MyDrive/adjustable-privacy/Datasets/" \
" --model_number 169" \
" --load_path drive/MyDrive/adjustable-privacy/Models/categorical-Obfuscator/" \
" --use_g True" \
" --lambda_v -50" \
" --noise 100" \
" --weak_adversary_model_path drive/MyDrive/adjustable-privacy/Models/categorical-Gender/" \
" --weak_adversary_model_number 11"

args = parser.parse_args(command_string.split())

In [None]:
# Hyper parameters:
# For training utilizer or strong adversary:
# Normalising through str() makes a boolean False (or "false") count as
# "first run" too, while any other value still means "resume" as before.
advutl_isFirstRun = str(args.resume).strip().lower() == 'false'
advutl_lastRunEpochNumber = args.last_epoch
advutl_num_epochs = args.epoch_numbers 
manual_seed = 20
advutl_learning_rate = 0.001
advutl_batch_size = 64
files_not_ready = True
dataset_folder_path = args.dataset_path
data_dir = 'adult'
advutl_saving_path = args.save_path
suffling_train_data_for_advutl = True

# Apply the seed to every RNG this notebook draws from (shuffling, weight
# initialisation, obfuscation noise); it was previously defined but unused.
random.seed(manual_seed)
np.random.seed(manual_seed)
torch.manual_seed(manual_seed)

# Role of the model trained on the obfuscated data.
if args.adversary_or_utilizer == 'utilizer':
  is_adv = False
elif args.adversary_or_utilizer == 'adversary':
  is_adv = True
else:
  # Fail here with a clear message instead of a NameError on `is_adv` below.
  raise ValueError('adversary_or_utilizer must be "utilizer" or "adversary", got %r'
                   % (args.adversary_or_utilizer,))

# Position of the target label inside a dataset sample (data, private, utility):
# 1 selects the private label, 2 the utility label.
data_index = 1 if is_adv else 2

# Number of workers for dataloader
workers = 2
# Beta1 hyperparam for Adam optimizers
beta1 = 0.5
# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

suffling_train_data_for_obf = False
is_aware = False
use_g = str(args.use_g).strip().lower() == 'true'
g_eff_val = args.lambda_v
miu = 0
coef_for_var = args.noise

p2r_model_number = args.model_number
p2r_model_path = args.load_path
p2r_batch_size = 64

adv_model_number = args.weak_adversary_model_number
adv_model_path = args.weak_adversary_model_path

In [None]:
# Check if CUDA is available
# Report once which device type training will use.
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available!  Training on GPU ...')
else:
    print('CUDA is not available.  Training on CPU ...')

In [None]:
# Mount google Drive
# Requires the Google Colab runtime (google.colab is imported here).
# The Drive is mounted at /content/drive; the relative "drive/MyDrive/..."
# paths used in the parser cell presumably rely on the working directory
# being /content -- TODO confirm when running outside the default Colab setup.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# download dataset and unzip
# Copies adult.csv from the dataset folder into the local data directory and
# makes sure the folder used for checkpoints and plots exists.

def _ensure_directory(path, label):
    """Create `path` including missing parent folders.

    Prints the outcome and, like the previous best-effort behaviour, reports
    an OSError instead of raising it. An already existing directory is not
    treated as an error.
    """
    if os.path.isdir(path):
        print("%s already exists" % label)
        return
    try:
        # makedirs (unlike mkdir) also creates missing parents, which the
        # nested save path needs on a fresh Drive.
        os.makedirs(path)
        print("%s created successfully" % label)
    except OSError as e:
        print("Error: %s" % (e.strerror))


if files_not_ready:
    dataset_csv_path = dataset_folder_path + '/adult.csv'

    _ensure_directory(data_dir, "data folder")

    shutil.copyfile(dataset_csv_path, data_dir + r'/adult.csv')

_ensure_directory(advutl_saving_path, "saving_path directory")

In [None]:
#PreProcess dataset:
# Read the CSV, drop rows containing the '?' missing-value marker, one-hot
# encode the categorical columns, split 80/20 and standardise the features.
df = pd.read_csv(os.path.join(data_dir,'adult.csv'))
df = df.replace({'?':np.nan})
df = df.dropna()
# The dummy dtype is pinned to uint8: pandas >= 2.0 returns bool columns by
# default, which would turn the label tensors into bool tensors downstream.
df1 = pd.get_dummies(df, dtype=np.uint8)
train, test = train_test_split(df1, test_size = 0.2, random_state = 42)

# Label columns, in the order the rest of the notebook indexes them.
utility_label_columns = ['income_<=50K','income_>50K']
private_label_columns = ['gender_Male', 'gender_Female']

utility_train_true_labels = np.array(train[utility_label_columns])
utility_test_true_labels = np.array(test[utility_label_columns])
private_train_true_labels = np.array(train[private_label_columns])
private_test_true_labels = np.array(test[private_label_columns])

# Features are everything except the two label groups.
main_x_train_df = (train.drop(utility_label_columns + private_label_columns, axis='columns'))
main_x_test_df = (test.drop(utility_label_columns + private_label_columns, axis='columns'))

# The scaler is fitted on the training split only and reused for the test split.
standard_scaler = preprocessing.StandardScaler()
standard_scaler.fit(main_x_train_df)
main_x_train = standard_scaler.transform(main_x_train_df)
main_x_test = standard_scaler.transform(main_x_test_df)

In [None]:
from torch.utils.data import Dataset

class UciAdultDataset(Dataset):
    """Dataset over three parallel arrays: features, private labels, utility labels.

    Each sample is the tuple (data, label_p, label_u), where every element is
    the idx-th entry of the corresponding array converted with torch.from_numpy.
    """

    def __init__(self, X, Y_p, Y_u):
        self.X, self.Y_p, self.Y_u = X, Y_p, Y_u

    def __len__(self):
        # The private-label array defines the number of samples.
        return len(self.Y_p)

    def __getitem__(self, idx):
        raw_sample = (self.X[idx], self.Y_p[idx], self.Y_u[idx])
        data, label_p, label_u = (torch.from_numpy(part) for part in raw_sample)
        return data, label_p, label_u

In [None]:
# Load Datas
# Datasets over the original (non-obfuscated) standardised features.
p2r_train_set = UciAdultDataset(main_x_train, private_train_true_labels, utility_train_true_labels)
p2r_test_set = UciAdultDataset(main_x_test, private_test_true_labels, utility_test_true_labels)

# DataLoader
# drop_last is False so that every record passes through the obfuscator.
# Dropping the last partial batch left the final rows of the obfuscated
# arrays as all-zero feature vectors that still carried their true labels.
# Shuffling must stay off for these loaders: the obfuscation step writes the
# outputs back by position, so loader order has to match dataset order.
p2r_train_loader = torch.utils.data.DataLoader(p2r_train_set, batch_size=p2r_batch_size, shuffle=suffling_train_data_for_obf, num_workers=workers, drop_last=False)
p2r_test_loader = torch.utils.data.DataLoader(p2r_test_set, batch_size=p2r_batch_size, shuffle=suffling_train_data_for_obf, num_workers=workers, drop_last=False)

In [None]:
# Decide which device we want to run on
# CUDA is used whenever it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# custom weights initialization
def weights_init(m):
  """Initialise a module in place; intended for use with ``model.apply``.

  Modules whose class name contains 'Linear' get weights drawn from a normal
  distribution (mean 0.0, std 0.02) and a zero bias. All other modules are
  left untouched.

  Args:
    m: the module visited by ``apply``.
  """
  classname = m.__class__.__name__
  if classname.find('Linear') == -1:
    return
  nn.init.normal_(m.weight.data, 0.0, 0.02)
  # A linear layer built with bias=False has bias None; skip it instead of crashing.
  if getattr(m, 'bias', None) is not None:
    nn.init.constant_(m.bias.data, 0)

In [None]:
# Encoder Model
class Encoder(nn.Module):
    """Fully connected encoder with a shared trunk and two output heads.

    The trunk maps a 102-value input to 64 features (102 -> 128 -> 128 -> 64,
    ReLU after each layer). Two heads read those 64 features:
      * classifier head: 64 -> 32 -> 8 -> 2, ending in LogSoftmax over dim 1;
      * "other features" head: 64 -> 64 -> 62, ending in ReLU.

    forward returns the pair (classifier output, other features).
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # Shared trunk (input is 102).
        self.fllc1 = nn.Linear(102, 128)
        self.actv1 = nn.ReLU(inplace=True)
        self.fllc2 = nn.Linear(128, 128)
        self.actv2 = nn.ReLU(inplace=True)
        self.fllc4 = nn.Linear(128, 64)
        self.actv4 = nn.ReLU(inplace=True)

        # Classifier head: 64 trunk features -> 2 log-probabilities.
        self.fllc_main_features2 = nn.Linear(64, 32)
        self.actv_main_features2 = nn.ReLU(inplace=True)
        self.fllc_main_features3 = nn.Linear(32, 8)
        self.actv_main_features3 = nn.ReLU(inplace=True)
        self.fllc_main_features4 = nn.Linear(8, 2)
        self.actv_main_features4 = nn.LogSoftmax(dim=1)

        # Other-features head: 64 trunk features -> 62 values.
        self.fllc_other_features1 = nn.Linear(64, 64)
        self.actv_other_features1 = nn.ReLU(inplace=True)
        self.fllc_other_features2 = nn.Linear(64, 62)
        self.actv_other_features2 = nn.ReLU(inplace=True)

    def forward(self, x):
        # Part 1: shared trunk.
        trunk = self.actv1(self.fllc1(x))
        trunk = self.actv2(self.fllc2(trunk))
        trunk = self.actv4(self.fllc4(trunk))

        # Part 2a: classifier head.
        y1 = self.actv_main_features2(self.fllc_main_features2(trunk))
        y1 = self.actv_main_features3(self.fllc_main_features3(y1))
        y1 = self.actv_main_features4(self.fllc_main_features4(y1))

        # Part 2b: other features.
        y3 = self.actv_other_features1(self.fllc_other_features1(trunk))
        y3 = self.actv_other_features2(self.fllc_other_features2(y3))

        return y1, y3

In [None]:
# Decoder Model
class Decoder(nn.Module):
    """Fully connected decoder: 64 -> 128 -> 128 -> 102.

    The two hidden layers use ReLU; the output layer uses Sigmoid, so every
    output value lies between 0 and 1.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # input size is 64
        self.fllc1 = nn.Linear(64, 128)
        self.actv1 = nn.ReLU(inplace=True)
        self.fllc2 = nn.Linear(128, 128)
        self.actv2 = nn.ReLU(inplace=True)
        self.fllc4 = nn.Linear(128, 102)
        self.actv4 = nn.Sigmoid()

    def forward(self, x):
        hidden = self.actv1(self.fllc1(x))
        hidden = self.actv2(self.fllc2(hidden))
        return self.actv4(self.fllc4(hidden))

In [None]:
# AE Model
class AEModel(nn.Module):
    """Autoencoder (Encoder + Decoder) that can also act as an obfuscator.

    In mode 'train' the input is encoded and decoded unchanged. In mode 'use'
    the encoder's second output gets random noise added and, when the
    module-level flag ``use_g`` is true, the encoder's first output is
    replaced by a transformed copy of the labels passed to ``forward``.

    Args:
        ngpu: stored on the instance and passed to Encoder/Decoder.
        mode: 'train' or 'use'.
        miu: constant added to the noise.
        coef_for_var: multiplier of the noise scale.
        g_eff_val: value written where a label entry equals 0.
    """

    def __init__(self, ngpu, mode='train', miu=0, coef_for_var=1, g_eff_val=-3000):
        super(AEModel, self).__init__()
        self.ngpu = ngpu
        self.g_eff_val = g_eff_val
        self.miu = miu
        self.coef_for_var = coef_for_var
        self.mode = mode
        self.encoder = Encoder(ngpu).to(device)
        self.decoder = Decoder(ngpu).to(device)

    def tune_noise(self, miu=0, coef_for_var=1, g_eff_val=-3000):
        """Update the noise offset, the noise coefficient and the g value."""
        self.miu = miu
        self.coef_for_var = coef_for_var
        self.g_eff_val = g_eff_val

    def change_mode(self, mode='train'):
        """Switch between 'train' and 'use' behaviour of forward."""
        self.mode = mode

    def add_noise(self, nodes):
      """Add Gaussian noise to ``nodes`` in place and return it.

      The noise is ``miu + scale * N(0, 1)`` with
      ``scale = coef_for_var * mean(nodes)``.
      """
      with torch.no_grad():
        var = (self.coef_for_var) * (torch.mean(nodes).item())
        noise = self.miu + (var) * torch.randn(nodes.size())
        # Follow the tensor being modified rather than a global device.
        noise = noise.to(nodes.device)
        nodes.add_(noise)
        return nodes

    def change_lbl(self, nodes, lbls):
      """Return a copy of ``lbls`` with 0 -> g_eff_val and 1 -> 0.

      Both replacements are decided on the original values, so the result is
      also correct when g_eff_val is 1, and the caller's tensor is not
      modified. ``nodes`` is accepted for interface compatibility only.
      """
      with torch.no_grad():
        replaced = torch.where(lbls == 0, torch.full_like(lbls, self.g_eff_val), lbls)
        replaced = torch.where(lbls == 1, torch.zeros_like(lbls), replaced)
        return replaced

    def forward(self, x, y1_real_lbl=None):
        """Encode, optionally obfuscate, then decode.

        Args:
            x: input batch for the encoder.
            y1_real_lbl: label tensor; needed in mode 'use' when ``use_g`` is true.

        Returns:
            (decoder output, first code part fed to the decoder).

        Raises:
            ValueError: labels are needed but were not supplied.
        """
        y1, y3 = self.encoder(x)
        if self.mode=='use':
            if use_g:
              labels_missing = y1_real_lbl is None or (
                  isinstance(y1_real_lbl, (list, tuple)) and len(y1_real_lbl) == 0)
              if labels_missing:
                raise ValueError("y1_real_lbl is required when mode is 'use' and use_g is enabled")
              y1 = self.change_lbl(y1, y1_real_lbl)
            y3 = self.add_noise(y3)
        y = torch.cat((y1, y3), 1)
        x = self.decoder(y)
        return x, y1

In [None]:
# Create the AE
netAE = AEModel(ngpu)
netAE = netAE.to(device)

# Handle multi-gpu if desired
wrap_ae_in_data_parallel = device.type == 'cuda' and ngpu > 1
if wrap_ae_in_data_parallel:
    netAE = nn.DataParallel(netAE, device_ids=list(range(ngpu)))

In [None]:
# Function - Save:
def save_model(saving_path, name, number, model, res):
  """Write a checkpoint file ``checkpoint-<name>-<number>.pth`` into ``saving_path``.

  The checkpoint is a dict with the keys 'res' (the given results object) and
  'state_dict' (``model.state_dict()``).

  Args:
    saving_path: target folder, with or without a trailing slash.
    name: tag placed in the file name.
    number: epoch/model number placed in the file name.
    model: module whose state_dict is stored.
    res: results object stored next to the weights.

  Returns:
    True after the file has been written.
  """
  file_name = 'checkpoint-' + name + '-' + str(number) + '.pth'
  # os.path.join gives the same path as plain concatenation when the folder
  # ends with a slash, and also works when it does not.
  checkpoint_path = os.path.join(saving_path, file_name)
  checkpoint = {'res': res,
                'state_dict': model.state_dict()}
  torch.save(checkpoint, checkpoint_path)
  return True

In [None]:
# Function - Load:
def load_model(saving_path, name, number, model, device):
  """Load ``checkpoint-<name>-<number>.pth`` from ``saving_path`` into ``model``.

  Args:
    saving_path: folder holding the checkpoint, with or without a trailing slash.
    name: tag used in the file name.
    number: epoch/model number used in the file name.
    model: module that receives the stored state_dict (modified in place).
    device: passed to torch.load as map_location.

  Returns:
    dict with 'model' (the same module object) and 'res' (the stored results).
  """
  file_name = 'checkpoint-' + name + '-' + str(number) + '.pth'
  # os.path.join gives the same path as plain concatenation when the folder
  # ends with a slash, and also works when it does not.
  checkpoint_path = os.path.join(saving_path, file_name)
  checkpoint = torch.load(checkpoint_path, map_location=device)
  res = checkpoint['res']
  model.load_state_dict(checkpoint['state_dict'])
  return {'model':model,
          'res':res}

In [None]:
# Load p2r model:
# Restores the trained obfuscator weights into netAE (updated in place).
ae_load = load_model(
    saving_path=p2r_model_path,
    name='ae',
    number=p2r_model_number,
    model=netAE,
    device=device,
)

In [None]:
def extract_class_index(labels):
  """Return column 0 of a two-dimensional label batch."""
  first_column = labels[:, 0]
  return first_column

In [None]:
def extract_two_value_labels(labels):
  """Return a two-column batch holding column 1 followed by column 0 of ``labels``."""
  column_zero, column_one = labels[:,0], labels[:,1]
  return torch.stack((column_one, column_zero), dim=1)

In [None]:
def convert_dataset(my_loader, modified_x):
    """Run every batch of ``my_loader`` through ``netAE`` and store the outputs.

    Outputs are written into ``modified_x`` in loader order, so the loader
    must not shuffle if the rows are to stay aligned with the dataset. The
    write position advances by the actual size of each batch, so the function
    does not depend on a global batch size and handles a shorter final batch.

    Args:
        my_loader: loader yielding (data, private labels, utility labels).
        modified_x: 2-D numpy array that receives the outputs (modified in place).
    """
    write_pos = 0
    prog_bar = tqdm(my_loader, total=len(my_loader))
    with torch.no_grad():
        for data in prog_bar:
            # Element 2 of each sample is passed on to the autoencoder as labels.
            inputs, labels = data[0], data[2]
            labels = extract_two_value_labels(labels)
            inputs, labels = inputs.to(torch.float32), labels.to(torch.float32)
            inputs, labels = inputs.to(device), labels.to(device)
            output, y1 = netAE(inputs, labels)
            batch_rows = output.shape[0]
            modified_x[write_pos : write_pos + batch_rows] = output.to('cpu').numpy()
            write_pos += batch_rows

In [None]:
def one_hot_encode_tensor(my_tensor):
  """Return a float32 tensor of the same shape with 1 at each row's argmax and 0 elsewhere."""
  winners = torch.argmax(my_tensor, 1, keepdim=True)
  blank = torch.zeros(my_tensor.shape, dtype=torch.float32)
  return blank.scatter_(1, winners, 1)

In [None]:
def one_hot_encode_df(df):
  """Snap every categorical column group of ``df`` to a hard one-hot encoding.

  The six numeric columns are copied unchanged. Within each categorical group
  the largest value of a row becomes 1 and the others 0. The pieces are
  concatenated in the order listed below and labelled with ``df.columns``,
  so ``df`` is expected to have its columns in exactly that order.

  Returns:
    A new DataFrame with the index and column labels of ``df``.
  """
  numeric_columns = ["age", "fnlwgt", "educational-num", "capital-gain", "capital-loss", "hours-per-week"]
  categorical_groups = (
    # workclass
    ["workclass_Federal-gov", "workclass_Local-gov", "workclass_Private", "workclass_Self-emp-inc", "workclass_Self-emp-not-inc", "workclass_State-gov", "workclass_Without-pay"],
    # education
    ["education_10th", "education_11th", "education_12th", "education_1st-4th", "education_5th-6th", "education_7th-8th", "education_9th", "education_Assoc-acdm", "education_Assoc-voc", "education_Bachelors", "education_Doctorate", "education_HS-grad", "education_Masters", "education_Preschool", "education_Prof-school", "education_Some-college"],
    # marital status
    ["marital-status_Divorced", "marital-status_Married-AF-spouse", "marital-status_Married-civ-spouse", "marital-status_Married-spouse-absent", "marital-status_Never-married", "marital-status_Separated", "marital-status_Widowed"],
    # occupation
    ["occupation_Adm-clerical", "occupation_Armed-Forces", "occupation_Craft-repair", "occupation_Exec-managerial", "occupation_Farming-fishing", "occupation_Handlers-cleaners", "occupation_Machine-op-inspct", "occupation_Other-service", "occupation_Priv-house-serv", "occupation_Prof-specialty", "occupation_Protective-serv", "occupation_Sales", "occupation_Tech-support", "occupation_Transport-moving"],
    # relationship
    ["relationship_Husband", "relationship_Not-in-family", "relationship_Other-relative", "relationship_Own-child", "relationship_Unmarried", "relationship_Wife"],
    # race
    ["race_Amer-Indian-Eskimo", "race_Asian-Pac-Islander", "race_Black", "race_Other", "race_White"],
    # native country
    ["native-country_Cambodia", "native-country_Canada", "native-country_China", "native-country_Columbia", "native-country_Cuba", "native-country_Dominican-Republic", "native-country_Ecuador", "native-country_El-Salvador", "native-country_England", "native-country_France", "native-country_Germany", "native-country_Greece", "native-country_Guatemala", "native-country_Haiti", "native-country_Holand-Netherlands", "native-country_Honduras", "native-country_Hong", "native-country_Hungary", "native-country_India", "native-country_Iran", "native-country_Ireland", "native-country_Italy", "native-country_Jamaica", "native-country_Japan", "native-country_Laos", "native-country_Mexico", "native-country_Nicaragua", "native-country_Outlying-US(Guam-USVI-etc)", "native-country_Peru", "native-country_Philippines", "native-country_Poland", "native-country_Portugal", "native-country_Puerto-Rico", "native-country_Scotland", "native-country_South", "native-country_Taiwan", "native-country_Thailand", "native-country_Trinadad&Tobago", "native-country_United-States", "native-country_Vietnam", "native-country_Yugoslavia"],
  )

  # Numeric block first, then one hard one-hot block per categorical group.
  pieces = [torch.tensor(df[numeric_columns].values)]
  for group_columns in categorical_groups:
    group_values = torch.tensor(df[group_columns].values)
    pieces.append(one_hot_encode_tensor(group_values))

  final_tensor = torch.cat(pieces, dim=1)
  return pd.DataFrame(data=final_tensor.numpy(), index=df.index, columns=df.columns)

In [None]:
def reconst_by_awareness(modified_x, main_x_df):
  """Re-discretise obfuscated features and standardise them again, in place.

  Steps: undo the global ``standard_scaler``, snap every categorical group to
  a hard one-hot encoding, then fit a fresh StandardScaler on the result and
  apply it.

  The result is written back into ``modified_x``. Previously it was bound to
  a local name only, so callers that ignore the return value never saw it.

  Args:
    modified_x: 2-D numpy array of obfuscated, standardised features;
      overwritten with the reconstructed values.
    main_x_df: DataFrame supplying the index and column labels for the rows.

  Returns:
    The same ``modified_x`` array, for convenience.
  """
  modified_x_inverse = standard_scaler.inverse_transform(modified_x)
  modified_x_inverse_df = pd.DataFrame(data = modified_x_inverse, 
                  index = main_x_df.index, 
                  columns = main_x_df.columns)

  modified_x_inverse_df = one_hot_encode_df(modified_x_inverse_df)

  standard_scaler2 = preprocessing.StandardScaler()
  standard_scaler2.fit(modified_x_inverse_df)
  # Slice assignment updates the caller's array instead of rebinding a local.
  modified_x[...] = standard_scaler2.transform(modified_x_inverse_df)
  return modified_x

In [None]:
# using model to obfuscate dataset 

# Output buffers with the same shapes as the original feature matrices.
modified_x_train = np.zeros(main_x_train.shape)
modified_x_test = np.zeros(main_x_test.shape)

# Put the autoencoder into obfuscation mode with the configured noise settings.
netAE.change_mode('use')
netAE.eval()
netAE.tune_noise(miu, coef_for_var, g_eff_val)

# (message, loader, output buffer, frame with the matching row/column labels)
conversion_jobs = (
  ("Converting train records...\n", p2r_train_loader, modified_x_train, main_x_train_df),
  ("\nConverting test records...", p2r_test_loader, modified_x_test, main_x_test_df),
)
for job_message, job_loader, job_buffer, job_frame in conversion_jobs:
  print(job_message)
  convert_dataset(job_loader, job_buffer)
  if is_aware:
    reconst_by_awareness(job_buffer, job_frame)

In [None]:
# Load Datas
# Obfuscated features paired with the original private and utility labels.
advutl_train_set = UciAdultDataset(modified_x_train, private_train_true_labels, utility_train_true_labels)
advutl_test_set = UciAdultDataset(modified_x_test, private_test_true_labels, utility_test_true_labels)

# DataLoader
advutl_train_loader = torch.utils.data.DataLoader(advutl_train_set, batch_size=advutl_batch_size, shuffle=suffling_train_data_for_advutl, num_workers=workers, drop_last=True)
# Evaluation must see every test record on every pass. Shuffling combined
# with drop_last discarded a different random subset on each evaluation,
# which made the reported loss and accuracy vary between runs.
advutl_test_loader = torch.utils.data.DataLoader(advutl_test_set, batch_size=advutl_batch_size, shuffle=False, num_workers=workers, drop_last=False)

In [None]:
# AdvUtilizer Model
class AdvUtlModel(nn.Module):
    """Fully connected classifier: 102 -> 256 -> 256 -> 128 -> 2.

    Each hidden layer is followed by ReLU and dropout (p = 0.2, 0.3, 0.4);
    the output layer is followed by LogSoftmax over dim 1.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # input is 102
        # classifier:
        self.fllc1 = nn.Linear(102, 256)
        self.actv1 = nn.ReLU(inplace=True)
        self.dropout1 = nn.Dropout(p=0.2)
        self.fllc2 = nn.Linear(256, 256)
        self.actv2 = nn.ReLU(inplace=True)
        self.dropout2 = nn.Dropout(p=0.3)
        self.fllc3 = nn.Linear(256, 128)
        self.actv3 = nn.ReLU(inplace=True)
        self.dropout3 = nn.Dropout(p=0.4)
        self.fllc4 = nn.Linear(128, 2)
        self.actv4 = nn.LogSoftmax(dim=1)

    def forward(self, x):
        hidden_stages = (
            (self.fllc1, self.actv1, self.dropout1),
            (self.fllc2, self.actv2, self.dropout2),
            (self.fllc3, self.actv3, self.dropout3),
        )
        y1 = x
        for linear, activation, dropout in hidden_stages:
            y1 = dropout(activation(linear(y1)))
        # Output layer: log-probabilities over the two classes.
        return self.actv4(self.fllc4(y1))


In [None]:
# Create the AdvUtl
advutilizerModel = AdvUtlModel(ngpu)
advutilizerModel = advutilizerModel.to(device)

# Handle multi-gpu if desired
wrap_advutl_in_data_parallel = device.type == 'cuda' and ngpu > 1
if wrap_advutl_in_data_parallel:
    advutilizerModel = nn.DataParallel(advutilizerModel, device_ids=list(range(ngpu)))

# Apply the weights_init function to randomly initialize all weights
advutilizerModel.apply(weights_init)

In [None]:
# total parameters
# Sum of the element counts of all parameter tensors of the model.
total_params = sum(map(lambda tensor: tensor.numel(), advutilizerModel.parameters()))
print(f"{total_params:,} total parameters.")

In [None]:
# Negative log-likelihood loss, matching the LogSoftmax output of the model.
advutilizerCriterion = nn.NLLLoss()
# Adam with the configured learning rate and beta1; beta2 is fixed at 0.999.
advutilizerOptimizer = optim.Adam(
    advutilizerModel.parameters(),
    lr=advutl_learning_rate,
    betas=(beta1, 0.999),
)

In [None]:
# Save Start Checkpoint
# On a first run, store an "epoch 0" checkpoint with empty histories so the
# load step that follows has a file to read.

if advutl_isFirstRun:
  advutl_res = dict(
    train_losses=[],
    valid_losses=[],
    test_y1_acc=[],
    epoch_number=0,
  )
  save_model(advutl_saving_path, 'ins', 0, advutilizerModel, advutl_res)

In [None]:
# Load Last Checkpoint:
# Restores the model weights and the recorded histories of the chosen epoch.
advutl_load = load_model(
    saving_path=advutl_saving_path,
    name='ins',
    number=advutl_lastRunEpochNumber,
    model=advutilizerModel,
    device=device,
)

train_losses, valid_losses, test_y1_acc, last_epoch = (
    advutl_load['res'][history_key]
    for history_key in ('train_losses', 'valid_losses', 'test_y1_acc', 'epoch_number')
)

In [None]:
# Function - training function
def fit(model, train_loader, optimizer, criterion):
    """Train ``model`` for one pass over ``train_loader``.

    The target of each batch is element ``data_index`` (module-level setting)
    of the sample tuple, reduced to class indices by extract_class_index.

    Args:
        model: network to train (switched to train mode).
        train_loader: loader yielding (data, private labels, utility labels).
        optimizer: optimizer stepping the model parameters.
        criterion: loss taking (outputs, class indices).

    Returns:
        The mean of the per-batch losses.
    """
    model.train()

    train_loss = 0.0
    prog_bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, data in prog_bar:
        inputs, labels = data[0], data[data_index]
        labels = extract_class_index(labels)
        inputs = inputs.to(torch.float32)
        # Class-index targets are cast to long explicitly: the label arrays
        # come from one-hot columns whose dtype (uint8 or bool) depends on the
        # pandas version, and the loss expects integer class indices.
        inputs, labels = inputs.to(device), labels.to(device).long()
        model.zero_grad()
        # Call the module rather than .forward so that hooks are honoured.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()          
    train_loss = train_loss / len(train_loader)
    return train_loss

In [None]:
# Function - validation function
def validate(model, valid_loader, criterion):
    """Compute the mean per-batch loss of ``model`` on ``valid_loader``.

    Runs in eval mode without gradient tracking. The target of each batch is
    element ``data_index`` (module-level setting) of the sample tuple.

    Args:
        model: network to evaluate.
        valid_loader: loader yielding (data, private labels, utility labels).
        criterion: loss taking (outputs, class indices).

    Returns:
        The mean of the per-batch losses.
    """
    model.eval()
    valid_loss = 0.0

    prog_bar = tqdm(enumerate(valid_loader), total=len(valid_loader))
    with torch.no_grad():
        for i, data in prog_bar:
            inputs, labels = data[0], data[data_index]
            labels = extract_class_index(labels)
            inputs = inputs.to(torch.float32)
            # Explicit long cast: the one-hot label dtype (uint8 or bool)
            # depends on the pandas version; the loss expects class indices.
            inputs, labels = inputs.to(device), labels.to(device).long()
            # Call the module rather than .forward so that hooks are honoured.
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            valid_loss += loss.item()
        valid_loss = valid_loss / len(valid_loader)
        return valid_loss

In [None]:
# Calc Accuracy
def calcAccuracyTest(model, test_loader):
    """Return the classification accuracy of ``model`` on ``test_loader``.

    The target of each batch is element ``data_index`` (module-level setting)
    of the sample tuple. Accuracy is correct predictions divided by the number
    of samples seen. For equally sized batches this equals the former mean of
    per-batch accuracies, and it stays exact when the last batch is smaller.

    Args:
        model: network producing per-class scores (moved to ``device``).
        test_loader: loader yielding (data, private labels, utility labels).

    Returns:
        Accuracy as a float between 0 and 1.
    """
    print('Testing')
    model.to(device)
    model.eval()
    correct_count = 0
    sample_count = 0
    prog_bar = tqdm(enumerate(test_loader), total=len(test_loader))
    with torch.no_grad():
        for i, data in prog_bar:
            inputs, labels = data[0], data[data_index]
            labels = extract_class_index(labels)
            inputs = inputs.to(torch.float32)
            inputs, labels = inputs.to(device), labels.to(device)
            output = model(inputs)
            ps_y1 = torch.exp(output)
            top_p_y1, top_class_y1 = ps_y1.topk(1, dim=1)
            equals_y1 = top_class_y1 == labels.view(*top_class_y1.shape)
            correct_count += equals_y1.sum().item()
            sample_count += len(equals_y1)
    y1_accuracy = correct_count / sample_count
    return y1_accuracy

In [None]:
# Training Loop
advutilizerModel.to(device)
save_every_epoch = 1

# The adversary is scored on the obfuscated test set, the utilizer on the
# original (non-obfuscated) one.
here_test_loader = advutl_test_loader if is_adv else p2r_test_loader

start = time.time()
print("Starting Training Loop...")

# Continue after the epoch stored in the loaded checkpoint.
first_epoch, final_epoch = last_epoch+1, advutl_num_epochs
for epoch in range(first_epoch, final_epoch+1):
    print(f"Epoch {epoch}/{advutl_num_epochs}: ")
    train_loss = fit(advutilizerModel, advutl_train_loader, advutilizerOptimizer, advutilizerCriterion)
    valid_loss = validate(advutilizerModel, advutl_test_loader, advutilizerCriterion)
    y1_accuracy = calcAccuracyTest(advutilizerModel, here_test_loader)

    # Extend the running histories with this epoch's numbers.
    for history, epoch_value in ((train_losses, train_loss),
                                 (valid_losses, valid_loss),
                                 (test_y1_acc, y1_accuracy)):
        history.append(epoch_value)

    advutl_res = dict(
        train_losses=train_losses,
        valid_losses=valid_losses,
        test_y1_acc=test_y1_acc,
        epoch_number=epoch,
    )
    if epoch % save_every_epoch == 0:
        save_model(advutl_saving_path, 'ins', epoch, advutilizerModel, advutl_res)

    print(f"\nTrain Loss: {train_loss:.6f}")
    print(f"Valid Loss: {valid_loss:.6f}")
    print(f"Accuracy on Testset: {y1_accuracy:.6f}")

end = time.time()
elapsed_minutes = (end-start)/60
print(f"\nTraining time: {elapsed_minutes:.3f} minutes")

print('TRAINING COMPLETE')

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

print('Loss plot...')

# loss plots
# Training and validation loss per epoch, saved next to the checkpoints.
loss_fig, loss_ax = plt.subplots(figsize=(10,7))
loss_ax.set_title("Train-Valid Loss Trend")
loss_ax.plot(train_losses, color='green', label='Training Loss')
loss_ax.plot(valid_losses, color='blue', label='Validation Loss')
loss_ax.legend(frameon=False)
loss_ax.set_xlabel("epochs")
loss_ax.set_ylabel("Loss")
loss_fig.savefig(advutl_saving_path + "loss_plot.png")
plt.show()

In [None]:
# Test-set accuracy per epoch, saved next to the checkpoints.
acc_fig, acc_ax = plt.subplots(figsize=(10,7))
acc_ax.set_title("Main Label Accuracy Trend")
acc_ax.plot(test_y1_acc, color='green', label='Main Label Test set Accuracy')
acc_ax.legend(frameon=False)
acc_ax.set_xlabel("epochs")
acc_ax.set_ylabel("Accuracy")
acc_fig.savefig(advutl_saving_path + "accuracy_test_plot.png")
plt.show()

In [None]:
# Adversary Model
class AdvModel(nn.Module):
    """Fully connected classifier: 102 -> 256 -> 256 -> 128 -> 2.

    Each hidden layer is followed by ReLU and dropout (p = 0.2, 0.3, 0.4);
    the output layer is followed by LogSoftmax over dim 1.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # input is 102
        # classifier:
        self.fllc1 = nn.Linear(102, 256)
        self.actv1 = nn.ReLU(inplace=True)
        self.dropout1 = nn.Dropout(p=0.2)
        self.fllc2 = nn.Linear(256, 256)
        self.actv2 = nn.ReLU(inplace=True)
        self.dropout2 = nn.Dropout(p=0.3)
        self.fllc3 = nn.Linear(256, 128)
        self.actv3 = nn.ReLU(inplace=True)
        self.dropout3 = nn.Dropout(p=0.4)
        self.fllc4 = nn.Linear(128, 2)
        self.actv4 = nn.LogSoftmax(dim=1)

    def forward(self, x):
        hidden_stages = (
            (self.fllc1, self.actv1, self.dropout1),
            (self.fllc2, self.actv2, self.dropout2),
            (self.fllc3, self.actv3, self.dropout3),
        )
        y1 = x
        for linear, activation, dropout in hidden_stages:
            y1 = dropout(activation(linear(y1)))
        # Output layer: log-probabilities over the two classes.
        return self.actv4(self.fllc4(y1))


In [None]:
# Create the ADV
adversaryModel = AdvModel(ngpu)
adversaryModel = adversaryModel.to(device)

# Handle multi-gpu if desired
wrap_adv_in_data_parallel = device.type == 'cuda' and ngpu > 1
if wrap_adv_in_data_parallel:
    adversaryModel = nn.DataParallel(adversaryModel, device_ids=list(range(ngpu)))

# Restore the weak adversary's weights into the freshly created model.
adv_load = load_model(
    saving_path=adv_model_path,
    name='ins',
    number=adv_model_number,
    model=adversaryModel,
    device=device,
)

In [None]:
# Calc Accuracy for adversary
def calcAdvAccuracyTest(model, test_loader):
    """Return the accuracy of ``model`` on the private label of ``test_loader``.

    The target is always element 1 of the sample tuple (the private label).
    Accuracy is correct predictions divided by the number of samples seen.
    For equally sized batches this equals the former mean of per-batch
    accuracies, and it stays exact when the last batch is smaller.

    Args:
        model: network producing per-class scores (moved to ``device``).
        test_loader: loader yielding (data, private labels, utility labels).

    Returns:
        Accuracy as a float between 0 and 1.
    """
    model.to(device)
    print("Calculating Accuracy...")
    model.eval()
    correct_count = 0
    sample_count = 0
    prog_bar = tqdm(enumerate(test_loader), total=len(test_loader))
    with torch.no_grad():
        for i, data in prog_bar:
            inputs, labels = data[0], data[1]
            labels = extract_class_index(labels)
            inputs = inputs.to(torch.float32)
            inputs, labels = inputs.to(device), labels.to(device)
            output = model(inputs)
            ps_y1 = torch.exp(output)
            top_p_y1, top_class_y1 = ps_y1.topk(1, dim=1)
            equals_y1 = top_class_y1 == labels.view(*top_class_y1.shape)
            correct_count += equals_y1.sum().item()
            sample_count += len(equals_y1)
    y1_accuracy = correct_count / sample_count
    return y1_accuracy

In [None]:
# For Adversary
# Test on obfuscated data
# Scores the pre-trained weak adversary on the obfuscated test loader.
adversaryModel.to(device)
weak_adv_accuracy = calcAdvAccuracyTest(model=adversaryModel, test_loader=advutl_test_loader)
weak_adv_report = f"\n Weak Adversary Accuracy on Testset: {weak_adv_accuracy:.6f}"
print(weak_adv_report)