In [1]:
import argparse
import json
from models import MLPClassifier, Baseline_Embeddings
from models import Seq2Seq, MLP_D, MLP_G, MLP_I, MLP_I_AE, JSDistance, Seq2SeqCAE, Baseline_Embeddings, Baseline_LSTM
from utils import to_gpu, Corpus, batchify, SNLIDataset, collate_snli
import random
import pickle as pkl
import torch
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable


In [2]:
#python3.6 train_surrogate.py --data_path ./data/classifier --save_path game_output/ --classifier_path ./data --load_pretrained .
cur_dir = '.'

with open(cur_dir + '/vocab.json', 'r') as fin:
    corpus_vocab = json.load(fin)

corpus_train = SNLIDataset(train=True, vocab_size=11004-4, path='./data/classifier')
corpus_test = SNLIDataset(train=False, vocab_size=11004-4, path='./data/classifier')
trainloader= torch.utils.data.DataLoader(corpus_train, batch_size = 32, collate_fn=collate_snli, shuffle=True)
train_iter = iter(trainloader)
testloader= torch.utils.data.DataLoader(corpus_test, batch_size = 32, collate_fn=collate_snli, shuffle=False)
random.seed(1111)
np.random.seed(1111)
torch.manual_seed(1111)

EPS = 3e-2


original vocab 41574; pruned to 11004
Number of sentences dropped from ./data/classifier/train.txt: 448221 out of 549367 total
original vocab 41574; pruned to 11004
Number of sentences dropped from ./data/classifier/test.txt: 8288 out of 9824 total


In [3]:
autoencoder = torch.load(open(cur_dir + '/models/autoencoder_model.pt', 'rb'))
#gan_gen = torch.load(open(cur_dir + '/models/gan_gen_model.pt', 'rb'))
#gan_disc = torch.load(open(cur_dir + '/models/gan_disc_model.pt', 'rb'))
inverter = torch.load(open(cur_dir + '/models/inverter_model.pt', 'rb'))

classifier1 = Baseline_Embeddings(100, vocab_size=11004)
# classifier1 = Baseline_LSTM(100,300,maxlen=10, gpu=args.cuda)

classifier1.load_state_dict(torch.load('./models' + "/baseline/model_emb.pt"))
vocab_classifier1 = pkl.load(open('./models' + "/vocab.pkl", 'rb'))

mlp_classifier = MLPClassifier(100 * 2, 3, layers='100-50')
#if not args.train_mode:
mlp_classifier.load_state_dict(torch.load('./surrogate{0}.pt'.format('100-50')))

print(classifier1)
print(autoencoder)
print(inverter)
print(mlp_classifier)

optimizer = optim.Adam(mlp_classifier.parameters(),
                           lr=1e03,
                           betas=(0.9, 0.999))




RuntimeError: CUDA error: out of memory

In [4]:
from torch.autograd import Variable

def evaluate_model():
    classifier1.eval()

    test_iter = iter(trainloader)
    correct=0
    total=0
    for batch in test_iter:
        premise, hypothesis, target, _, _, _, _ = batch

        if args.cuda:
            premise=premise.cuda()
            hypothesis = hypothesis.cuda()
            target = target.cuda()

        prob_distrib = classifier1.forward((premise, hypothesis))
        predictions = np.argmax(prob_distrib.data.cpu().numpy(), 1)
        correct+=len(np.where(target.data.cpu().numpy()==predictions)[0])
        total+=premise.size(0)
    acc=correct/float(total)
    print("Accuracy:{0}".format(acc))
    return acc

autoencoder.gpu = True
autoencoder = autoencoder.cuda()
autoencoder.start_symbols = autoencoder.start_symbols.cuda()
#gan_gen = gan_gen.cuda()
#gan_disc = gan_disc.cuda()
classifier1 = classifier1.cuda()
inverter = inverter.cuda()
mlp_classifier = mlp_classifier.cuda()


In [5]:
def train_process(premise, hypothesis, target, premise_words, hypothesis_words, premise_length, hypothesis_length):
    #mx = target.max().item()
    #assert(mx >= 0 and mx < 3)
    #for s, s_w in zip(premise, premise_words):
    #    for i, w in zip(s, s_w):
    #        assert(corpus_vocab.get(w, 3) == i)
    #print(hypothesis_words, flush=True)
    autoencoder.eval()
    inverter.eval()
    classifier1.eval()
    mlp_classifier.train()

    #print(premise.max().item(), flush=True)
    #print(hypothesis.max().item(), flush=True)

    premise_idx = torch.tensor([[corpus_vocab.get(w, 3) for w in s] for s in premise_words]).cuda()
    hypothesis_idx = torch.tensor([[corpus_vocab.get(w, 3) for w in s] for s in hypothesis_words]).cuda()

    c_prem = autoencoder.encode(premise_idx, premise_length, noise=False)
    z_prem = inverter(c_prem).detach()

    c_hypo = autoencoder.encode(hypothesis_idx, hypothesis_length, noise=False)
    z_hypo = inverter(c_hypo).detach()

    # z_comb = nn.cat((z_prem, z_hypo), 0).detach()

    output = mlp_classifier(z_prem, z_hypo)
    gold = classifier1((premise, hypothesis)).detach()

    #print(output.shape, flush=True)
    #print(gold.shape, flush=True)

    acc = (torch.argmax(gold, 1) == target).to(torch.float32).mean().item()
    acc_surrogate = (torch.argmax(output, 1) == target).to(torch.float32).mean().item()


    loss = -torch.mean(torch.sum(output * F.softmax(gold, dim=1), 1), 0)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item(), acc, acc_surrogate


In [6]:
def classifier_pred(pw, hw):
    classifier1.eval()

    premise_idx = torch.tensor([vocab_classifier1.get(w, 3) for w in pw]).cuda().unsqueeze(0)
    hypothesis_idx = torch.tensor([vocab_classifier1.get(w, 3) for w in hw]).cuda().unsqueeze(0)

    return F.softmax(classifier1((premise_idx, hypothesis_idx)), 1).squeeze(0).cpu().detach().numpy()


In [7]:
def cross_entropy(p, q):
    q = torch.log(q)
    a = p * q
    a = torch.sum(a)
    a = -a
    return a

In [8]:
ALPHA = 0.05

In [9]:
def perturb(criterion, premise, hypothesis, target, premise_words, hypothesis_words, premise_length, hypothesis_length):
    autoencoder.eval()
    inverter.eval()
    classifier1.eval()
    mlp_classifier.eval()

    premise_words = [premise_words]
    hypothesis_words = [hypothesis_words]
    premisea_length = [premise_length]
    hypothesis_length = [hypothesis_length]


    premise_idx = torch.tensor([[corpus_vocab.get(w, 3) for w in s] for s in premise_words]).cuda()
    hypothesis_idx = torch.tensor([[corpus_vocab.get(w, 3) for w in s] for s in hypothesis_words]).cuda()

    c_prem = autoencoder.encode(premise_idx, premise_length, noise=False)
    z_prem = inverter(c_prem).detach()

#     c_hypo = autoencoder.encode(hypothesis_idx, hypothesis_length, noise=False).detach()
#     c_hypo.requires_grad = True
    c_hypo = autoencoder.encode(hypothesis_idx, hypothesis_length, noise=False)
    z_hypo = inverter(c_hypo)
    
    premise = premise.unsqueeze(0)
    hypothesis = hypothesis.unsqueeze(0)
    target = target.unsqueeze(0)

    mlp_classifier.zero_grad()
    inverter.zero_grad()
    
    #temp = torch.Tensor(1, 300).fill_(0).cuda().detach()
    for j in range(5):
        temp = torch.Tensor(1, 300).normal_(0, 0.1).cuda().detach()
        temp.requires_grad=True
        c_hypoprime = [{'params': temp}]
        optimizer = torch.optim.Adam(c_hypoprime, lr=1e-4)

        for i in range(500):
            output2 = torch.nn.functional.softmax(classifier1.forward((premise_idx, hypothesis_idx))).detach()
            z_hypoprime = inverter(c_hypoprime[0]['params'][0])
            output3 = torch.nn.functional.softmax(mlp_classifier(z_prem, z_hypoprime))
            loss4 = cross_entropy(output3, output2) + ALPHA * torch.norm(z_hypoprime, p=2)
            optimizer.zero_grad()
            loss4.backward()
            optimizer.step()
        if(j == 0):
            bestloss = loss4
            bestadv = c_hypoprime
        elif(bestloss > loss4):
            bestloss = loss4
            bestadv = c_hypoprime        
            
    c_hypoprime = bestadv
    nhypo_idx = autoencoder.generate(c_hypoprime[0]['params'][0], 10, False)
    return nhypo_idx.squeeze(0).cpu().numpy()

In [10]:
def maximum(array):
    a = array[0]
    idx = 0
    for i in range(1, 3):
        if(a < array[i]):
            idx = i
            a = array[i]
    return idx

In [11]:
torch.Tensor(1, 300).fill_(0).requires_grad

False

In [12]:
def kl_divergence(p, q):
    k = torch.log(p / q)
    p = p * k
    return torch.sum(p)

In [13]:
a = torch.tensor([0.5118122,  0.23435633, 0.25383145])
b = torch.tensor([0.51985246, 0.29073852, 0.18940896])
print(kl_divergence(b, a))

tensor(0.0153)


In [14]:
import secrets

def samples(alist):
    secure_random = secrets.SystemRandom()
    num_to_select = int(len(alist) / 2)
    list_of_random_items = secure_random.sample(alist, num_to_select)
    return list_of_random_items

In [None]:
criterion = nn.CrossEntropyLoss().cuda()

niter = 0

idx2words = dict(map(lambda x: (x[1], x[0]), corpus_vocab.items()))
oldcorrect = 0
newcorrect = 0
n = 0
alloutputarr = []
while niter < len(testloader):
    niter += 1
    batch = train_iter.next()
    for p, h, t, pw, hw, pl, hl in zip(*batch):
        outputarr = []
        nh = perturb(criterion, p.cuda(), h.cuda(), t.cuda(), pw, hw, pl, hl)
        print('--------------------------------')
        print('Target ', t)
        print(' '.join(pw))
        print(' '.join(hw))
#         outputarr.append(t)
#         outputarr.append(' '.join(pw))
#         outputarr.append(' '.join(hw))
        nhw = (['<sos>'] + [idx2words[i] for i in nh])[:10]
        print(' '.join(nhw))
        oldpred = classifier_pred(pw, hw)
        newpred = classifier_pred(pw, nhw)
        print('Old ', oldpred)
        print('New ', newpred)
        print('Old Prediction: ' + str(maximum(oldpred)))
        print('New Prediction: ' + str(maximum(newpred)))
        print('similarity: ' + str(kl_divergence(torch.tensor(newpred), torch.tensor(oldpred))))
        if(maximum(oldpred) == t.item()):
            oldcorrect = oldcorrect + 1
        if(maximum(newpred) == t.item()):
            newcorrect = newcorrect + 1
        n = n + 1
#         outputarr.append(' '.join(nhw))
#         outputarr.append(oldpred)
#         outputarr.append(newpred)
#         outputarr.append(maximum(oldpred))
#         outputarr.append(maximum(newpred))
#         outputarr.append(kl_divergence(torch.tensor(newpred), torch.tensor(oldpred)))
#         alloutputarr.append(outputarr)
print('oldcorrect: ' + str(oldcorrect))
print('newcorrect: ' + str(newcorrect))
print('number of premises ' + str(n))
#0 entailment #1 neutral #2 contradiction



--------------------------------
Target  tensor(0)
<sos> several children are standing near goats . <eos> <pad>
<sos> many children are standing . <pad> <pad> <pad> <pad>
<sos> people are rafting along outdoors <eos> <eos> <eos> people
Old  [0.57700485 0.18577948 0.23721564]
New  [0.7036453  0.19632126 0.10003341]
Old Prediction: 0
New Prediction: 0
similarity: tensor(0.0641)
--------------------------------
Target  tensor(0)
<sos> boys playing baseball by the water . <eos> <pad>
<sos> kids playing together outside . <pad> <pad> <pad> <pad>
<sos> three men are sunbathing standing <eos> . <eos> <eos>
Old  [0.6392054  0.17891648 0.18187809]
New  [0.38597903 0.17196932 0.44205165]
Old Prediction: 0
New Prediction: 2
similarity: tensor(0.1911)
--------------------------------
Target  tensor(0)
<sos> a girl with long brown hair standing outside .
<sos> a person with hair <pad> <pad> <pad> <pad> <pad>
<sos> there are many people watching something with . <eos>
Old  [0.69285816 0.15054397 0.1

--------------------------------
Target  tensor(0)
<sos> women wearing sunglasses shopping downtown . <eos> <pad> <pad>
<sos> a woman in sunglasses is shopping . <pad> <pad>
<sos> people are bathing some street vendors . <eos> <eos>
Old  [0.32006347 0.23239294 0.44754365]
New  [0.50220287 0.29356474 0.20423241]
Old Prediction: 2
New Prediction: 0
similarity: tensor(0.1346)
--------------------------------
Target  tensor(2)
<sos> a group of people demonstrating . <eos> <pad> <pad>
<sos> a group of people are sleeping . <pad> <pad>
<sos> a youth tournament player fighting over <eos> . <eos>
Old  [0.3475254  0.16046871 0.49200588]
New  [0.1516326  0.4901128  0.35825467]
Old Prediction: 2
New Prediction: 1
similarity: tensor(0.3078)
--------------------------------
Target  tensor(2)
<sos> many men sit and share a meal together .
<sos> women cook together . <pad> <pad> <pad> <pad> <pad>
<sos> two street performers are being outside . <eos> <eos>
Old  [0.50531477 0.27717024 0.21751495]
New  

--------------------------------
Target  tensor(0)
<sos> a bike jumps through the air . <eos> <pad>
<sos> a bike in mid-air . <pad> <pad> <pad> <pad>
<sos> there dog jumps of not standing <eos> <eos> <eos>
Old  [0.47740084 0.24243613 0.28016296]
New  [0.391508   0.15036704 0.45812503]
Old Prediction: 0
New Prediction: 2
similarity: tensor(0.0758)
--------------------------------
Target  tensor(1)
<sos> a crowd is enjoying an outdoor festival . <eos>
<sos> there is a crowd at a music festival .
<sos> two men wearing boots on a boat . <eos>
Old  [0.5114248  0.32424217 0.16433308]
New  [0.37805083 0.213501   0.40844813]
Old Prediction: 0
New Prediction: 2
similarity: tensor(0.1684)
--------------------------------
Target  tensor(1)
<sos> a chef cooking at a <oov> grill . <eos>
<sos> a cook grilling beef at a <oov> grill <pad>
<sos> adult are wearing hanging on a road <eos> .
Old  [0.16119131 0.4506318  0.38817686]
New  [0.47858652 0.15328974 0.36812377]
Old Prediction: 1
New Prediction: 0