In [1]:
import argparse
import json
from models import MLPClassifier, Baseline_Embeddings
from models import Seq2Seq, MLP_D, MLP_G, MLP_I, MLP_I_AE, JSDistance, Seq2SeqCAE, Baseline_Embeddings, Baseline_LSTM
from utils import to_gpu, Corpus, batchify, SNLIDataset, collate_snli
import random
import pickle as pkl
import torch
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable


In [5]:
# Script invocation this notebook cell mirrors:
#python3.6 train_surrogate.py --data_path ./data/classifier --save_path game_output/ --classifier_path ./data --load_pretrained .
cur_dir = '.'

# Seed every RNG *before* any dataset, loader or iterator is created, so that
# the shuffled batch order (and anything else drawing random numbers during
# construction) is reproducible across kernel restarts.
random.seed(1111)
np.random.seed(1111)
torch.manual_seed(1111)

# Word -> index mapping used to feed the autoencoder.
with open(cur_dir + '/vocab.json', 'r') as fin:
    corpus_vocab = json.load(fin)

# The dataset reports "pruned to 11004" for vocab_size=11004-4; presumably the
# 4 extra slots are reserved special tokens -- TODO confirm in utils.SNLIDataset.
corpus_train = SNLIDataset(train=True, vocab_size=11004-4, path='./data/classifier')
corpus_test = SNLIDataset(train=False, vocab_size=11004-4, path='./data/classifier')
trainloader= torch.utils.data.DataLoader(corpus_train, batch_size = 32, collate_fn=collate_snli, shuffle=True)
train_iter = iter(trainloader)
testloader= torch.utils.data.DataLoader(corpus_test, batch_size = 32, collate_fn=collate_snli, shuffle=False)

# Step size of the (currently disabled) sign-gradient perturbation.
EPS = 3e-2


original vocab 41574; pruned to 11004
Number of sentences dropped from ./data/classifier/train.txt: 448221 out of 549367 total
original vocab 41574; pruned to 11004
Number of sentences dropped from ./data/classifier/test.txt: 8288 out of 9824 total


In [8]:
# NOTE(security): torch.load and pickle.load execute arbitrary code embedded in
# the file being unpickled -- only point these at checkpoints you trust.

# Whole-model checkpoints (pickled modules, not state dicts). Files are opened
# in a `with` block so the handles are closed as soon as loading finishes.
with open(cur_dir + '/models/autoencoder_model.pt', 'rb') as fin:
    autoencoder = torch.load(fin)
#gan_gen = torch.load(open(cur_dir + '/models/gan_gen_model.pt', 'rb'))
#gan_disc = torch.load(open(cur_dir + '/models/gan_disc_model.pt', 'rb'))
with open(cur_dir + '/models/inverter_model.pt', 'rb') as fin:
    inverter = torch.load(fin)

# Target classifier, restored from a state dict.
classifier1 = Baseline_Embeddings(100, vocab_size=11004)
#classifier1 = Baseline_LSTM(100,300,maxlen=args.maxlen, gpu=args.cuda)
classifier1.load_state_dict(torch.load('./models' + "/baseline/model_emb.pt"))
with open('./models' + "/vocab.pkl", 'rb') as fin:
    vocab_classifier1 = pkl.load(fin)

# Surrogate classifier operating on the two latent codes (premise, hypothesis).
mlp_classifier = MLPClassifier(100 * 2, 3, layers='100-50')
#if not args.train_mode:
mlp_classifier.load_state_dict(torch.load('./surrogate{0}.pt'.format('100-50')))

print(classifier1)
print(autoencoder)
print(inverter)
print(mlp_classifier)

# Optimizer for the surrogate only. The learning rate was written `1e03`
# (= 1000.0), which makes Adam diverge; 1e-3 is the intended value.
optimizer = optim.Adam(mlp_classifier.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999))


Baseline_Embeddings(
  (embedding_prem): Embedding(11004, 100)
  (embedding_hypo): Embedding(11004, 100)
  (linear): Linear(in_features=200, out_features=3, bias=True)
)
Seq2SeqCAE(
  (embedding): Embedding(11004, 300)
  (embedding_decoder): Embedding(11004, 300)
  (encoder): Sequential(
    (layer-1): Conv1d(300, 500, kernel_size=(3,), stride=(1,))
    (bn-1): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation-1): LeakyReLU(negative_slope=0.2, inplace)
    (layer-2): Conv1d(500, 700, kernel_size=(3,), stride=(2,))
    (bn-2): BatchNorm1d(700, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation-2): LeakyReLU(negative_slope=0.2, inplace)
    (layer-3): Conv1d(700, 1000, kernel_size=(3,), stride=(2,))
    (bn-3): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation-3): LeakyReLU(negative_slope=0.2, inplace)
  )
  (linear): Linear(in_features=1000, out_features=300, bias=

In [9]:
from torch.autograd import Variable

def evaluate_model(loader=None):
    """Print and return the accuracy of ``classifier1`` over a data loader.

    Args:
        loader: iterable of 7-tuples whose first three items are the premise,
            hypothesis and target tensors. Defaults to ``trainloader``, which
            is what this function has always iterated.
            NOTE(review): the original local was named ``test_iter`` although
            it wrapped ``trainloader`` -- pass ``testloader`` explicitly if
            held-out accuracy is what you want.

    Returns:
        float: fraction of examples whose arg-max prediction equals the target.
    """
    if loader is None:
        loader = trainloader

    classifier1.eval()
    # Put inputs wherever the model lives. The original consulted `args.cuda`,
    # but `args` is never defined in this notebook and raised NameError.
    device = next(classifier1.parameters()).device

    correct = 0
    total = 0
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for batch in loader:
            premise, hypothesis, target, _, _, _, _ = batch

            premise = premise.to(device)
            hypothesis = hypothesis.to(device)
            target = target.to(device)

            prob_distrib = classifier1.forward((premise, hypothesis))
            predictions = np.argmax(prob_distrib.data.cpu().numpy(), 1)
            correct += len(np.where(target.data.cpu().numpy() == predictions)[0])
            total += premise.size(0)
    acc = correct / float(total)
    print("Accuracy:{0}".format(acc))
    return acc

# Transfer every model used by the cells below onto the GPU.
# (The GAN generator / discriminator are not loaded in this notebook.)
autoencoder.gpu = True
autoencoder, inverter = autoencoder.cuda(), inverter.cuda()
classifier1, mlp_classifier = classifier1.cuda(), mlp_classifier.cuda()
# start_symbols is moved explicitly; presumably it is a plain attribute that
# Module.cuda() does not relocate -- TODO confirm in models.py.
autoencoder.start_symbols = autoencoder.start_symbols.cuda()


In [10]:
def train_process(premise, hypothesis, target, premise_words, hypothesis_words, premise_length, hypothesis_length):
    """Run one optimisation step of the surrogate ``mlp_classifier`` on a batch.

    The surrogate sees the inverter's latent codes of premise and hypothesis
    and is fitted against the soft-maxed outputs of ``classifier1``.

    Args:
        premise, hypothesis: index tensors fed directly to ``classifier1``.
        target: gold labels, used only for the two reported accuracies.
        premise_words, hypothesis_words: token lists per sentence; re-indexed
            with ``corpus_vocab`` (unknown words map to index 3) for the
            autoencoder.
        premise_length, hypothesis_length: forwarded to ``autoencoder.encode``.

    Returns:
        tuple: ``(loss, acc, acc_surrogate)`` as Python floats -- the loss
        value, ``classifier1``'s accuracy and the surrogate's accuracy on
        ``target``.
    """
    # Only the surrogate is in training mode; the rest run in eval mode.
    for frozen_model in (autoencoder, inverter, classifier1):
        frozen_model.eval()
    mlp_classifier.train()

    def _to_indices(sentences):
        # Words -> corpus_vocab ids, falling back to 3 for unknown words.
        return torch.tensor([[corpus_vocab.get(tok, 3) for tok in sent] for sent in sentences]).cuda()

    def _batch_accuracy(scores):
        # Fraction of rows whose arg-max matches the gold label.
        return (torch.argmax(scores, 1) == target).to(torch.float32).mean().item()

    premise_idx = _to_indices(premise_words)
    hypothesis_idx = _to_indices(hypothesis_words)

    # Latent codes are detached: no gradient flows into autoencoder/inverter.
    z_prem = inverter(autoencoder.encode(premise_idx, premise_length, noise=False)).detach()
    z_hypo = inverter(autoencoder.encode(hypothesis_idx, hypothesis_length, noise=False)).detach()

    output = mlp_classifier(z_prem, z_hypo)
    gold = classifier1((premise, hypothesis)).detach()

    acc = _batch_accuracy(gold)
    acc_surrogate = _batch_accuracy(output)

    # Negative batch-mean of <surrogate output, softmax(classifier output)>.
    # NOTE(review): this is a cross-entropy only if mlp_classifier emits
    # log-probabilities -- confirm against MLPClassifier.
    soft_labels = F.softmax(gold, dim=1)
    loss = -torch.mean(torch.sum(output * soft_labels, 1), 0)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item(), acc, acc_surrogate


In [17]:
def classifier_pred(pw, hw):
    """Return ``classifier1``'s class probabilities for one sentence pair.

    Args:
        pw: premise as a sequence of word strings.
        hw: hypothesis as a sequence of word strings.

    Returns:
        numpy.ndarray: soft-maxed scores for the single example, with the
        batch dimension squeezed away.
    """
    classifier1.eval()

    def _as_batch_of_one(words):
        # Look words up in the classifier's own vocabulary (unknown -> 3)
        # and add a leading batch dimension.
        ids = [vocab_classifier1.get(tok, 3) for tok in words]
        return torch.tensor(ids).cuda().unsqueeze(0)

    premise_idx = _as_batch_of_one(pw)
    hypothesis_idx = _as_batch_of_one(hw)

    scores = classifier1((premise_idx, hypothesis_idx))
    probs = F.softmax(scores, 1)
    return probs.squeeze(0).cpu().detach().numpy()


In [205]:
def cross_entropy(p, q):
    """Return ``-sum(p * log(q))`` over all elements as a 0-dim tensor.

    Args:
        p: tensor of weights.
        q: tensor of the same (or broadcastable) shape; its element-wise
            natural logarithm is taken, so zero entries yield ``-inf``.
    """
    return -torch.sum(p * torch.log(q))

In [266]:
def perturb(criterion, premise, hypothesis, target, premise_words, hypothesis_words, premise_length, hypothesis_length, n_steps=1000):
    """Optimise the hypothesis code of one example and decode the result.

    The hypothesis is encoded by the autoencoder; its code is then updated
    with Adam for ``n_steps`` steps to minimise
    ``cross_entropy(softmax(surrogate), softmax(classifier1))``, and finally
    decoded back to word indices with ``autoencoder.generate``.

    Args:
        criterion, premise, hypothesis, target: accepted for interface
            compatibility; not used by the current optimisation.
        premise_words, hypothesis_words: word lists of a single example.
        premise_length, hypothesis_length: lengths of that single example;
            each is wrapped into a one-element batch before encoding.
        n_steps: number of Adam updates applied to the hypothesis code
            (default 1000, the previously hard-coded value).

    Returns:
        numpy.ndarray: generated hypothesis word indices with the batch
        dimension removed.
    """
    autoencoder.eval()
    inverter.eval()
    classifier1.eval()
    mlp_classifier.eval()

    # Wrap the single example into a batch of one. The premise length was
    # previously assigned to a misspelled name (`premisea_length`) and thus
    # passed on unwrapped, unlike the hypothesis length.
    premise_words = [premise_words]
    hypothesis_words = [hypothesis_words]
    premise_length = [premise_length]
    hypothesis_length = [hypothesis_length]

    premise_idx = torch.tensor([[corpus_vocab.get(w, 3) for w in s] for s in premise_words]).cuda()
    hypothesis_idx = torch.tensor([[corpus_vocab.get(w, 3) for w in s] for s in hypothesis_words]).cuda()

    c_prem = autoencoder.encode(premise_idx, premise_length, noise=False)
    z_prem = inverter(c_prem).detach()

    # The hypothesis code is the only quantity being optimised.
    c_hypo = autoencoder.encode(hypothesis_idx, hypothesis_length, noise=False).detach()
    c_hypo.requires_grad = True

    mlp_classifier.zero_grad()
    inverter.zero_grad()

    # The target classifier's distribution does not depend on c_hypo, so it
    # is computed once instead of on every step. dim=1 is the class axis
    # (made explicit; it was the implicit choice for these 2-D outputs).
    output2 = F.softmax(classifier1.forward((premise_idx, hypothesis_idx)), dim=1).detach()

    code_optimizer = torch.optim.Adam([c_hypo])
    for _ in range(n_steps):
        z_hypoprime = inverter(c_hypo)
        output3 = F.softmax(mlp_classifier(z_prem, z_hypoprime), dim=1)
        loss4 = cross_entropy(output3, output2)
        code_optimizer.zero_grad()
        loss4.backward()
        code_optimizer.step()

    # Decode the optimised code; 10 is presumably the maximum generated
    # length -- TODO confirm against autoencoder.generate.
    nhypo_idx = autoencoder.generate(c_hypo, 10, False)
    return nhypo_idx.squeeze(0).cpu().numpy()

In [268]:
criterion = nn.CrossEntropyLoss().cuda()

niter = 0

# Inverse vocabulary: index -> word, used to print generated sentences.
idx2words = {idx: word for word, idx in corpus_vocab.items()}

# NOTE(review): the number of iterations is bounded by len(testloader) but the
# batches are drawn from train_iter -- confirm which split is intended.
while niter < len(testloader):
    niter += 1
    # Built-in next() works on any iterator; the `.next()` method is a
    # Python-2-era alias that is not part of the iterator protocol.
    batch = next(train_iter)
    for p, h, t, pw, hw, pl, hl in zip(*batch):
        nh = perturb(criterion, p.cuda(), h.cuda(), t.cuda(), pw, hw, pl, hl)
        print('--------------------------------')
        print('Target ', t)
        print(' '.join(pw))
        print(' '.join(hw))
        # Prepend the start symbol and cap the printed sentence at 20 tokens.
        nhw = (['<sos>'] + [idx2words[i] for i in nh])[:20]
        print(' '.join(nhw))
        print('Old ', classifier_pred(pw, hw))
        print('New ', classifier_pred(pw, nhw))
        # Only the first example of each batch is perturbed.
        break



--------------------------------
Target  tensor(1)
<sos> a skateboarder doing a trick on a ramp <eos>
<sos> a skateboarder performs an ollie of a ramp .
<sos> people are doing an outdoor body of to watch <eos>
Old  [0.2884084  0.38671458 0.324877  ]
New  [0.6106356  0.26335478 0.12600963]
--------------------------------
Target  tensor(1)
<sos> rider making high jump on motorcycle . <eos> <pad>
<sos> man is competing in the x games . <pad>
<sos> people are in mud with the street food <oov> <eos>
Old  [0.25560644 0.38255015 0.3618434 ]
New  [0.47206467 0.22966062 0.2982748 ]
--------------------------------
Target  tensor(0)
<sos> man riding his bike pass a ship . <eos>
<sos> there is a man riding a bike outside .
<sos> the crowd is doing important to play their time inside
Old  [0.52941716 0.16276962 0.30781326]
New  [0.14251544 0.70748556 0.14999902]
--------------------------------
Target  tensor(2)
<sos> people diving into a swimming pool . <eos> <pad>
<sos> a woman drinks juice . <

KeyboardInterrupt: 