In [1]:
import time
import argparse
import math
import os
import torch
import torch.nn as nn
from torch import optim
import numpy
import matplotlib
from matplotlib import pyplot as plt


# Import your model files.
from model import make_model, Classifier, NoamOpt, LabelSmoothing, fgim_attack
from data import prepare_data, non_pair_data_loader, get_cuda, pad_batch_seuqences, load_human_answer,\
    id2text_sentence, to_var, calc_bleu


In [2]:
from attributedict.collections import AttributeDict

In [3]:
# Run configuration: special-token ids, task/data location and model hyperparameters.
# Later cells add more fields to this object (checkpoint flags, paths, vocabulary).
args = AttributeDict({
    'id_pad': 0,
    'id_unk': 1,
    'id_bos': 2,
    'id_eos': 3,
    'task': 'dialect',
    'word_to_id_file': '',
    'data_path': '../../data/uae-eg/processed_files/',
    'word_dict_max_num': 5,
    'batch_size': 128,
    'max_sequence_length': 64,
    'num_layers_AE': 2,
    'transformer_model_size': 256,
    'transformer_ff_size': 1024,
    'latent_size': 256,
    'word_dropout': 1.0,
    'embedding_dropout': 0.5,
    'learning_rate': 0.001,
    'label_size': 1,
})

In [4]:
# Quick check that config entries are readable with attribute-style access.
args.id_bos

2

In [5]:
# Reuse an existing run: preparation() reads these two fields and, when the flag
# is set, uses 'save/<checkpoint_name>/' instead of a new timestamped directory.
args.if_load_from_checkpoint = True
args.checkpoint_name = "1588047455"

In [6]:
def add_log(ss):
    """Print a timestamped message and append the same line to the log file.

    Args:
        ss: Message to record. It is converted with ``str()`` for both the
            console and the file, so non-string values are accepted.

    Side effects:
        Writes one line to stdout and appends one line to ``args.log_file``.
        The file is written as UTF-8 so that non-ASCII messages do not depend
        on the platform's default encoding.
    """
    line = time.strftime("[%Y-%m-%d %H:%M:%S]: ", time.localtime()) + str(ss)
    print(line)
    with open(args.log_file, 'a', encoding='utf-8') as f:
        f.write(line + '\n')


def add_output(ss):
    """Append one result line to the run's output file.

    Args:
        ss: Value to record; written as ``str(ss)`` followed by a newline.

    Side effects:
        Appends to ``args.output_file``. The file is opened as UTF-8 explicitly:
        the text written here is model output (Arabic in this notebook), and the
        platform default encoding is not guaranteed to be able to represent it.
    """
    with open(args.output_file, 'a', encoding='utf-8') as f:
        f.write(str(ss) + '\n')


def preparation():
    """Fill in the run-specific fields of ``args`` and load the vocabulary.

    Steps, in order:
      1. Pick the run id: ``args.checkpoint_name`` when
         ``args.if_load_from_checkpoint`` is set, otherwise the current Unix time.
      2. Derive ``args.current_save_path``, ``args.log_file`` and
         ``args.output_file`` under ``save/<run id>/`` and create the directory
         if it is missing.
      3. Point ``args.data_path`` at the processed files of ``args.task``
         ('imagecaption' keeps whatever path is already configured).
      4. Call ``prepare_data`` and store its four results on ``args``.

    Raises:
        TypeError: if ``args.task`` is not one of the known task names.
    """
    # --- model save path -------------------------------------------------
    if not args.if_load_from_checkpoint:
        run_id = str(int(time.time()))
        print("create new model save path: %s" % run_id)
    else:
        run_id = args.checkpoint_name

    save_dir = 'save/%s/' % run_id
    args.current_save_path = save_dir
    args.log_file = save_dir + time.strftime("log_%Y_%m_%d_%H_%M_%S.txt", time.localtime())
    args.output_file = save_dir + time.strftime("output_%Y_%m_%d_%H_%M_%S.txt", time.localtime())
    print("create log file at path: %s" % args.log_file)

    # The directory must exist before add_log can append to the log file.
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        add_log("Path: %s is created" % save_dir)
    else:
        add_log("Load checkpoint model from Path: %s" % save_dir)

    # --- task type -------------------------------------------------------
    data_path_by_task = {
        'yelp': '../../data/yelp/processed_files/',
        'amazon': '../../data/amazon/processed_files/',
        'dialect': '../../data/uae-eg/processed_files/',
    }
    task = args.task
    if task in data_path_by_task:
        args.data_path = data_path_by_task[task]
    elif task != 'imagecaption':
        # 'imagecaption' is accepted but leaves args.data_path untouched.
        raise TypeError('Wrong task type!')

    # --- data ------------------------------------------------------------
    prepared = prepare_data(
        data_path=args.data_path,
        max_num=args.word_dict_max_num,
        task_type=args.task,
    )
    (args.id_to_word,
     args.vocab_size,
     args.train_file_list,
     args.train_label_list) = prepared

In [7]:
# Populate args with the save/log paths and the vocabulary before building models.
preparation()

# Autoencoder sized from the config; get_cuda presumably moves it to the GPU
# when one is available -- confirm in data.py.
ae_model = get_cuda(
    make_model(
        d_vocab=args.vocab_size,
        N=args.num_layers_AE,
        d_model=args.transformer_model_size,
        latent_size=args.latent_size,
        d_ff=args.transformer_ff_size,
    )
)

# Classifier that operates on the autoencoder's latent representation.
dis_model = get_cuda(
    Classifier(latent_size=args.latent_size, output_size=args.label_size)
)

create log file at path: save/1588047455/log_2020_04_28_19_30_34.txt
[2020-04-28 19:30:34]: Load checkpoint model from Path: save/1588047455/
prepare data ...
Load word-dict with 41286 size and 5 max_num.


In [8]:
# Restore the trained weights stored in the run directory.
# map_location='cpu' deserializes the tensors on the CPU regardless of the device
# they were saved from; load_state_dict then copies them into the models' existing
# parameters. This lets the checkpoint load on a machine whose GPU setup differs
# from the one used for training.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
ae_model.load_state_dict(
    torch.load(args.current_save_path + 'ae_model_params.pkl', map_location='cpu'))
dis_model.load_state_dict(
    torch.load(args.current_save_path + 'dis_model_params.pkl', map_location='cpu'))

<All keys matched successfully>

In [9]:
def eval_iters(ae_model, dis_model, max_samples=10):
    """Run ``fgim_attack`` on dev-set sentences and record what it returns.

    Sentences are read one per batch, unshuffled, from the two dialect dev files
    (``dialect_dev.uae`` with label 0 and ``dialect_dev.eg`` with label 1). Each
    one is passed through the autoencoder to obtain its latent, and
    ``fgim_attack`` is called with the opposite label as target. The value it
    returns is appended to the output file via ``add_output``.

    Args:
        ae_model: Autoencoder; called for the forward pass and for
            ``greedy_decode``. Switched to eval mode here.
        dis_model: Classifier handed to ``fgim_attack``. Switched to eval mode here.
        max_samples: Upper bound on the number of sentences processed.
            Defaults to 10; pass ``None`` to process every batch.
    """
    eval_data_loader = non_pair_data_loader(
        batch_size=1, id_bos=args.id_bos,
        id_eos=args.id_eos, id_unk=args.id_unk,
        max_sequence_length=args.max_sequence_length, vocab_size=args.vocab_size
    )
    eval_file_list = [
        args.data_path + 'dialect_dev.uae',
        args.data_path + 'dialect_dev.eg',
    ]
    eval_label_list = [[0], [1]]
    eval_data_loader.create_batches(eval_file_list, eval_label_list, if_shuffle=False)

    # Cap the run at max_samples batches (batch_size is 1, so one sentence each).
    num_samples = eval_data_loader.num_batch
    if max_samples is not None:
        num_samples = min(num_samples, max_samples)

    add_log("Start eval process.")
    ae_model.eval()
    dis_model.eval()
    for it in range(num_samples):
        batch_sentences, tensor_labels, \
        tensor_src, tensor_src_mask, tensor_tgt, tensor_tgt_y, \
        tensor_tgt_mask, tensor_ntokens = eval_data_loader.next_batch()

        print("------------%d------------" % it)
        print("origin_input","\n label is ", tensor_labels)
        print(id2text_sentence(tensor_tgt_y[0], args.id_to_word))

        # Call the module itself rather than .forward so registered hooks run.
        latent, _ = ae_model(tensor_src, tensor_tgt, tensor_src_mask, tensor_tgt_mask)

        # Plain reconstruction of the input. Not used below; kept so it can be
        # printed with id2text_sentence(generator_text[0], args.id_to_word)
        # when inspecting the autoencoder.
        generator_text = ae_model.greedy_decode(latent,
                                                max_len=args.max_sequence_length,
                                                start_id=args.id_bos)

        # Target is the opposite of the sentence's own label.
        flipped_label = 0.0 if tensor_labels[0].item() > 0.5 else 1.0
        target = get_cuda(torch.tensor([[flipped_label]], dtype=torch.float))
        print("target_labels", target)

        modify_text = fgim_attack(dis_model, latent, target, ae_model, args.max_sequence_length, args.id_bos,
                                  id2text_sentence, args.id_to_word, '')
        add_output(modify_text)
    return

In [10]:
# Run the attack on the first dev-set sentences; progress is printed below and
# each result is appended to args.output_file.
eval_iters(ae_model, dis_model)

Load data from ../../data/uae-eg/processed_files/dialect_dev.uae ../../data/uae-eg/processed_files/dialect_dev.eg !
Create 59225 batches with 1 batch_size
[2020-04-28 19:31:34]: Start eval process.
------------0------------
origin_input 
 label is  tensor([[0.]], device='cuda:0')
user الانضباط <UNK> hash num الف درهم <UNK> له <UNK> <UNK> <UNK> <UNK> في مباراه راس الخيمه
target_labels tensor([[1.]], device='cuda:0')
epsilon: 7.0




| It  1 | dis model pred 0.0787 |
hash وزاره <UNK> num مليون فقط <UNK> له الف <UNK> <UNK> <UNK> <UNK> <UNK> في راس الخيمه وفي اي حاجه سلمان العوده
epsilon: 6.3
| It  2 | dis model pred 0.7125 |
hash المحكمه <UNK> num قال له الف <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> في راس الخيمه علشان <UNK> مباراه اكبر اتصالات
epsilon: 5.67
| It  3 | dis model pred 0.9597 |
hash المحكمه <UNK> num قال له الف <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> في راس الخيمه علشان <UNK> مباراه اكبر اتصالات
epsilon: 5.103
| It  4 | dis model pred 0.9713 |
hash المحكمه <UNK> num قال له الف <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> في راس الخيمه علشان <UNK> مباراه اكبر اتصالات
epsilon: 4.5927
| It  5 | dis model pred 0.9767 |
hash المحكمه <UNK> num قال له الف <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> في راس الخيمه علشان <UNK> مباراه اكبر اتصالات
epsilon: 8.0
| It  1 | dis model pred 0.0787 |
hash وزاره <UNK> num مليون فقط <UNK> له الف <UNK> <UNK> <UNK> <UNK> <UNK> في راس الخيمه وفي اي حاجه سلمان العوده
epsilon: 7.2
| It  2 | dis model pr

target_labels tensor([[1.]], device='cuda:0')
epsilon: 7.0
| It  1 | dis model pred 0.6406 |
اكتر قلبي بيجيب احلام و تعبت و <UNK> عانقت نفسك num و عندكو تعلم جرح ام غيرها num شكلها لوحده
epsilon: 6.3
| It  2 | dis model pred 0.9626 |
اكتر قلبي بيجيب احلام و تعبت و <UNK> عانقت نفسك num و عندكو تعلم جرح ام غيرها num شكلها لوحده
epsilon: 5.67
| It  3 | dis model pred 0.9714 |
اكتر قلبي بيجيب احلام و تعبت و <UNK> عانقت نفسك num و عندكو تعلم جرح ام غيرها num شكلها لوحده
epsilon: 5.103
| It  4 | dis model pred 0.9755 |
اكتر قلبي بيجيب احلام و تعبت و <UNK> عانقت نفسك num و عندكو تعلم جرح ام غيرها num شكلها لوحده
epsilon: 4.5927
| It  5 | dis model pred 0.9782 |
اكتر قلبي بيجيب احلام و تعبت و <UNK> عانقت نفسك num و عندكو تعلم جرح ام غيرها num شكلها لوحده
epsilon: 8.0
| It  1 | dis model pred 0.6406 |
اكتر قلبي بيجيب احلام و تعبت و <UNK> عانقت نفسك num و عندكو تعلم جرح ام غيرها num شكلها لوحده
epsilon: 7.2
| It  2 | dis model pred 0.9731 |
اكتر قلبي بيجيب احلام و تعبت و <UNK> عانقت نفسك num و ع

In [27]:
# Show the autoencoder's module structure.
print(ae_model)

EncoderDecoder(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): EncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linears): ModuleList(
            (0): Linear(in_features=256, out_features=256, bias=True)
            (1): Linear(in_features=256, out_features=256, bias=True)
            (2): Linear(in_features=256, out_features=256, bias=True)
            (3): Linear(in_features=256, out_features=256, bias=True)
          )
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=256, out_features=1024, bias=True)
          (w_2): Linear(in_features=1024, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (sublayer): ModuleList(
          (0): SublayerConnection(
            (norm): LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): SublayerConnection(
            (norm): LayerNorm()