In [1]:
# Load basic module
import os
import json
import random
random.seed(0)
import math
from copy import deepcopy
import argparse
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Load self-defined module
from generator_seq import Generator, Gen_args
from discriminator import Discriminator, Dis_args
from privatizer import Privatizer, Pri_args
from train import pretrain_gen, train_adv, train_dis, train_pri
from data_loader import LoadData

# Set random seed
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Basic Training Paramters
BATCH_SIZE = 64
USE_CUDA = False
PRE_GEN_EPOCH_NUM = 50
PRE_ADV_EPOCH_NUM = 2
PRE_DIS_EPOCH_NUM = 2
GAP_EPOCH_NUM = 20
MC_NUM = 16
GAP_W = [0.1, 0.5, 0.5]
GEN_LR = 0.01
ADV_LR = 0.01
DIS_LR = 0.01
PRI_LR = 0.01
PRE_GEN_PATH = "../param/pre_generator.pkl"
PRE_ADV_PATH = "../param/pre_adversary.pkl"
PRE_DIS_PATH = "../param/pre_discriminator.pkl"
PRE_PRI_PATH = "../param/pre_privatizer.pkl"

GEN_PATH = "../param/generator_v4.pkl"
ADV_PATH = "../param/adversary_v4.pkl"
DIS_PATH = "../param/discriminator_v4.pkl"
PRI_PATH = "../param/privatizer_v4.pkl"

# Get training and testing dataloader
train_loader, test_loader, \
    MAX_SEQ_LEN, VOCAB_SIZE, index_map = LoadData(data_path="../data/dataset_batch_v2.json", 
                                                  word2id_path="../data/word_map_v2.json", 
                                                  train_split=0.8,
                                                  BATCH_SIZE=64)


# Genrator Parameters
# gen_args = Gen_args(vocab_size=VOCAB_SIZE, 
#                     emb_dim=64, 
#                     hidden_dim=64)
gen_args = Gen_args(vocab_size=VOCAB_SIZE, 
                    emb_dim=64,
                    enc_hid_dim=64,
                    dec_hid_dim=64,
                    enc_dropout=0.5,
                    attn_dim=8,
                    dec_dropout=0.5)

# Privatizer Parameters
pri_args = Pri_args(vocab_size=VOCAB_SIZE, 
                    emb_dim=64,
                    enc_hid_dim=64,
                    dec_hid_dim=64,
                    enc_dropout=0.5)

# Discriminator Parameters
dis_args = Dis_args(num_classes=2, 
                    vocab_size=VOCAB_SIZE, 
                    emb_dim=64, 
                    filter_sizes=[3, 4, 5], 
                    num_filters=[150, 150, 150], 
                    dropout=0.5)

# Adversarial Parameters
adv_args = Dis_args(num_classes=3, 
                    vocab_size=VOCAB_SIZE, 
                    emb_dim=64, 
                    filter_sizes=[3, 4, 5], 
                    num_filters=[150, 150, 150], 
                    # filter_sizes=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20],
                    # num_filters=[100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160],
                    dropout=0.5)

# Define Networks
generator = Generator(gen_args, USE_CUDA)
discriminator = Discriminator(dis_args)
adversary = Discriminator(adv_args)
privatizer = Privatizer(pri_args)

if USE_CUDA:
    generator = generator.cuda()
    discriminator = discriminator.cuda()
    adversary = adversary.cuda()
    privatizer = privatizer.cuda()

# Load pretrained parameters
generator.load_state_dict(torch.load(PRE_GEN_PATH))
privatizer.load_state_dict(torch.load(PRI_PATH))
discriminator.load_state_dict(torch.load(DIS_PATH))
adversary.load_state_dict(torch.load(ADV_PATH))
# Define optimizer and loss function for discriminator

total_loss = 0.
total_words = 0.
step = 0
for batch in tqdm(test_loader):
    step += 1
    if step > 2:
        break
    data = batch["x"]
    target = batch["x"][:,:,0]
    if USE_CUDA:
        data, target = data.cuda(), target.cuda()
    target = target.contiguous().view(-1)
    with torch.no_grad():
        pred, _ = generator.forward_with_noise(input=data, privatizer=privatizer)
target_ = target.detach().cpu().numpy()
_, pred_ = torch.max(pred, axis=-1)
pred_ = pred_.cpu().numpy()
target_query = []
pred_query = []
for i in range(0, 87*3):
    target_query.append(index_map[target_[i]])
    pred_query.append(index_map[pred_[i]])
print("[INFO] Target query: ", target_query)
print("[INFO] Predicted query: ", pred_query)


2020-05-01 03:54:48,681	INFO resource_spec.py:212 -- Starting Ray with 35.55 GiB memory available for workers and up to 17.79 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-05-01 03:54:49,074	INFO services.py:1148 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


[INFO] Complete loading data, with # of {'0': 4000, '1': 4000, '2': 4000}


  5%|▌         | 2/38 [00:02<00:50,  1.41s/it]

[INFO] Target query:  ['<SOS>', 'harry', 'potter', 'goblet', 'of', 'fire', '<POS>', 'information', 'on', 'the', 'seventh', 'book', '<POS>', 'harry', 'potter', 'goblet', 'of', 'fire', '<POS>', 'picture', 'of', 'and', 'emma', '<POS>', 'harry', 'potter', 'goblet', 'of', 'fire', '<POS>', 'picture', 'of', 'and', 'emma', '<EOS>', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '<SOS>', 'negative', 'pregnancy', 'test', 'no', 'period', 'for', 'month', 'yr', 'of', 'age', '<POS>', 'menopausal', 'symptom', '<POS>', 'bank', 'of', '<POS>', 'kindred', '<POS>', 'map', 'of', '<EOS>', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*'




In [1]:
import gensim
model = gensim.models.KeyedVectors.load_word2vec_format('../GoogleNews-vectors-negative300.bin', binary=True)


NameError: name 'torch' is not defined

In [13]:
import json 
with open("../data/word_map_v2.json", "r") as json_file:
    word_map = json.load(json_file)
for key in word_map.keys():
    if key == 



In [5]:
import torch
import torch.nn as nn
weights = torch.FloatTensor(model.vectors) 
# FloatTensor containing pretrained weights
embedding = nn.Embedding.from_pretrained(weights)
# Get embeddings for index 1
input = torch.Tensor(["word"])
embedding(input)

ValueError: too many dimensions 'str'