In [13]:
import os
import time
import pickle
from random import *
import scipy.io as sio
import input_data
import model_torch as model
from sklearn.decomposition import PCA
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
import tool
import math
from fastai.conv_learner import *

import torch.nn as nn

from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.preprocessing import normalize

a = Random()
a.seed(1)

In [14]:
test_description='testDeVise'
rep_num = 1
id_split=range(0,10)
# SNIP, SMP18
choose_dataset="SNIP"
# Caps, CapsDim,CapsWS, CapsAll
choose_model=[]
# without seen: 0, with seen: 1, fixed with some classes: -1
dataSetting={}
dataSetting['test_mode']=0
######
dataSetting['random_class']=False
dataSetting['training_prob']=0.8
dataSetting['test_intrain_prob']=0.3

# =============================================================================

dataSetting['data_prefix']='data/SNIP/'
dataSetting['dataset_name']='dataSNIP.txt'
dataSetting['wordvec_name']='wiki.en.vec'
dataSetting['sim_name_withS']='SNIP_similarity_M_zscore.mat'
dataSetting['sim_name_withOS']='SNIP10seen.mat'
if choose_dataset=="SMP18":
    dataSetting['data_prefix']='data/SMP18/'
    dataSetting['dataset_name']='dataSMP18.txt'
    dataSetting['wordvec_name']='sgns_merge_subsetSMP.txt'
    dataSetting['sim_name_withS']='SMP_similarity_M_zscore.mat'
    dataSetting['sim_name_withOS']='SMP44_wSeen_R1.mat'

if choose_dataset =='SNIP':
    dataSetting['unseen_class'] = [['playlist'], ['book']]
elif choose_dataset =='SMP18':
    dataSetting['unseen_class'] = [['聊天'],['网站'],['email'],['地图'],['时间'],['健康']]

# Settings

In [15]:
def setting(data):
    vocab_size, word_emb_size = data['embedding'].shape
    sample_num, max_time = data['x_tr'].shape
    test_num = data['x_te'].shape[0]
    s_cnum = np.unique(data['y_tr']).shape[0]
    u_cnum = np.unique(data['y_te']).shape[0]
    config = {}
    config['model_name'] = choose_model
    config['dataset']=choose_dataset
    config['test_mode']=dataSetting['test_mode']
    config['training_prob']=dataSetting['training_prob']
    config['test_intrain_prob']=dataSetting['test_intrain_prob']
    config['wordvec']=dataSetting['wordvec_name']
    config['sim_name_withS']=dataSetting['sim_name_withS']
    config['sim_name_withOS']=dataSetting['sim_name_withOS']
    config['keep_prob'] = 0.5 # embedding dropout keep rate
    config['hidden_size'] = 16 # embedding vector size
    config['batch_size'] = 50 # vocab size of word vectors
    config['vocab_size'] = vocab_size # vocab size of word vectors (10,895)
    config['num_epochs'] = 20 # number of epochs
    config['max_time'] = max_time
    config['sample_num'] = sample_num #sample number of training data
    config['test_num'] = test_num #number of test data
    config['s_cnum'] = s_cnum # seen class num
    config['u_cnum'] = u_cnum #unseen class num
    config['word_emb_size'] = word_emb_size # embedding size of word vectors (300)
    config['d_a'] = 10 # self-attention weight hidden units number
    config['output_atoms'] = 10 #capsule output atoms
    config['r'] = 3 #self-attention weight hops
    config['num_routing'] = 3 #capsule routing num
    config['alpha'] = 0.001 # coefficient of self-attention loss
    config['margin'] = 1.0 # ranking loss margin
    config['learning_rate'] = 0.1
    config['lr_step_size']=10
    config['lr_gamma']=0.1
    config['sim_scale'] = 4 #sim scale
    config['nlayers'] = 2 # default for bilstm
    config['seen_class']=data['seen_class']
    config['unseen_class']=data['unseen_class']
    config['data_prefix']=dataSetting['data_prefix']
    config['ckpt_dir'] = './'+test_description+'/' #check point dir
    config['experiment_time']= time.strftime('%y%m%d%I%M%S')
    config['best_epoch']=0
    config['best_acc']=0
    config['report']=True
    config['cuda_id']=0
    config['untrain_classlen']=data['untrain_classlen']#XIAOTONG
    return config

In [16]:
def generate_batch(n, batch_size):
    batch_index = a.sample(range(n), batch_size)
    return batch_index

def sort_batch(batch_x, batch_y, batch_len, batch_ind):
    batch_len_new = batch_len
    batch_len_new, perm_idx = batch_len_new.sort(0, descending=True)
    batch_x_new = batch_x[perm_idx]
    batch_y_new = batch_y[perm_idx]
    batch_ind_new = batch_ind[perm_idx]

    return batch_x_new, batch_y_new, \
           batch_len_new, batch_ind_new

def cos_loss(input, target):
    return 1 - F.cosine_similarity(input, target).mean()

In [17]:
class myLSTM(nn.Module):
    def __init__(self, config, pretrained_embedding = None):
        super(myLSTM, self).__init__()
        
        self.cuda_id=config['cuda_id']
        self.hidden_size = config['hidden_size']
        self.vocab_size = config['vocab_size']
        self.word_emb_size = config['word_emb_size']
        self.learning_rate = config['learning_rate']
        self.batch_size = config['batch_size']
        self.s_cnum = config['s_cnum']
        self.nlayers = config['nlayers']
        

        self.word_embedding = nn.Embedding(config['vocab_size'], config['word_emb_size'])
        self.bilstm = nn.LSTM(config['word_emb_size'], config['hidden_size'],
                              config['nlayers'], bidirectional=True, batch_first=True)
        self.drop = nn.Dropout(config['keep_prob'])
        self.predict =nn.Linear(config['hidden_size']*2,config['s_cnum'])



    def forward(self, input,len, embedding):
        self.s_len = len
        input = input.transpose(0,1) #(Bach,Length,D) => (L,B,D)
        # Attention
        if (embedding.nelement() != 0):
            self.word_embedding = nn.Embedding.from_pretrained(embedding)

        emb = self.word_embedding(input)
        packed_emb = pack_padded_sequence(emb, len)

        #Initialize hidden states
        h_0 = torch.zeros(self.nlayers*2, input.shape[1], self.hidden_size)
        c_0 = torch.zeros(self.nlayers*2, input.shape[1], self.hidden_size)
        if torch.cuda.is_available():
            h_0=h_0.cuda(self.cuda_id)
            c_0=c_0.cuda(self.cuda_id)
        
        outp, (final_hidden_state,final_cell_state) = self.bilstm(packed_emb, (h_0, c_0))## [bsz, len, d_h * 2]
        self.hh=torch.cat((final_hidden_state[-1],final_hidden_state[-2]),1)
        final_output=self.predict(self.hh)
        
        return final_output

In [18]:
    data = input_data.read_datasets(dataSetting)
    
    # load settings
    config = setting(data)
    cuda_id=config['cuda_id']
            
    x_tr = torch.from_numpy(data['x_tr'])
    y_tr = torch.from_numpy(data['y_tr'])
    y_tr_id = torch.from_numpy(data['y_tr'])
    y_te_id = torch.from_numpy(data['y_te'])
    y_ind = torch.from_numpy(data['s_label'])
    s_len = torch.from_numpy(data['s_len'])
    embedding = torch.from_numpy(data['embedding'])
    x_te = torch.from_numpy(data['x_te'])
    u_len = torch.from_numpy(data['u_len'])

    if torch.cuda.is_available():
        x_tr =x_tr.cuda(cuda_id)
        y_tr =y_tr.cuda(cuda_id)
        y_tr_id = y_tr_id.cuda(cuda_id)
        y_te_id =y_te_id.cuda(cuda_id)
        y_ind =y_ind.cuda(cuda_id)
        s_len = s_len.cuda(cuda_id)
        embedding=embedding.cuda(cuda_id)
        x_te = x_te.cuda(cuda_id)
        u_len = u_len.cuda(cuda_id)
        print('------------------use gpu------------------')
    
# Training cycle
    batch_num = int(config['sample_num'] / config['batch_size']+1)

    # load model
    lstm=model.myLSTM(config,embedding).cuda()
    loss_fn = F.cross_entropy
    optimizer = optim.Adam(lstm.parameters(), lr=config['learning_rate'])

    if not os.path.exists(config['ckpt_dir']):
        os.mkdir(config['ckpt_dir'])

    loss_fn = torch.nn.CrossEntropyLoss(reduce=False, size_average=False)
    hh=np.zeros((config['sample_num'],config['hidden_size']*2))

    print('------------------training begin---------------------')
    print('sample_number=',config['sample_num']," ,batch_size=",config['batch_size'],'batch_num=',batch_num)

------------------read datasets begin-------------------
------------------load word2vec begin-------------------
------------------load word2vec end---------------------
------------------read datasets end---------------------
------------------use gpu------------------
------------------training begin---------------------
sample_number= 9881  ,batch_size= 50 batch_num= 198


In [20]:
# LSTM 1
import torch._utils
try:
    torch._utils._rebuild_tensor_v2
except AttributeError:
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        tensor.requires_grad = requires_grad
        tensor._backward_hooks = backward_hooks
        return tensor
    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
myModel=torch.load(choose_dataset+'_model.pth')

In [22]:
# Zeroshot Learning part
p=0.1
for param in myModel.parameters():
    param.requires_grad = False

AttributeError: 'collections.OrderedDict' object has no attribute 'parameters'

In [23]:
myModel.fc = nn.Sequential(nn.BatchNorm1d(512),
                     nn.Dropout(p),
                     nn.Linear(in_features=512, out_features=512, bias=True),
                     nn.ReLU(),
                     nn.BatchNorm1d(512),
                     nn.Dropout(p),
                     nn.Linear(in_features=512, out_features=300, bias=True))
learn = Learner(modeldata, SingleModel(to_gpu(model)))
learn.opt_fn = partial(optim.Adam, betas=(0.9, 0.99))

NameError: name 'modeldata' is not defined