In [1]:
import torch
import random
import os
from torch import nn
from torch.utils.data import Dataset,DataLoader

# Path of the text corpus that the loading cell below reads line by line.
corpus_file = 'hw2.1_corpus.txt'

In [2]:
# Load the corpus: every line of the file becomes one sentence, stored as a list of
# single characters (character-level tokens).

corpus = []
# Decode explicitly instead of relying on the platform default encoding, which is not
# UTF-8 on every OS. NOTE(review): assumes the corpus file is UTF-8 encoded - confirm.
with open(corpus_file, 'r', encoding='utf-8') as f:
    for row in f:
        # Drop the line break, then split the sentence into its characters.
        corpus.append(list(row.replace('\n', '')))

In [3]:
# Keep the vocabulary in first-seen order so that every run assigns the same embedding
# index to the same character; the set mirrors the list for fast membership tests.

words = list(dict.fromkeys(ch for sentence in corpus for ch in sentence))
words_set = set(words)

In [4]:
import re

class Embedding:
    """Vocabulary (token <-> index) plus a randomly initialised embedding matrix.

    The vocabulary is made of the control tokens '<PAD>', '<SOS>', '<EOS>', '<UNK>',
    the position tokens '0' .. str(maxPOS - 1), and every entry of `words`.

    Attributes:
        word_dict: token -> index.
        word_list: index -> token.
        emb_dim: embedding dimension requested at construction time.
        maxPOS: number of position tokens registered.
        addition_words: control and position tokens, in registration order.
        vectors: tensor of shape (vocabulary size, dim), filled by uniform_ init.
    """

    def __init__(self, words=None, dim=300):
        """Build the vocabulary and the embedding matrix.

        Args:
            words: iterable of corpus tokens; None is treated as an empty vocabulary.
            dim: size of each embedding vector.
        """
        # The default `words=None` used to crash on the membership test below.
        words = [] if words is None else list(words)

        self.word_dict = {}
        self.word_list = []
        self.emb_dim = dim
        self.maxPOS = 12      # Maximum POS
        self.addition_words = ['<PAD>', '<SOS>', '<EOS>', '<UNK>']
        self.addition_words.extend(str(pos) for pos in range(self.maxPOS))

        # Additional tokens that also occur in the corpus are NOT registered here; they
        # receive their index in the corpus loop below, exactly as before. The set only
        # replaces the repeated linear scans over the word list.
        corpus_words = set(words)
        for addition_word in self.addition_words:
            if addition_word not in corpus_words:
                self.word_dict[addition_word] = len(self.word_list)
                self.word_list.append(addition_word)

        for word in words:
            if word not in self.word_dict:
                self.word_dict[word] = len(self.word_list)
                self.word_list.append(word)

        self.vectors = torch.nn.init.uniform_(
                torch.empty(len(self.word_dict), dim))

    def to_index(self, word):
        """Return the index of `word`, or the index of '<UNK>' when it is unknown."""
        if word not in self.word_dict:
            return self.word_dict['<UNK>']

        return self.word_dict[word]

    def tokenize(self, words):
        """Map a whole sentence (iterable of tokens) to a list of indices."""
        return [self.to_index(w) for w in words]

    def to_word(self, idx):
        """Return the token stored at index `idx`."""
        return self.word_list[idx]

    def unTokenize(self, ids):
        """Map an iterable of indices back to a list of tokens."""
        return [self.to_word(idx) for idx in ids]

    def get_vocabulary_size(self):
        """Number of rows of the embedding matrix."""
        return self.vectors.shape[0]

    def get_dim(self):
        """Number of columns of the embedding matrix."""
        return self.vectors.shape[1]

In [5]:
# Build the shared embedder and cache the indices of the control tokens that the
# collate functions and the decoder rely on.
embedder = Embedding(words=words, dim=300)

PAD, SOS, EOS = (embedder.to_index(token) for token in ('<PAD>', '<SOS>', '<EOS>'))

In [6]:
# Build (input, target) pairs from consecutive corpus lines. The input is the previous
# line followed by three control tokens: two numbers whose sum is the index of a
# designated character inside the <SOS>-prefixed target, and that character itself.
all_set = []
two_hint_ratio = 0.0

for former,latter in zip(corpus[:-1],corpus[1:]):

    n = len(latter)
    if n == 0:
        # An empty target line has no character to designate (sampling from an empty
        # range raises ValueError), so the pair is skipped.
        continue

    # Designate one character among the first `maxPOS` characters of the target.
    selected_idx = random.randrange(min(n, embedder.maxPOS))

    # Split the index in the <SOS>-prefixed target (selected_idx + 1) into a + b.
    # NOTE(review): when selected_idx is 11 one hint can be 12, but Embedding only
    # registers '0'..'11', so that hint is tokenised as <UNK> - confirm this is intended.
    a = random.randint(0, selected_idx + 1)
    b = selected_idx + 1 - a

    addition_tokens = [str(a), str(b), latter[selected_idx]]

    former = ['<SOS>'] + former + ['<EOS>'] + addition_tokens
    latter = ['<SOS>'] + latter + ['<EOS>']

    all_set.append((former,latter))

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the pairs for validation; the fixed random_state keeps the split stable.
train_set, valid_set = train_test_split(all_set, test_size=0.2, random_state=42)

print(*map(len, (all_set, train_set, valid_set)))

741714 593371 148343


In [9]:
# list of tuples: [(x0,y0),(x1,y1),(x2,y2),....]
all_set[0:5]

[(['<SOS>',
   '心',
   '疼',
   '你',
   '还',
   '没',
   '挣',
   '脱',
   '思',
   '念',
   '的',
   '囚',
   '禁',
   '<EOS>',
   '1',
   '1',
   '在'],
  ['<SOS>',
   '他',
   '在',
   '你',
   '一',
   '段',
   '难',
   '忘',
   '远',
   '行',
   '最',
   '后',
   '却',
   '离',
   '去',
   '<EOS>']),
 (['<SOS>',
   '他',
   '在',
   '你',
   '一',
   '段',
   '难',
   '忘',
   '远',
   '行',
   '最',
   '后',
   '却',
   '离',
   '去',
   '<EOS>',
   '1',
   '7',
   '这'],
  ['<SOS>', '你', '无', '力', '依', '靠', '在', '我', '这', '里', '<EOS>']),
 (['<SOS>',
   '你',
   '无',
   '力',
   '依',
   '靠',
   '在',
   '我',
   '这',
   '里',
   '<EOS>',
   '2',
   '0',
   '着'],
  ['<SOS>', '隔', '着', '刚', '被', '雨', '淋', '湿', '的', '玻', '璃', '<EOS>']),
 (['<SOS>',
   '隔',
   '着',
   '刚',
   '被',
   '雨',
   '淋',
   '湿',
   '的',
   '玻',
   '璃',
   '<EOS>',
   '2',
   '3',
   '到'],
  ['<SOS>', '你', '问', '了', '我', '到', '底', '爱', '在', '哪', '里', '<EOS>']),
 (['<SOS>',
   '你',
   '问',
   '了',
   '我',
   '到',
   '底',
   '爱',
   '在',
   '哪',
   '里',


In [10]:

class SentDataset(Dataset):
    """Dataset over (input tokens, target tokens) pairs with a padding collate function."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def collate_fn(self, datas):
        """Tokenise and pad a list of (data, label) pairs into batch tensors.

        Returns a 5-tuple:
            padded inputs (LongTensor), input lengths (LongTensor),
            padded targets (LongTensor), target lengths (plain list),
            "focus" targets (LongTensor) that are <PAD> everywhere except <SOS>,
            <EOS> and the single designated character.
        """
        # Longest input / target of this batch decide the padded widths.
        longest_x = max([len(pair[0]) for pair in datas])
        longest_y = max([len(pair[1]) for pair in datas])

        batch_x, batch_y, batch_y_ = [], [], []
        len_x, len_y = [], []

        for data, label in datas:
            len_x.append(len(data))
            len_y.append(len(label))

            # Tokenise, then right-pad with <PAD> up to the batch width
            # (multiplying a list by 0 adds nothing for the longest entries).
            x_ids = embedder.tokenize(data)
            y_ids = embedder.tokenize(label)
            x_ids += [PAD] * (longest_x - len(data))
            y_ids += [PAD] * (longest_y - len(label))

            batch_x.append(x_ids)
            batch_y.append(y_ids)

            # Focus target: keep only <SOS>, <EOS> and the designated character.
            focus = [PAD] * len(y_ids)
            focus[0] = SOS
            focus[y_ids.index(EOS)] = EOS

            # The two tokens right after the input's <EOS> sum to the designated index.
            hint_at = data.index('<EOS>') + 1
            target_pos = int(data[hint_at]) + int(data[hint_at + 1])
            focus[target_pos] = y_ids[target_pos]

            batch_y_.append(focus)

        return (torch.LongTensor(batch_x),
                torch.LongTensor(len_x),
                torch.LongTensor(batch_y),
                len_y,
                torch.LongTensor(batch_y_))

In [11]:
# Sanity check: decode the first mini-batch back to text to eyeball the collate output.

dataset = SentDataset(train_set)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=dataset.collate_fn,
    num_workers=0,
)

first_batch = next(iter(dataloader), None)
if first_batch is not None:
    x, x_len, y, y_len, y_ = first_batch
    print('Sentence lenght:',x_len,y_len,'\n')

    # One triple per sample: input, full target, focus target.
    for xi, yi, y_i in zip(x, y, y_):
        print(embedder.unTokenize(xi))
        print(embedder.unTokenize(yi))
        print(embedder.unTokenize(y_i),'\n')


Sentence lenght: tensor([11, 14, 15, 18]) [8, 7, 8, 12] 

['<SOS>', '我', '没', '有', '六', '尺', '高', '<EOS>', '3', '0', '会', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
['<SOS>', '我', '却', '会', '待', '你', '好', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
['<SOS>', '<PAD>', '<PAD>', '会', '<PAD>', '<PAD>', '<PAD>', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'] 

['<SOS>', '替', '我', '解', '开', '心', '中', '的', '孤', '单', '<EOS>', '1', '3', '白', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
['<SOS>', '是', '谁', '明', '白', '我', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
['<SOS>', '<PAD>', '<PAD>', '<PAD>', '白', '<PAD>', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'] 

['<SOS>', '爱', '是', '不', '是', '不', '开', '口', '才', '珍', '贵', '<EOS>', '0', '3', '我', '<PAD>', '<PAD>', '<PAD>']
['<SOS>', '再', '给', '我', '两', '分', '钟', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
['<SOS>', '<PAD>', '<PAD>', '我', '<PAD>', '<PAD>', '<PAD>', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'] 

['<SOS>',

In [12]:
from torch.nn.utils.rnn import  pack_padded_sequence, pad_packed_sequence

class Encoder(nn.Module):
    """GRU encoder over padded, batch-first token index sequences.

    The embedding table is initialised from the module-level `embedder.vectors`.
    NOTE(review): Decoder wraps the same tensor, so the tables presumably share
    storage - confirm that this tying is intended.
    """

    def __init__(self, vocab_size, embedding_size, output_size):
        """
        Args:
            vocab_size: number of rows of the embedding table.
            embedding_size: input size of the GRU.
            output_size: hidden size of the GRU (size of the context vector).
        """
        super(Encoder, self).__init__()

        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(vocab_size,embedder.get_dim())
        self.embedding.weight = nn.Parameter(embedder.vectors)
        self.gru = nn.GRU(embedding_size, output_size,batch_first=True, bias=False)

    def forward(self, input_seqs, input_lengths, hidden=None):
        """Encode a padded batch.

        Args:
            input_seqs: LongTensor of token indices, batch first.
            input_lengths: LongTensor with the true length of every sequence.
            hidden: optional initial GRU state, batch on dimension 1, given in the
                same (unsorted) batch order as `input_seqs`.

        Returns:
            (outputs, hidden), both restored to the caller's batch order.
        """
        # Packing requires the batch sorted by decreasing length.
        sorted_input_lengths, indices = torch.sort(input_lengths,descending=True)
        _, desorted_indices = torch.sort(indices, descending=False)
        input_seqs = input_seqs[indices]

        # A caller-supplied initial state must follow the same permutation, otherwise
        # each sequence would start from another sequence's state.
        if hidden is not None:
            hidden = hidden[:,indices]

        # Encoder work
        embedded = self.embedding(input_seqs)
        packed = pack_padded_sequence(embedded, sorted_input_lengths.cpu().numpy(), batch_first=True)
        packed_outputs, hidden = self.gru(packed, hidden)
        outputs, output_lengths = pad_packed_sequence(packed_outputs,batch_first=True)

        # Undo the sort so results line up with the original batch order.
        outputs = outputs[desorted_indices]
        hidden = hidden[:,desorted_indices]

        return outputs, hidden

In [13]:
class Decoder(nn.Module):
    """GRU-cell decoder unrolled from the encoder's context vector.

    NOTE: `forward_step` replaces the embedded input with zeros ("for research"), so
    each step is driven by the hidden state only; the token fed in has no effect on
    the step output.
    """

    def __init__(self, hidden_size, output_size, teacher_forcing_ratio=0.5):
        """
        Args:
            hidden_size: size of the GRU cell state (must match the context vector).
            output_size: number of classes produced by the classifier.
            teacher_forcing_ratio: probability of feeding ground-truth tokens for a
                whole forward pass.
        """
        super(Decoder, self).__init__()

        self.hidden_size = hidden_size
        self.output_size = output_size
        self.embedding = nn.Embedding(embedder.get_vocabulary_size(),embedder.get_dim()) # Unused
        self.embedding.weight = nn.Parameter(embedder.vectors)
        self.cell = nn.GRUCell(embedder.get_dim(), hidden_size, bias=False)
        self.clf = nn.Linear(hidden_size, output_size, bias=False)

        # Tie the classifier to the embedding matrix when the sizes allow it.
        if hidden_size == embedder.vectors.T.shape[0]:
            self.clf.weight = nn.Parameter(embedder.vectors)

        self.log_softmax = nn.LogSoftmax(dim=1)  # work with NLLLoss

        self.teacher_forcing_ratio = teacher_forcing_ratio

    def forward_step(self, inputs, hidden):
        """Run one decoding step; returns (log-probabilities [B, output], hidden [B, hidden])."""
        # Unused
        embedded = self.embedding(inputs)
        # For research : all x to 0
        embedded = torch.zeros_like(embedded)

        hidden = self.cell(embedded, hidden) # [B,Hidden_dim]
        clf_output = self.clf(hidden) # [B,Output_dim]
        output = self.log_softmax(clf_output)

        return output, hidden

    def forward(self, context_vector, target_vars, target_lengths):
        """Unroll the decoder.

        Args:
            context_vector: encoder state, batch on dimension 1 (dimension 0 is squeezed).
            target_vars: ground-truth token indices [B, T], or None at inference.
            target_lengths: lengths of the targets; None means "decode up to 50 steps".

        Returns:
            (decoder_outputs, decoder_hiddens): per-step log-probabilities stacked on
            dimension 2 and per-step hidden states stacked on dimension 1.
        """
        batch_size = context_vector.shape[1]

        # Create the first input on the same device as the context vector instead of
        # depending on a module-level `device` variable.
        decoder_input = torch.full((batch_size,), SOS, dtype=torch.long,
                                   device=context_vector.device)
        decoder_hidden = context_vector.squeeze(0)

        if target_lengths is None:
            max_target_length = 50
        else:
            max_target_length = max(target_lengths)
        decoder_outputs = []
        decoder_hiddens = []

        # Teacher forcing needs ground truth; without targets fall back to free running
        # (previously this indexed into None).
        use_teacher_forcing = random.random() < self.teacher_forcing_ratio and target_vars is not None

        for t in range(max_target_length):

            decoder_outputs_on_t, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_outputs_on_t)
            decoder_hiddens.append(decoder_hidden)

            # Take input for next GRU iteration
            if use_teacher_forcing:
                decoder_input = target_vars[:,t]
            else:
                decoder_input = decoder_outputs_on_t.argmax(-1)

            # Early stop at inference once every sequence predicts <EOS>. The old test
            # `self.train() == False` called train() (switching the module to training
            # mode) and was never true, so the early stop never fired.
            if target_lengths is None and not self.training and torch.all(decoder_input==EOS):
                break

        # Stack output of each word at dimension 2
        decoder_outputs = torch.stack(decoder_outputs,dim=2)
        # Stack hidden of each timestep at dimension 1
        decoder_hiddens = torch.stack(decoder_hiddens,dim=1)

        return decoder_outputs, decoder_hiddens

In [14]:
class Seq2Seq(nn.Module):
    """Thin wrapper chaining an encoder and a decoder module."""

    def __init__(self,encoder,decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seqs, input_lengths, target_seqs=None, target_lengths=None):
        """Encode the inputs, then decode from the encoder's final hidden state.

        Returns whatever the decoder returns: (outputs, hiddens).
        """
        # Use the sub-modules owned by this instance; the previous code called the
        # module-level `encoder` / `decoder` globals, which ignores the constructor
        # arguments and fails when those globals do not exist.
        _, hidden = self.encoder(input_seqs, input_lengths)
        outputs, hiddens = self.decoder(hidden, target_seqs, target_lengths)
        return outputs,hiddens

In [15]:
from torch.utils.data import DataLoader
from tqdm import tqdm_notebook as tqdm

# Pick the compute device and assemble encoder + decoder around a 128-d context vector.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

context_dim = 128

encoder = Encoder(
    vocab_size=embedder.get_vocabulary_size(),
    embedding_size=embedder.get_dim(),
    output_size=context_dim,
)
decoder = Decoder(
    hidden_size=context_dim,
    output_size=embedder.get_vocabulary_size(),
    teacher_forcing_ratio=0.5,
)
model = Seq2Seq(encoder=encoder, decoder=decoder)
model.to(device)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(6575, 300)
    (gru): GRU(300, 128, bias=False, batch_first=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(6575, 300)
    (cell): GRUCell(300, 128, bias=False)
    (clf): Linear(in_features=128, out_features=6575, bias=False)
    (log_softmax): LogSoftmax()
  )
)

# Training

In [None]:

def run_epoch(epoch,dataset,isTraining):
    """Run one pass over `dataset`, training or evaluating the global `model`.

    Relies on module-level state: `model`, `device`, `criterion`, `optimizer`,
    `focus_ratio`, and the `history_loss` / `history_acc` dicts, which receive one
    entry per batch under 'train' or 'valid'.

    Args:
        epoch: epoch number, only used in the progress-bar label.
        dataset: dataset exposing `collate_fn` (e.g. SentDataset).
        isTraining: True to update the weights, False to only evaluate.

    Returns:
        (mean batch loss, mean batch accuracy) over the epoch; (0.0, 0.0) when the
        dataset yields no batch.
    """
    # train(True) == train(), train(False) == eval()
    model.train(isTraining)

    dataloader = DataLoader(dataset=dataset,
                            batch_size=32,
                            shuffle=True,
                            collate_fn=dataset.collate_fn,
                            num_workers=0)

    mode = 'train' if isTraining else 'valid'
    desc = 'Train {}' if isTraining else 'Valid {}'

    trange = tqdm(enumerate(dataloader), total=len(dataloader),desc=desc.format(epoch))

    loss = 0
    acc = 0
    n_batches = 0

    for i,(x,x_len,y,y_len,y_) in trange:

        x = x.to(device)
        y = y.to(device)
        y_ = y_.to(device)

        # Only track gradients while training; validation does not need the autograd
        # graph, so skipping it saves memory and time.
        with torch.set_grad_enabled(isTraining):
            # outputs : [b,emb,s] , hiddens : [b,s,hidden]
            outputs,hiddens = model(x,x_len,y,y_len)

            batch_loss_all = criterion(outputs, y)
            batch_loss_designated = criterion(outputs, y_)
            batch_loss = (1-focus_ratio)*batch_loss_all + focus_ratio*batch_loss_designated

        # Accuracy on the designated characters only: positions of the focus target
        # whose id is above 2 (presumably skipping <PAD>/<SOS>/<EOS>, which take ids
        # 0-2 when they do not occur in the corpus - confirm).
        idx = y_>2
        tf_map = y_[idx] == outputs.argmax(1)[idx]
        # Guard the empty case instead of dividing by zero.
        batch_acc = tf_map.float().mean().item() if tf_map.numel() > 0 else 0.0
        acc += batch_acc

        if isTraining:
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        loss += batch_loss.item()
        n_batches = i + 1

        trange.set_postfix({'loss':loss/(i+1),'accuracy':acc/(i+1)})

        history_loss[mode].append(batch_loss.item())
        history_acc[mode].append(batch_acc)

    if n_batches == 0:
        return 0.0, 0.0
    return loss/n_batches, acc/n_batches

In [None]:
# Training

import os

# Training driver: wrap the splits, configure loss / optimiser, then alternate one
# training and one validation pass per epoch, saving a checkpoint after each epoch.

dataset_all = SentDataset(all_set)
dataset_train = SentDataset(train_set)
dataset_valid = SentDataset(valid_set)

# `size_average=True` is deprecated; `reduction='mean'` is the equivalent spelling.
# Padded positions are excluded from the loss through ignore_index.
criterion = torch.nn.NLLLoss(ignore_index=PAD, reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
max_epoch = 20
focus_ratio = 0.5   # weight of the designated-character loss inside run_epoch

history_loss = {'train':[],'valid':[]}
history_acc = {'train':[],'valid':[]}

# Create the checkpoint directory once, tolerating an existing one.
os.makedirs('model', exist_ok=True)

for epoch in range(max_epoch):

    # Training
    run_epoch(epoch,dataset=dataset_train,isTraining=True)

    # Validation
    run_epoch(epoch,dataset=dataset_valid,isTraining=False)

    # Saving
    torch.save(model.state_dict(), 'model/model.pkl.{}'.format(epoch))

# Plot (Loss and acc)

In [None]:
import matplotlib.pyplot as plt

# Plot the per-batch loss and accuracy histories, one panel per metric, with the
# training and validation curves overlaid in each panel.
modes = ['train', 'valid']
recs = [history_loss, history_acc]
names = ['Loss', 'Accuracy']

# values[m][r] is the history of metric r for mode m.
values = [[rec[mode] for rec in recs] for mode in modes]

plt.figure(figsize=(32, 4))
plt.subplots_adjust(left=0.02, right=0.999)
for r, name in enumerate(names):
    plt.subplot(1, len(recs), r+1)
    for m in range(len(modes)):
        plt.plot(values[m][r])
    plt.title(name)
    plt.legend(modes)
    plt.xlabel('iteration')

# Show once, after every panel is drawn. Calling show() inside the loop rendered the
# figure after the first panel only, so the second panel landed on a separate figure.
plt.show()
#plt.savefig('figure.png', dpi=100)

# Inference Test Data
### Define test data dataloader

In [16]:
class TestDataset(Dataset):
    """Dataset of input-only token lists, used for inference."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def collate_fn(self, datas):
        """Tokenise and right-pad a batch; returns (padded ids, lengths) as LongTensors."""
        widest = max([len(tokens) for tokens in datas])
        batch_x, len_x = [], []

        for tokens in datas:
            len_x.append(len(tokens))
            ids = embedder.tokenize(tokens)
            # Multiplying by 0 adds nothing for the longest sample.
            ids += [PAD] * (widest - len(tokens))
            batch_x.append(ids)

        return torch.LongTensor(batch_x), torch.LongTensor(len_x)

## Load pre-trained model

In [17]:
path_pkl = 'pre-train/model.pkl.2-2-additional'
# map_location remaps the stored tensors onto the active device, so a checkpoint saved
# on a GPU also loads on a CPU-only machine.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
model.load_state_dict(torch.load(path_pkl, map_location=device))
# Inference only: never feed ground truth, and switch to evaluation mode.
model.decoder.teacher_forcing_ratio = 0.0
model.eval()

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(6575, 300)
    (gru): GRU(300, 128, bias=False, batch_first=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(6575, 300)
    (cell): GRUCell(300, 128, bias=False)
    (clf): Linear(in_features=128, out_features=6575, bias=False)
    (log_softmax): LogSoftmax()
  )
)

In [18]:
import torch.nn.functional as F


def Deconstruction(model,x,x_len,max_steps=50,eos_index=None):
    """Unroll the decoder by hand and record the GRU gate activations.

    The GRU cell is re-implemented with its hidden-to-hidden weights only: the cell
    has no bias and, as in `Decoder.forward_step`, the input is treated as all zeros,
    so the input-to-hidden term vanishes.

    Args:
        model: object exposing `encoder`, `decoder.cell`, `decoder.clf` and
            `decoder.log_softmax`.
        x: padded batch of token indices.
        x_len: true lengths of the sequences in `x`.
        max_steps: upper bound on decoding steps (None = unbounded, the former
            behaviour). Prevents an endless loop when the batch never predicts
            the end token simultaneously.
        eos_index: index of the end token; defaults to the module-level `EOS`.

    Returns:
        outputs: per-step log-probabilities stacked on dimension 2, (b, vocab, s).
        gru_info: dict with 'hiddens', 'resetgates', 'updategates' and 'newgates',
            each stacked on dimension 2, (b, hidden, s).

    Raises:
        ValueError: if `max_steps` is given and smaller than 1.
    """
    if max_steps is not None and max_steps < 1:
        raise ValueError('max_steps must be at least 1 or None')

    eos = EOS if eos_index is None else eos_index

    # Collection signal
    decoder_outputs = []
    decoder_hiddens = []
    decoder_resetGates = []
    decoder_updateGates = []
    decoder_newGates = []

    # Pure analysis: no autograd graph is needed.
    with torch.no_grad():

        # Encoder (only the final state is used)
        _, context = model.encoder(x,x_len)

        # Decoder
        decoder_hidden = context.squeeze(0)
        gru = model.decoder.cell

        step = 0
        while max_steps is None or step < max_steps:

            # GRU Cell with zero input: every gate depends on the hidden state only.
            U_h = F.linear(decoder_hidden, gru.weight_hh)
            Ur_h, Uz_h, Un_h = U_h.chunk(3, 1)
            reset_gate = torch.sigmoid(Ur_h)
            update_gate = torch.sigmoid(Uz_h)
            new_gate = torch.tanh(reset_gate * Un_h)
            # Same as (1 - z) * n + z * h
            decoder_hidden = new_gate + update_gate * (decoder_hidden - new_gate)

            # Classifier
            clf_output = model.decoder.clf(decoder_hidden)
            decoder_output = model.decoder.log_softmax(clf_output)

            decoder_resetGates.append(reset_gate)
            decoder_updateGates.append(update_gate)
            decoder_newGates.append(new_gate)
            decoder_outputs.append(decoder_output)
            decoder_hiddens.append(decoder_hidden)

            step += 1

            # Stop as soon as every sequence of the batch predicts the end token
            # at the same step.
            if torch.all(decoder_output.argmax(-1)==eos):
                break

    outputs = torch.stack(decoder_outputs,dim=2)             # (b,6xxx,s)

    gru_info = {
        'hiddens':torch.stack(decoder_hiddens,dim=2),             # (b,128,s)
        'resetgates':torch.stack(decoder_resetGates,dim=2),       # (b,128,s)
        'updategates':torch.stack(decoder_updateGates,dim=2),     # (b,128,s)
        'newgates':torch.stack(decoder_newGates,dim=2)            # (b,128,s)
    }

    return outputs, gru_info

## Generate validation data for a specific condition (designated word and position)

In [19]:
certain_set = []

designated_word = '乐'

# Attach the same designated word to 16 random corpus sentences, each with a random
# target position expressed as two hints that add up to it.
for sent in random.sample(corpus, k=16):

    designated_POS = random.randint(1, 10)
    designated_POS1 = random.randint(0, designated_POS)
    designated_POS2 = designated_POS - designated_POS1

    data = ['<SOS>', *sent, '<EOS>',
            str(designated_POS1), str(designated_POS2), designated_word]

    print(data)

    certain_set.append(data)

dataset_certain = TestDataset(certain_set)

['<SOS>', '受', '不', '了', '看', '见', '你', '背', '影', '来', '到', '<EOS>', '1', '3', '乐']
['<SOS>', '合', '明', '明', '无', '余', '地', '再', '过', '问', '<EOS>', '0', '5', '乐']
['<SOS>', '春', '风', '扬', '起', '你', '我', '的', '离', '别', '<EOS>', '2', '2', '乐']
['<SOS>', '旧', '爱', '再', '莫', '忆', '心', '中', '<EOS>', '6', '4', '乐']
['<SOS>', '告', '诉', '你', '我', '心', '里', '一', '直', '都', '懂', '你', '<EOS>', '6', '1', '乐']
['<SOS>', '任', '旧', '日', '万', '念', '俱', '灰', '也', '经', '过', '<EOS>', '1', '0', '乐']
['<SOS>', '错', '过', '了', '多', '少', '个', '路', '口', '<EOS>', '3', '0', '乐']
['<SOS>', '饥', '吞', '毡', '渴', '饮', '雪', '<EOS>', '1', '9', '乐']
['<SOS>', '讲', '分', '开', '可', '否', '不', '再', '用', '憾', '事', '的', '口', '吻', '<EOS>', '3', '2', '乐']
['<SOS>', '愛', '<EOS>', '5', '2', '乐']
['<SOS>', '期', '望', '带', '来', '失', '望', '的', '恶', '性', '循', '环', '<EOS>', '6', '4', '乐']
['<SOS>', '风', '起', '云', '过', '雨', '打', '湿', '过', '<EOS>', '8', '2', '乐']
['<SOS>', '水', '没', '了', '<EOS>', '8', '1', '乐']
['<SOS>', '放', '过', '你', '自'

## Predict on the certain-condition data

In [20]:
# Decode the condition set batch by batch and collect the arg-max token ids per sentence.
dataloader = DataLoader(
    dataset=dataset_certain,
    batch_size=128,
    shuffle=False,
    collate_fn=dataset_certain.collate_fn,
    num_workers=0,
)

predictions = []
trange = tqdm(dataloader, total=len(dataloader))

for x, x_len in trange:

    x = x.to(device)

    outputs, gru_info = Deconstruction(model, x, x_len)

    # argmax over the vocabulary axis leaves one row of token ids per sentence.
    predictions.extend(outputs.cpu().detach().numpy().argmax(1))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




In [21]:
# Cut every prediction right after its first <EOS> (append one when none was produced),
# then map the ids back to tokens.
predictions_set = []
for p in predictions:
    token_ids = list(p)
    if EOS not in token_ids:
        token_ids.append(EOS)
    else:
        token_ids = token_ids[:token_ids.index(EOS) + 1]
    predictions_set.append(embedder.unTokenize(token_ids))


# Print each input next to its decoded prediction.
for sent_id, source_tokens in enumerate(certain_set):
    sent_in = ''.join(source_tokens)
    sent_pred = ''.join(predictions_set[sent_id])
    print('input:\t{}\npred:\t{}\n'.format(sent_in,sent_pred))

input:	<SOS>受不了看见你背影来到<EOS>13乐
pred:	<SOS>我的快乐<EOS>

input:	<SOS>合明明无余地再过问<EOS>05乐
pred:	<SOS>我你你快乐<EOS>

input:	<SOS>春风扬起你我的离别<EOS>22乐
pred:	<SOS>我是快乐<EOS>

input:	<SOS>旧爱再莫忆心中<EOS>64乐
pred:	<SOS>我是的的的的我的快乐<EOS>

input:	<SOS>告诉你我心里一直都懂你<EOS>61乐
pred:	<SOS>我你你你的快乐<EOS>

input:	<SOS>任旧日万念俱灰也经过<EOS>10乐
pred:	<SOS>乐<EOS>

input:	<SOS>错过了多少个路口<EOS>30乐
pred:	<SOS>我快乐<EOS>

input:	<SOS>饥吞毡渴饮雪<EOS>19乐
pred:	<SOS>我是的的的的的的快乐<EOS>

input:	<SOS>讲分开可否不再用憾事的口吻<EOS>32乐
pred:	<SOS>我是的快乐<EOS>

input:	<SOS>愛<EOS>52乐
pred:	<SOS>我我我我的快乐<EOS>

input:	<SOS>期望带来失望的恶性循环<EOS>64乐
pred:	<SOS>我是的的的的的的快乐<EOS>

input:	<SOS>风起云过雨打湿过<EOS>82乐
pred:	<SOS>我你你你你你我的快乐<EOS>

input:	<SOS>水没了<EOS>81乐
pred:	<SOS>我是的的的的的快乐<EOS>

input:	<SOS>放过你自己吧勇敢一点面对<EOS>01乐
pred:	<SOS>乐<EOS>

input:	<SOS>让惊慌的泪水不再无处可躲<EOS>52乐
pred:	<SOS>我是你我的快乐<EOS>

input:	<SOS>是我唯一的爱<EOS>30乐
pred:	<SOS>我快乐<EOS>



In [None]:
# Average every gate signal over the batch dimension, leaving one row per hidden cell
# and one column per decoding step.
updategates, resetgates, newgates = (
    gru_info[key].mean(0).detach().cpu().numpy()
    for key in ('updategates', 'resetgates', 'newgates')
)

# Number of decoding steps recorded.
output_len = updategates.shape[1]

updategates.shape , resetgates.shape , newgates.shape


import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Update Gates: one curve per hidden cell, all drawn on a single figure.

plt.close('all')
# Set the default size BEFORE creating any figure; assigning it after the figures
# exist (as before) does not resize them.
plt.rcParams["figure.figsize"] = (12,6)

x = np.arange(output_len)
cells_per_figure = 128
last_cell = len(updategates) - 1   # replaces the hard-coded 127

fig , ax = plt.subplots()

for i,c in enumerate(updategates):

    ax.plot(x, c,label='cell {}'.format(i))

    if (i+1)%cells_per_figure == 0:
        ax.set_xlim((-0.5, output_len+1))
        # Open a new figure only when more cells remain to be drawn.
        if i < last_cell:
            fig , ax = plt.subplots()

plt.show()

In [None]:
# Reset Gates: one curve per hidden cell, 32 cells per figure, each with a legend.

plt.close('all')
# Set the default size BEFORE creating any figure; assigning it after the figures
# exist (as before) does not resize them.
plt.rcParams["figure.figsize"] = (12,6)

x = np.arange(output_len)
cells_per_figure = 32
last_cell = len(resetgates) - 1   # replaces the hard-coded 127

fig , ax = plt.subplots()

for i,c in enumerate(resetgates):

    ax.plot(x, c,label='cell {}'.format(i))

    if (i+1)%cells_per_figure == 0:
        leg = ax.legend(loc='upper right', shadow=True)
        ax.set_xlim((-0.5, output_len+1))
        # Open a new figure only when more cells remain to be drawn.
        if i < last_cell:
            fig , ax = plt.subplots()

plt.show()

In [None]:
# New Gates: one curve per hidden cell, all drawn on a single figure.

plt.close('all')
# Set the default size BEFORE creating any figure; assigning it after the figures
# exist (as before) does not resize them.
plt.rcParams["figure.figsize"] = (12,6)

x = np.arange(output_len)
cells_per_figure = 128
last_cell = len(newgates) - 1   # replaces the hard-coded 127

fig , ax = plt.subplots()

for i,c in enumerate(newgates):

    ax.plot(x, c,label='cell {}'.format(i))

    if (i+1)%cells_per_figure == 0:
        ax.set_xlim((-0.5, output_len+1))
        # Open a new figure only when more cells remain to be drawn.
        if i < last_cell:
            fig , ax = plt.subplots()

plt.show()

In [None]:
# Hidden-cell indices singled out for the two focused gate plots below
# (presumably hand-picked from the full plots above - confirm).
special_cells = [18,19,42,47,48,50,57,60,62,67,68,74,71,82,88,90,91,92,93,96,97,100,116,125]

In [None]:
# Update-gate curves restricted to the cells listed in `special_cells`.
plt.close('all')
x = np.arange(output_len)

highlighted = set(special_cells)
for i, c in enumerate(updategates):
    if i not in highlighted:
        continue
    plt.plot(x, c, label='cell {}'.format(i))

plt.xlim((-0.5, output_len+1))
plt.legend(loc='upper right', shadow=True)
plt.show()

In [None]:
# Reset-gate curves restricted to the cells listed in `special_cells`.
plt.close('all')
x = np.arange(output_len)

highlighted = set(special_cells)
for i, c in enumerate(resetgates):
    if i not in highlighted:
        continue
    plt.plot(x, c, label='cell {}'.format(i))

plt.xlim((-0.5, output_len+1))
plt.legend(loc='upper right', shadow=True)
plt.show()