In [210]:
#!pip install torch torchvision tqdm annoy gensim
import pandas as pd
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from random import sample
from collections import defaultdict
from pathlib import Path
import networkx as nx
import pickle
from itertools import permutations
import numpy as np
from random import sample
from gensim.models.keyedvectors import KeyedVectors
#from gensim.similarities.index import AnnoyIndexer
from scipy.stats import spearmanr
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from multiprocessing import Pool
import unicodedata
import re
import string
import time
import math
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [211]:
device = torch.device( "cuda" if torch.cuda.is_available() else "cpu")

In [212]:
%%time
embedding = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=10000)

Wall time: 266 ms


In [213]:
def unicodeToAscii(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')
    
# 大文字を全部小文字にする
# 無駄な空白や文字じゃないやつを全部消す
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r"", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r"", s)
    return s


In [214]:
%%time
# あらかじめ入力する単語を綺麗にしておく
# 全て小文字にして、a-z以外の単語を取り除く
normalized_words = set([normalizeString(word) for word in embedding.vocab.keys()]) & embedding.vocab.keys()

Wall time: 65.8 ms


In [215]:
n_train = int(len(normalized_words) * 0.8)
n_validate = int(len(normalized_words) * 0.1)
n_test = len(normalized_words) - n_train - n_validate

all_words = list(normalized_words)
train_words = set(sample(all_words, n_train))
validate_words = set(sample(list(set(all_words) - train_words), n_validate))
test_words = normalized_words - train_words - validate_words

In [216]:
n_train, n_validate, n_test, len(normalized_words)

(5647, 705, 707, 7059)

In [217]:
# モデルパラメータの設定
all_letters = string.ascii_lowercase
n_letters = len(all_letters)
input_size = n_letters
        
def letter2tensor(letter):
    return all_letters.index(letter)

def letter2onehot(letter):
    tensor = torch.zeros(1, n_letters, device=device)
    tensor[0][all_letters.find(letter)] = 1
    return tensor

def word2input_tensors(word):
    return torch.tensor([letter2tensor(letter) for letter in word]) * 1.0

def word2input_one_hot(word):
     return torch.cat([letter2onehot(l) for l in word], dim=0)
    
def word2target_tensor(word):
    return torch.from_numpy(embedding[word]).view(1, -1).to(device)

#calc the time
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [218]:
# encoder(basemodel)
class EncoderGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(EncoderGRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.embedding = nn.Embedding(self.input_size, self.hidden_size)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, 1)
        self.affin = nn.Linear(self.hidden_size, self.output_size)
        
    def forward(self, input, hidden):
        # input is (the len of alfabet inwords, the num of type = 26, hidden = 100)
        embedded = self.embedding(input)
        output = embedded.view(-1, 1, self.hidden_size)   
        output, hidden = self.gru(output, hidden)
        # output is hiddensize * lengthofwords        
        output_splitter_prob =  F.softmax(torch.sum(output, dim = 2).view(output.size(0)))
        output_weighted = output.view(output.size(0), self.hidden_size) * output_splitter_prob.view(output.size(0),1)
        # hr (lenof word * hidden)
        sum_hr = torch.sum(output_weighted, dim = 0)
        output = self.affin(sum_hr.view(self.hidden_size))        
        return output, output_splitter_prob

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [219]:
# encoder(bidirectional)
class EncoderGRU_second(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(EncoderGRU_second, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.embedding = nn.Embedding(self.input_size, self.hidden_size)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, 1, dropout = 0.3)
        
        # addtional NN for the outputs to get their weights
        
        self.affin_prob_pre = nn.Linear(self.hidden_size, 10)
        self.affin_activate = nn.LeakyReLU()
        self.affin_prob_suf = nn.Linear(10, 1)
        
        #last affine
        self.affin = nn.Linear(self.hidden_size, self.output_size)
        
    def forward(self, input, hidden):
        #dynamical net: initialize
        self.output_affin_list = []
        
        
        # input is (the len of alfabet inwords, the num of type = 26, hidden = 100)
        embedded = self.embedding(input)
        output = embedded.view(-1, 1, self.hidden_size)   
        output, hidden = self.gru(output, hidden)
        
        
        # output is hiddensize * length of words
        #torch_sum = torch.sum(output, dim = 2).view(output.size(0))
        
        #this time, try NN as the weight of output
        output_pre_affin = self.affin_prob_pre(output.view(-1, self.hidden_size))
        output_activate = self.affin_activate(output_pre_affin)
        torch_sum = self.affin_prob_suf(output_activate)
        
        
        output_splitter_prob =  F.softmax(torch_sum.view(torch_sum.size(0)))
        
        output_weighted = output.view(output.size(0), self.hidden_size) * output_splitter_prob.view(output.size(0),1)
        
        
        # hr (lenof word * hidden)
        sum_hr = torch.sum(output_weighted, dim = 0)
        output = self.affin(sum_hr.view(self.hidden_size)) 
        
        return output, output_splitter_prob

    def initHidden(self):
        # 2 means bidirectional
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [220]:
class EncoderGRU_third(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(EncoderGRU_third, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.embedding = nn.Embedding(self.input_size, self.hidden_size)
        self.gru = nn.LSTM(self.hidden_size, self.hidden_size, 1, dropout = 0.3)
        
        # addtional NN for the outputs to get their weights
        
        self.affin_prob_pre = nn.Linear(self.hidden_size, 1)
        self.threshold = nn.Threshold(0.20, 0)
        #self.affin_activate = nn.Hardtanh()
        #self.affin_prob_suf = nn.Linear(10, 1)
        
        #last affine
        self.affin = nn.Linear(self.hidden_size, self.output_size)
        
    def forward(self, input, hidden, cell):
        #dynamical net: initialize
        self.output_affin_list = []
        
        
        # input is (the len of alfabet inwords, the num of type = 26, hidden = 100)
        embedded = self.embedding(input)
        output = embedded.view(-1, 1, self.hidden_size)   
        output, hidden = self.gru(output, (hidden, cell))
        
        
        # output is hiddensize * length of words
        torch_sum = torch.sum(output, dim = 2).view(output.size(0))
        #this time, try NN as the weight of output
        
        
        #output_pre_affin = self.affin_prob_pre(output.view(-1, self.hidden_size))
        #torch_sum = output_pre_affin
        
        #output_activate = self.affin_activate(output_pre_affin)
        #torch_sum = self.affin_prob_suf(output_activate)
        # tanh -> relu
        
        
        output_splitter_prob = F.softmax(torch_sum.view(torch_sum.size(0)))
        output_splitter_prob = self.threshold(F.relu(output_splitter_prob))
        output_weighted = output.view(output.size(0), self.hidden_size) * output_splitter_prob.view(output.size(0),1)
        
        
        # hr (len of word * hidden)
        sum_hr = torch.sum(output_weighted, dim = 0)
        output = self.affin(sum_hr.view(self.hidden_size)) 
        
        return output, output_splitter_prob

    def initHidden(self):
        # 2 means bidirectional
        #return torch.zeros(1, 1, self.hidden_size, device=device)
        return(torch.zeros(1,1, self.hidden_size, device = device), torch.zeros(1,1,self.hidden_size, device = device))

In [221]:
class EncoderGRU_forth(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(EncoderGRU_forth, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.gru = nn.LSTM(self.input_size, self.hidden_size, 1, dropout = 0.3)
        
        # addtional NN for the outputs to get their weights
        
        self.affin_prob_pre = nn.Linear(self.hidden_size, 1)
        self.threshold = nn.Threshold(0.20, 0)
        #self.affin_activate = nn.Hardtanh()
        #self.affin_prob_suf = nn.Linear(10, 1)
        
        #last affine
        self.affin = nn.Linear(self.hidden_size, self.output_size)
        
    def forward(self, input, hidden, cell):
        # input is (the len of alfabet inwords, the num of type = 26, hidden = 100)
        output, hidden = self.gru(input.view(input.size(0),1,-1) , (hidden, cell))
        
        
        # output is hiddensize * length of words
        torch_sum = torch.sum(output, dim = 2).view(output.size(0))
        #this time, try NN as the weight of output
        
        
        #output_pre_affin = self.affin_prob_pre(output.view(-1, self.hidden_size))
        #torch_sum = output_pre_affin
        
        #output_activate = self.affin_activate(output_pre_affin)
        #torch_sum = self.affin_prob_suf(output_activate)
        # tanh -> relu
        
        
        output_splitter_prob = F.softmax(torch_sum.view(torch_sum.size(0)))
        output_splitter_prob = self.threshold(F.relu(output_splitter_prob))
        output_weighted = output.view(output.size(0), self.hidden_size) * output_splitter_prob.view(output.size(0),1)
        
        
        # hr (len of word * hidden)
        sum_hr = torch.sum(output_weighted, dim = 0)
        output = self.affin(sum_hr.view(self.hidden_size)) 
        
        return output, output_splitter_prob

    def initHidden(self):
        # 2 means bidirectional
        #return torch.zeros(1, 1, self.hidden_size, device=device)
        return(torch.zeros(1,1, self.hidden_size, device = device), torch.zeros(1,1,self.hidden_size, device = device))

In [222]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, encoder_optimizer, criterion):
    encoder_hidden, encoder_cell = encoder.initHidden()

    encoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0
    
    encoder_output, splitter_prob = encoder(input_tensor, encoder_hidden,encoder_cell)
    loss = criterion(encoder_output, target_tensor.view(300))
    loss.backward()

    encoder_optimizer.step()
    return loss.item() / target_length

In [223]:
def trainIters(encoder, all_words, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adagrad(encoder.parameters(), lr=learning_rate)
    
    # all_word

    criterion = nn.SmoothL1Loss()
    for epoch in range(n_iters):
        for iter in range(len(all_words)):
            input_tensor = torch.tensor(word2input_one_hot(all_words[iter]),device=device)
            target_tensor = word2target_tensor(all_words[iter])
        
            loss = train(input_tensor, target_tensor, encoder, encoder_optimizer, criterion)
        
        
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total #/ print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, (iter + 1) / n_iters), iter, (iter+1) / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0
                
    showPlot(plot_losses)

In [224]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [225]:
hidden_size = 300
input_size = 26
out_put_size = 300
n_iters = 3


encoder1 = EncoderGRU_forth(input_size , hidden_size, out_put_size).to(device)

trainIters(encoder1, all_words, n_iters= n_iters, print_every=100)

  


0m 0s (- 0m 0s) (0 33%) 0.0147
0m 5s (- -1m 54s) (100 3366%) 1.4353
0m 10s (- -1m 49s) (200 6700%) 1.2904
0m 15s (- -1m 44s) (300 10033%) 1.4368
0m 20s (- -1m 39s) (400 13366%) 1.4301
0m 25s (- -1m 34s) (500 16700%) 1.4521
0m 30s (- -1m 29s) (600 20033%) 1.3561
0m 36s (- -1m 24s) (700 23366%) 1.4026
0m 41s (- -1m 18s) (800 26700%) 1.3943
0m 46s (- -1m 14s) (900 30033%) 1.3852
0m 50s (- -1m 9s) (1000 33366%) 1.3245
0m 55s (- -1m 4s) (1100 36700%) 1.4115
1m 1s (- -2m 58s) (1200 40033%) 1.3847
1m 7s (- -2m 53s) (1300 43366%) 1.4446
1m 12s (- -2m 47s) (1400 46700%) 1.4232
1m 17s (- -2m 42s) (1500 50033%) 1.3575
1m 22s (- -2m 37s) (1600 53366%) 1.3567
1m 28s (- -2m 32s) (1700 56700%) 1.2804
1m 33s (- -2m 26s) (1800 60033%) 1.4218
1m 38s (- -2m 21s) (1900 63366%) 1.5293
1m 43s (- -2m 16s) (2000 66700%) 1.3525
1m 49s (- -2m 11s) (2100 70033%) 1.3566
1m 54s (- -2m 5s) (2200 73366%) 1.4156
1m 59s (- -2m 0s) (2300 76700%) 1.3256
2m 5s (- -3m 55s) (2400 80033%) 1.3941
2m 11s (- -3m 49s) (2500 833

KeyboardInterrupt: 

In [228]:
def predict(input_word, encoder):
    input_tensor = torch.tensor(word2input_one_hot(input_word), dtype=torch.long, device=device)
    encoder_hidden, encoder_cell = encoder.initHidden()
    input_length = input_tensor.size(0)
    output, splitter_prob = encoder(input_tensor, encoder_hidden, encoder_cell)
    return splitter_prob


In [229]:
print(predict('working', encoder1))
print(predict('going', encoder1))
print(predict('prepare', encoder1))
print(predict('unlockable', encoder1))

  


RuntimeError: Expected object of scalar type Float but got scalar type Long for argument #4 'mat1'