In [1]:
%matplotlib inline
import importlib
from pytorch_utils_oh_2 import *

Pytorch utils oh: pytorch_utils_oh_2.py
Pytorch: 0.2.0_4


In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
MODEL_SAVE_PATH = 'whole_gen_3_chars'

In [4]:
import pytorch_utils_oh_2; importlib.reload(pytorch_utils_oh_2); from pytorch_utils_oh_2 import *;

Pytorch utils oh: pytorch_utils_oh_2.py
Pytorch: 0.2.0_4


# Data loading

In [5]:
all_data = pickle.load(open("data/en_train_fixed_1.pkl", "rb" ))
all_data_sentence_index = all_data.set_index('sentence_id')

In [6]:
categories_all = sorted(all_data["class"].unique())
print(categories_all)
print(len(categories_all))
categories_index = dict((c, i) for i, c in enumerate(categories_all))

['ELECTRONIC', 'LETTERS', 'NOT_CHANGED', 'NUMBERS', 'PLAIN', 'VERBATIM']
6


### Utils stuff

In [7]:
chars_normal, chars_normal_index = load_characters_pkl('data/en_features/chars_normal.pkl')
print(''.join(chars_normal))

<SOS><EOS>☒ !"#$%&'(),-./0123456789:;ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~£¥ª²³µº¼½¾éɒʻˈΩμ—€⅓⅔⅛


In [8]:
common_words, common_words_index = load_common_words_10k()
len(common_words)
common_words[0:10]

8192

['<EOS>', '<SOS>', '<UNK>', '<0000>', '<SAMPLE>', '.', ',', 'the', '"', 'of']

### More balanced sample

In [9]:
sample_data = all_data[all_data['class'] != 'NOT_CHANGED']
print("Data rows: {},  (dropped rows: {})".format(len(sample_data), len(all_data)-len(sample_data)))
sample_data = sample_data.reset_index(drop=True)

Data rows: 659544,  (dropped rows: 9258648)


In [10]:
balanced_data_classes_select = list(sample_data.groupby('class'))

balanced_data_accessed_counter = 0 
balanced_data_length = 0
def balanced_data_randomize(max_len=20000):
    global balanced_data, balanced_data_length, balanced_data_accessed_counter
    balanced_data = pd.concat([v.sample(min(max_len, len(v))) for k, v in balanced_data_classes_select])
    balanced_data_length = len(balanced_data)
    balanced_data_accessed_counter = 0

def balanced_data_sample_row():
    global balanced_data_accessed_counter
    global balanced_data_last_sample
    balanced_data_accessed_counter += 1
    if balanced_data_accessed_counter/balanced_data_length > 0.2:
        balanced_data_randomize()
    balanced_data_last_sample = balanced_data.iloc[random.randint(1, balanced_data_length-1)]
    return balanced_data_last_sample
    
balanced_data_randomize()

In [11]:
%%timeit
balanced_data_sample_row()

106 µs ± 2.95 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [12]:
#all_data.groupby('class')['class'].count()
#sample_data.groupby('class')['class'].count()
balanced_data.groupby('class')['class'].count()

class
ELECTRONIC     4964
LETTERS       20000
NUMBERS       20000
PLAIN         20000
VERBATIM      16950
Name: class, dtype: int64

In [13]:
balanced_data_sample_row()

sentence_id                  657708
token_id                         12
class                       NUMBERS
before                         1995
after          nineteen ninety five
class_org                      DATE
Name: 579628, dtype: object

### After words

In [14]:
tmp = list(set(sample_data['after']))
chars_after = sorted(list(set(''.join(tmp))))
chars_after = [SOS_TOKEN, EOS_TOKEN, UNKNOWN_CHAR] + chars_after
chars_after_index = dict((c, i) for i, c in enumerate(chars_after))
len(chars_after)
'_'.join(chars_after)
chars_after_index['z']

32

"<SOS>_<EOS>_☒_ _'_a_b_c_d_e_f_g_h_i_j_k_l_m_n_o_p_q_r_s_t_u_v_w_x_y_z_é"

30

In [15]:
pickle.dump(chars_after, open('data/en_features/chars_after_1.pkl', 'wb'))

In [16]:
chars_after = pickle.load(open('data/en_features/chars_after_1.pkl', 'rb'))
chars_after_index = dict((c, i) for i, c in enumerate(chars_after))

In [17]:
def after_string_to_tensor(word, include_eos=True):
    return string_to_tensor(word, chars_index=chars_after_index, include_eos=include_eos)
after_string_to_tensor('abcé').shape
after_string_to_tensor('abcé')[0, -2, -1] == 1

torch.Size([1, 5, 32])

True

In [18]:
onehot_sos = after_string_to_tensor([SOS_TOKEN], include_eos=False)
onehot_sos.size()

torch.Size([1, 1, 32])

### Samples

In [19]:
def get_random_sample():
    sample_row = balanced_data_sample_row()
    sentence_id = sample_row['class']

    rows = all_data_sentence_index.loc[sample_row['sentence_id']]
    befores = list(rows.before)
        
    token_id_idx = list(rows['token_id']).index(sample_row['token_id'])
    befores[token_id_idx] = SAMPLE_WORD_TOKEN
    
    return sample_row['before'], sample_row['after'], sample_row['class'], befores
            
def tmp():
    s_bef, s_aft, s_class, s_sentence = get_random_sample()
    print(s_class, ':', s_bef, '->', s_aft)
    print(' '.join(s_sentence))
    print(s_sentence)
    print(string_to_tensor(s_bef, chars_normal_index).shape)
    print(after_string_to_tensor(s_aft.split(' ')).shape)
tmp()

PLAIN : vol -> volume
New Phytologist , <SAMPLE> 148 , no
['New', 'Phytologist', ',', '<SAMPLE>', '148', ',', 'no']
torch.Size([1, 4, 104])
torch.Size([1, 2, 32])


# Model functions

In [20]:
use_cuda = True

### Encoder

In [21]:
class EncoderRNN(nn.Module):
    def __init__(self, words_input_size, chars_input_size, words_hidden_size, chars_hidden_size,
                 words_layers=1, chars_layers=1):
        super(EncoderRNN, self).__init__()
        
        self.words_layers = words_layers
        self.chars_layers = chars_layers
        self.words_hidden_size = words_hidden_size
        self.chars_hidden_size = chars_hidden_size

        self.rnn_words = nn.LSTM(words_input_size, words_hidden_size // 2, words_layers,
                                 batch_first=True, bidirectional=True)

        self.rnn_chars = nn.LSTM(chars_input_size, chars_hidden_size // 2, chars_layers,
                                batch_first=True, bidirectional=True)
        
    def forward(self, word_vectors, string_tensor, hidden = None, init_hidden = True):
        if init_hidden:
            hidden_words, hidden_chars = self.init_hidden()
        
        all_outputs_words, hidden_words = self.rnn_words(word_vectors, hidden_words)
        output_words = all_outputs_words[:, -1]
        
        all_outputs_chars, hidden_chars = self.rnn_chars(string_tensor, hidden_chars)
        output_chars = all_outputs_chars[:, -1]
        
        output = torch.cat((output_words, output_chars), 1)
        
        return output

    def init_hidden(self):
        var1_1 = Variable(torch.zeros(2 * self.words_layers, 1, self.words_hidden_size // 2))
        var1_2 = Variable(torch.zeros(2 * self.words_layers, 1, self.words_hidden_size // 2))
        var2_1 = Variable(torch.zeros(2 * self.chars_layers, 1, self.chars_hidden_size // 2))
        var2_2 = Variable(torch.zeros(2 * self.chars_layers, 1, self.chars_hidden_size // 2))
        
        var1_1 = var1_1.cuda(); var1_2 = var1_2.cuda()
        var2_1 = var2_1.cuda(); var2_2 = var2_2.cuda()
        return ((var1_1, var1_2), (var2_1, var2_2))

In [22]:
encoder_rnn = EncoderRNN(words_input_size=len(common_words), chars_input_size=len(chars_normal),
                         words_hidden_size=128, chars_hidden_size=256,
                         words_layers=1, chars_layers=2).cuda()
encoder_rnn

EncoderRNN (
  (rnn_words): LSTM(8192, 64, batch_first=True, bidirectional=True)
  (rnn_chars): LSTM(104, 128, num_layers=2, batch_first=True, bidirectional=True)
)

In [23]:
def test_encoder_single_sample():
    s_bef, s_aft, s_class, s_sentence = get_random_sample()
    
    words_t = words_to_tensor(list(s_sentence), common_words_index)
    words_t = Variable(words_t).cuda()
    
    string_t = string_to_tensor(s_bef, chars_normal_index)
    string_t = Variable(string_t).cuda()
    
    return encoder_rnn(words_t, string_t)
    
encoder_output = test_encoder_single_sample()
encoder_output.size()

torch.Size([1, 384])

### Decoder

In [24]:
class DecoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1):
        super(DecoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        
        self.rnn = nn.GRU(input_size, hidden_size, n_layers,
                                 batch_first=True, bidirectional=False)
        
        self.lin_out = nn.Linear(hidden_size, input_size)
        #self.softmax = nn.LogSoftmax()

    def forward(self, char, hidden):
        #char = char.view(1,1,-1)
        #hidden = hidden.view(1,1,-1)
        output, hidden = self.rnn(char, hidden)
        output = output[:, -1] # view(1,-1)
        output = self.lin_out(output)
        output = F.log_softmax(output)
        return output, hidden
    
    def init_rest_hidden(self, input_var):
        hid_var = Variable(torch.zeros(self.n_layers - 1, 1, self.hidden_size)).cuda()
        res = torch.cat((input_var, hid_var), 0)
        return res
        

decoder_rnn = DecoderRNN(input_size=len(chars_after), hidden_size=128+256, n_layers=2)
decoder_rnn = decoder_rnn.cuda()
decoder_rnn

DecoderRNN (
  (rnn): GRU(32, 384, num_layers=2, batch_first=True)
  (lin_out): Linear (384 -> 32)
)

In [25]:
tmp_hiddens = decoder_rnn.init_rest_hidden(encoder_output.view(1,1,-1))
tmp_a, tmp_b = decoder_rnn(Variable(onehot_sos).cuda(), tmp_hiddens)
print(tmp_a.size())
print(tmp_b.size())
print(tmp_a.topk(1)[1])
print(chars_after[tmp_a.topk(1)[1].data[0][0]])

torch.Size([1, 32])
torch.Size([2, 1, 384])
Variable containing:
 13
[torch.cuda.LongTensor of size 1x1 (GPU 0)]

i


# Training etc

### Accuracy

In [26]:
def test_model_single_sample(model=None):
    s_bef, s_aft, s_class, s_sentence = sample = get_random_sample()
        
    #words_t = words_to_tensor(list(s_sentence), common_words_index)
    words_t = words_to_tensor(sentence_arr_tokenize(s_sentence), common_words_index)
    words_t = Variable(words_t).cuda()
    
    string_t = string_to_tensor(s_bef, chars_normal_index)
    string_t = Variable(string_t).cuda()
    
    encoder_output = encoder_rnn(words_t, string_t)
    
    encoder_output = encoder_output.view(1,1,-1)
    
    decoder_hidden = decoder_rnn.init_rest_hidden(encoder_output)
    decoder_input = Variable(onehot_sos).cuda()

    decoded_output = []
    max_length = 20
    for _ in range(max_length):
        decoder_output, decoder_hidden = decoder_rnn(decoder_input, decoder_hidden)
        #return decoder_output

        topv, topi = decoder_output.data.topk(1)
        char_index = topi[0][0]
        char = chars_after[char_index] # Use own prediction as next input
                
        if char == EOS_TOKEN:
            break

        decoded_output.append(char)
        
        decoder_input = after_string_to_tensor([char], include_eos=False)
        decoder_input = Variable(decoder_input).cuda()
    
    output = ''.join(decoded_output)
    return output, output, s_aft, sample
    
tmp = test_model_single_sample(None)
tmp

('iiiiiiiiiiiiiiiiiiii',
 'iiiiiiiiiiiiiiiiiiii',
 'f c c',
 ('FCC',
  'f c c',
  'LETTERS',
  ['<SAMPLE>', 'Media', 'Bureau', 'CDBS', 'Database', '.']))

In [27]:
def print_local_wrong_predictions(max_results=10):
    arr = get_some_wrong_predictions(None, test_model_single_sample, max_iterations=10000, max_results=max_results)
    for sample, predict, output in arr:
        s_bef, s_aft, s_class, s_sentence = sample
        print("{:<14} => {:<14} || {} \n{:>17} {}".format(s_bef, predict, s_aft, '', ' '.join(s_sentence), ))


In [28]:
print_local_wrong_predictions(2)

honour         => iiiiiiiiiiiiiiiiiiii || honor 
                  " Carl Dickinson stunned by Port Vale song in his <SAMPLE> " .
st             => iiiiiiiiiiiiiiiiiiii || saint 
                  Naom Sindalovskii quotes a telling local <SAMPLE> Petersburg legend about building's construction .


In [29]:
%%time
test_model_accuracy(encoder_rnn, test_model_single_sample)

Accuracy: 0.00% (       0/   10000)
CPU times: user 9min 46s, sys: 4.74 s, total: 9min 50s
Wall time: 2min 27s


0.0

### Training

In [30]:
def train(s_bef, s_aft, s_sentence, encoder_optimizer, decoder_optimizer, loss_function,
          use_teacher_forcing, max_length=20):
    
    #words_t = words_to_tensor(list(s_sentence), common_words_index)
    words_t = words_to_tensor(sentence_arr_tokenize(s_sentence), common_words_index)
    words_t = Variable(words_t).cuda()
    
    string_t = string_to_tensor(s_bef, chars_normal_index)
    string_t = Variable(string_t).cuda()
    
    encoder_output = encoder_rnn(words_t, string_t)
    encoder_output = encoder_output.view(1,1,-1)
    
    decoder_hidden = decoder_rnn.init_rest_hidden(encoder_output)
    decoder_input = Variable(onehot_sos).cuda()
    
    ###
    
    target_arr = list(s_aft) + [EOS_TOKEN]
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    loss = 0
    
    decoded_output = []
    for i in range(len(target_arr)):
        decoder_output, decoder_hidden = decoder_rnn(decoder_input, decoder_hidden)

        decoder_target_i = chars_after_index[target_arr[i]]
        decoder_target_i = Variable(torch.LongTensor([decoder_target_i])).cuda()
        loss += loss_function(decoder_output, decoder_target_i)
        
        topv, topi = decoder_output.data.topk(1)
        char_index = topi[0][0]
        char = chars_after[char_index] # Use own prediction as next input
        decoded_output.append(char)
        
        if use_teacher_forcing:
            char = target_arr[i] # replace input with right target
        else:
            # use output normally as input 
            if char == EOS_TOKEN:
                break
                
        decoder_input = after_string_to_tensor([char], include_eos=False)
        decoder_input = Variable(decoder_input).cuda()
        
    if decoded_output[-1] == EOS_TOKEN:
        decoded_output = decoded_output[:-1]
        
    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return ''.join(decoded_output), (loss.data[0] / len(target_arr))


In [31]:
def train_iterations(n_iters=100000, lr=0.001, teacher_forcing_ratio=0.5,
                     print_every=10000, plot_every=1000):

    start = time.time()
    
    decoder_rnn.train()
    encoder_rnn.train()

    current_loss = 0
    current_loss_iter = 0

    encoder_optimizer = torch.optim.Adam(encoder_rnn.parameters(), lr=lr)
    decoder_optimizer = torch.optim.Adam(decoder_rnn.parameters(), lr=lr)
    loss_function = nn.NLLLoss()
    
    for iteration in range(1, n_iters + 1):
        model_training.iterations += 1
        
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        
        s_bef, s_aft, s_class, s_sentence = get_random_sample()
        
        result, loss = train(s_bef=s_bef, s_aft=s_aft, s_sentence=s_sentence,
                             encoder_optimizer=encoder_optimizer, decoder_optimizer=decoder_optimizer,
                             loss_function=nn.NLLLoss(), use_teacher_forcing=use_teacher_forcing,
                             max_length=40 )
        
        current_loss += loss
        current_loss_iter += 1

        # Print iter number, loss, name and guess
        if iteration % print_every == 0:
            teacher_forcing_str = ""
            if use_teacher_forcing:
                teacher_forcing_str = "(forcing)"
            correct = '✓' if result == s_aft else "✗: {}".format(s_aft)
            
            print("{:>6d} {:>4.0%} ({:>8}) {:>7.3f}   | {:>6.2f}: {} -> {} ({}) {}".format(
                      model_training.iterations, iteration/n_iters, time_since(start),
                      current_loss/current_loss_iter, loss,
                      s_bef, result, correct, teacher_forcing_str))

        # Add current loss avg to list of losses
        if iteration % plot_every == 0:
            model_training.losses.append(current_loss / plot_every)
            model_training.learning_rates.append(lr)
            current_loss = 0
            current_loss_iter = 0
            
        if model_training.iterations % 50000 == 0 or model_training.iterations == 10:
            model_training.save_models()
            acc = test_model_accuracy(encoder_rnn, test_model_single_sample)
            model_training.accuracy.append(acc)
    
    # test_model_accuracy(model, n_sample=10000)

In [32]:
model_training = ModelTraining(MODEL_SAVE_PATH, [encoder_rnn, decoder_rnn])

Save path: data/models/whole_gen_3_chars


In [33]:
train_iterations(n_iters=50, print_every=9, lr=0.0001)

     9  18% (   0m 0s)   3.061   |   3.43: & -> iiii (✗: and) (forcing)
Saved model to data/models/whole_gen_3_chars/10_(EncoderRNN/DecoderRNN)
Accuracy: 0.00% (       0/   10000)
    18  36% (  0m 55s)   2.962   |   3.44: # -> iii<EOS><EOS><EOS> (✗: number) (forcing)
    27  54% (  0m 55s)   2.900   |   0.41: July 1992 -> in (✗: july nineteen ninety two) 
    36  72% (  0m 55s)   2.829   |   3.41: GCRV -> nn<EOS><EOS><EOS><EOS><EOS> (✗: g c r v) (forcing)
    45  90% (  0m 55s)   2.576   |   3.37: October 31, 1980 -> nn<EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS> (✗: october thirty first nineteen eighty) (forcing)


In [34]:
train_iterations(n_iters=(1000-model_training.iterations), print_every=500, lr=0.0001)

   550  53% (  0m 15s)   2.859   |   2.72: centre -> a       (✗: center) (forcing)


In [35]:
train_iterations(n_iters=9000, lr=0.0001, print_every=1000)

  2000  11% (  0m 31s)   2.421   |   2.98: ZVIJEZDA -> t e eee e        (✗: z v i j e z d a) (forcing)
  3000  22% (   1m 2s)   2.302   |   0.78: & -> and (✓) 
  4000  33% (  1m 34s)   2.163   |   0.47: & -> and (✓) 
  5000  44% (   2m 0s)   2.064   |   2.40: colours -> t ntt<EOS> (✗: colors) (forcing)
  6000  56% (  2m 29s)   2.002   |   1.92: Centre -> ninttee (✗: center) (forcing)
  7000  67% (  2m 58s)   1.901   |   1.89: FIVB -> t i t d  (✗: f i v b) (forcing)
  8000  78% (  3m 27s)   1.752   |   1.72: UFC -> s s s (✗: u f c) (forcing)
  9000  89% (  3m 56s)   1.730   |   2.72: ο -> tum<EOS>a<EOS><EOS> (✗: omicron) (forcing)
 10000 100% (  4m 25s)   1.650   |   1.67: AI -> s p (✗: a i) (forcing)


In [36]:
train_iterations(n_iters=90000, print_every=10000)

 20000  11% (  5m 38s)   1.154   |   3.46: April 2011 -> january twenty thirt (✗: april twenty eleven) 
 30000  22% ( 11m 40s)   0.719   |   0.12: 1 -> one (✓) (forcing)
 40000  33% ( 17m 33s)   0.562   |   2.58: α -> sigma (✗: alpha) 
 50000  44% ( 23m 26s)   0.480   |   0.05: ITT -> i t t (✓) 
Saved model to data/models/whole_gen_3_chars/50000_(EncoderRNN/DecoderRNN)
Accuracy: 61.71% (    6171/   10000)
 60000  56% (  31m 1s)   0.462   |   0.00: : -> to (✓) (forcing)
 70000  67% ( 36m 53s)   0.404   |   0.00: kilometres -> kilometers (✓) 
 80000  78% ( 42m 47s)   0.371   |   0.00: - -> to (✓) (forcing)
 90000  89% ( 48m 55s)   0.377   |   1.47: Treccani.itGonzaga -> t r a d a d t d t d t e d t e n e t e  (✗: t r e c c a n i dot i t g o n z a g a) 
100000 100% ( 54m 55s)   0.361   |   0.63: Υ -> apsilon (✗: upsilon) (forcing)
Saved model to data/models/whole_gen_3_chars/100000_(EncoderRNN/DecoderRNN)
Accuracy: 70.49% (    7049/   10000)


In [37]:
train_iterations(n_iters=300000, print_every=10000, teacher_forcing_ratio=0.5, lr=0.001)

110000   3% (   6m 3s)   0.349   |   0.01: st -> saint (✓) 
120000   7% (  12m 6s)   0.320   |   0.01: T. -> t (✓) (forcing)
130000  10% (  18m 4s)   0.312   |   0.41: 12 May 2009 -> the twelfthoof jarutwo thousand nine (✗: the twelfth of may two thousand nine) (forcing)
140000  13% (  24m 1s)   0.338   |   0.41: December 1748 -> december seeenteen norty fight (✗: december seventeen forty eight) (forcing)
150000  17% ( 29m 58s)   0.345   |   0.14: December 2012 -> december twenty twelve (✓) (forcing)
Saved model to data/models/whole_gen_3_chars/150000_(EncoderRNN/DecoderRNN)
Accuracy: 71.20% (    7120/   10000)
160000  20% ( 37m 47s)   0.351   |   0.00: & -> and (✓) 
170000  23% ( 43m 34s)   0.315   |   0.00: & -> and (✓) 
180000  27% ( 49m 35s)   0.292   |   0.00: & -> and (✓) 
190000  30% ( 55m 43s)   0.345   |   0.00: - -> to (✓) (forcing)
200000  33% ( 61m 52s)   0.326   |   0.40: Pele.net -> p e l e not c e t (✗: p e l e dot n e t) (forcing)
Saved model to data/models/whole_gen_3_

In [38]:
print_local_wrong_predictions()

Suo            => sun            || s u o 
                  " <SAMPLE> " in Japan Encyclopedia , p . 916 , p . 916 , at Google Books .
CHSAS          => c h s s        || c h s a s 
                  This program offers paid internships to a select number of students from <SAMPLE> and helps staff develop resources .
realbeer.comSimpson => r e a l l i v e dot  || r e a l b e e r dot c o m s i m p s o n 
                  Hops to lighten your step beerhunter.comA Bud by any other name <SAMPLE> , Willie ( 2007 ) .
HG             => g g            || h g 
                  Khorana <SAMPLE> et al .
23 September 2004 => the twenty thirt of  || the twenty third of september two thousand four 
                  <SAMPLE> .
WVNI           => w v i i        || w v n i 
                  Joey Krol , who worked with WIBI , WLUJ , <SAMPLE> and WYLL .
η              => epsia          || eta 
                  " Τ ο μ υ σ τ η ρ ι ο τ <SAMPLE> ς Π α ι α ν ι α ς ( Paiania's mystery ) " .
KiliLive.com  

In [39]:
train_iterations(n_iters=300000, print_every=10000, teacher_forcing_ratio=0.3, lr=0.001)

410000   3% (  4m 41s)   0.422   |   0.00: C. -> c (✓) 
420000   7% (  9m 26s)   0.542   |   0.03: st -> saint (✓) (forcing)
430000  10% ( 14m 11s)   0.531   |   0.00: & -> and (✓) 
440000  13% ( 18m 52s)   0.676   |   0.01: P. -> p (✓) 
450000  17% ( 23m 32s)   0.618   |   0.51: INDOT -> i n d t t (✗: i n d o t) 
Saved model to data/models/whole_gen_3_chars/450000_(EncoderRNN/DecoderRNN)
Accuracy: 60.30% (    6030/   10000)
460000  20% ( 29m 24s)   0.597   |   0.00: CB -> c b (✓) 
470000  23% ( 34m 11s)   0.689   |   1.83: κ -> alpha (✗: kappa) 
480000  27% ( 38m 49s)   0.648   |   1.89: υ -> omtha (✗: upsilon) 
490000  30% ( 43m 31s)   0.672   |   0.43: KBOI -> b b o (✗: k b o i) 
500000  33% ( 48m 19s)   0.687   |   2.54: 2014-11-10 -> the seventh ot etenhh oh thn h enneneh (✗: the tenth of november twenty fourteen) 
Saved model to data/models/whole_gen_3_chars/500000_(EncoderRNN/DecoderRNN)
Accuracy: 59.54% (    5954/   10000)
510000  37% ( 54m 14s)   0.647   |   0.02: sq -> square

IndexError: list index out of range

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=300000, print_every=10000, teacher_forcing_ratio=0.5, lr=0.0001)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=200000, print_every=10000, teacher_forcing_ratio=0.3, lr=0.0005)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=100000, print_every=10000, teacher_forcing_ratio=0.1, lr=0.0005)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=300000, print_every=10000, teacher_forcing_ratio=0.1, lr=0.0001)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=200000, print_every=10000, teacher_forcing_ratio=0, lr=0.0001)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=200000, print_every=10000, teacher_forcing_ratio=0, lr=0.0001)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=500000, print_every=10000, teacher_forcing_ratio=0, lr=0.0001)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=500000, print_every=10000, teacher_forcing_ratio=0, lr=0.0001)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=500000, print_every=10000, teacher_forcing_ratio=0, lr=0.0001)

In [None]:
print_local_wrong_predictions()

In [None]:
train_iterations(n_iters=500000, print_every=10000, teacher_forcing_ratio=0, lr=0.0001)

In [None]:
print_local_wrong_predictions()