In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import torch.optim as optim

import numpy as np
import pandas as pd
import ast
import math
import time
import random

from models.rnn.encoder_decoder_gru import EncoderRNN, DecoderRNN
from models.rnn.combined_networks import train
from rnn_utils import tensorsFromPair
from utils import showPlot, timeSince, asMinutes
from lang import load_data

%load_ext autoreload
%autoreload 2

In [2]:
import sys, os

# Stream that was active when blockPrint() silenced output (None while printing is enabled).
_blocked_stdout = None
# The os.devnull handle opened by blockPrint(), kept so enablePrint() can close it.
_devnull_stream = None


# Disable
def blockPrint():
    """Silence print() by redirecting sys.stdout to os.devnull.

    The stream that was active at call time is remembered so enablePrint() can
    put it back. Calling blockPrint() again while already blocked is a no-op,
    so the remembered stream is never overwritten by the devnull handle.
    """
    global _blocked_stdout, _devnull_stream
    if _blocked_stdout is None:
        _blocked_stdout = sys.stdout
        _devnull_stream = open(os.devnull, 'w')
        sys.stdout = _devnull_stream


# Restore
def enablePrint():
    """Undo blockPrint(): restore the previous sys.stdout and close the devnull handle.

    The previously active stream is restored rather than sys.__stdout__,
    because inside a Jupyter kernel sys.stdout is replaced by the kernel's own
    stream and sys.__stdout__ would send output away from the notebook.
    Calling enablePrint() when nothing is blocked is a no-op.
    """
    global _blocked_stdout, _devnull_stream
    if _blocked_stdout is not None:
        sys.stdout = _blocked_stdout
        _blocked_stdout = None
        if _devnull_stream is not None:
            _devnull_stream.close()
            _devnull_stream = None


# Training Loops

In [3]:
# load_data() yields three objects used throughout the notebook:
#   eq    - equation-side vocabulary (later cells read eq.index2word and eq.n_words)
#   seq   - sequence-side vocabulary (later cells read seq.n_words)
#   pairs - list of (sequence_string, equation_string) tuples, see the preview below
eq, seq, pairs = load_data()

In [4]:
# Sanity check: show the first two pairs, then the sequence and equation of the first pair.
print(pairs[:2])
print(pairs[0][0])
print(pairs[0][1])

[('24,28,32,36,40,44,48,52', '4*t+4+2*8'), ('36,37,38,39,40,41,42,43', '0+5*8-5+t')]
24,28,32,36,40,44,48,52
4*t+4+2*8


In [5]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [6]:
# from numpy.typing import NDArray

def normalize_0_1(a: np.ndarray) -> np.ndarray:
    """Min-max normalise ``a`` into the range [0, 1].

    The minimum and range are taken over the whole array (all axes at once).

    Args:
        a: Numeric array to normalise. Must be non-empty.

    Returns:
        Array of the same shape as ``a`` with values in [0, 1]. If every
        element of ``a`` is identical (zero range) the result is all zeros
        instead of NaN.
    """
    span = np.ptp(a)
    if span == 0:
        # Constant input: the numerator is all zeros, so divide by 1 to avoid 0/0 -> NaN.
        span = 1
    return (a - np.min(a)) / span

def normalize_1_255(a: np.ndarray) -> np.ndarray:
    """Min-max normalise ``a`` into the integer range [0, 255].

    Despite the function name, the lower bound of the output range is 0.
    The minimum and range are taken over the whole array.

    Args:
        a: Numeric array to normalise. Must be non-empty.

    Returns:
        Integer array of the same shape as ``a``. Fractional results are
        truncated by ``astype(int)``. If every element of ``a`` is identical
        (zero range) the result is all zeros instead of an undefined
        NaN-to-int conversion.
    """
    span = np.ptp(a)
    if span == 0:
        # Constant input: the numerator is all zeros, so divide by 1 to avoid 0/0 -> NaN.
        span = 1
    # The parentheses matter: the cast must apply to the fully scaled value.
    return (255 * (a - np.min(a)) / span).astype(int)

def normalize_minus1_1(a: np.ndarray) -> np.ndarray:
    """Min-max normalise ``a`` into the range [-1, 1].

    The minimum and range are taken over the whole array.

    Args:
        a: Numeric array to normalise. Must be non-empty.

    Returns:
        Array of the same shape as ``a`` with values in [-1, 1]. If every
        element of ``a`` is identical (zero range) every output is -1 (the
        value the minimum always maps to) instead of NaN.
    """
    span = np.ptp(a)
    if span == 0:
        # Constant input: the numerator is all zeros, so divide by 1 to avoid 0/0 -> NaN.
        span = 1
    return 2. * (a - np.min(a)) / span - 1

In [21]:
def compare_sequences(output_sequence: np.ndarray, target_sequence: np.ndarray) -> torch.Tensor:
    """Return the summed absolute difference between two jointly normalised sequences.

    Both sequences are stacked and min-max normalised together with
    ``normalize_0_1`` so they share one scale, then the element-wise absolute
    differences are summed (not averaged).

    Args:
        output_sequence: Sequence produced by the predicted equation.
        target_sequence: Sequence produced by the target equation. Must have
            the same length as ``output_sequence``.

    Returns:
        0-dim float64 tensor holding the summed absolute difference. It is 0
        when the sequences are identical, including the degenerate case where
        every value in both sequences is the same number.
    """
    combined_seq = np.array([output_sequence, target_sequence])

    # Degenerate case: all values in both sequences are equal, so the joint
    # range is 0 and normalising would divide 0 by 0. The sequences are
    # identical, so the distance is exactly 0.
    if np.ptp(combined_seq) == 0:
        return torch.tensor(0.0, dtype=torch.float64)

    norm_output_seq, norm_target_seq = normalize_0_1(combined_seq)
    magnitude = np.abs(norm_target_seq - norm_output_seq).sum()

    # float64 matches the dtype of the penalty tensor used in calc_magnitude.
    return torch.tensor(float(magnitude), dtype=torch.float64)

In [22]:
from utils import eq_to_seq, is_eq_valid

def calc_magnitude(decoder_outputs, target_outputs):
    """Score a decoded equation by how far its sequence is from the target's sequence.

    Each decoder output is greedily decoded (top-1 index) into a symbol via
    ``eq.index2word`` and the symbols are joined into an equation string. An
    equation that ``is_eq_valid`` rejects, or whose generated sequence is all
    zeros, receives the maximum penalty. Otherwise the target indices are
    decoded the same way (dropping the final token) and the two generated
    sequences are compared with ``compare_sequences``.

    Args:
        decoder_outputs: Iterable of per-step decoder output tensors; each
            must yield a single top-1 index.
        target_outputs: Tensor of target token indices. Any shape is
            flattened to 1-D.
            NOTE(review): the final index is assumed to be the EOS token and
            is excluded from the target equation string - confirm.

    Returns:
        0-dim float64 tensor: either the fixed penalty 9.0 or the value
        returned by ``compare_sequences``.
    """
    # NOTE(review): 9 is also the length passed to eq_to_seq below. If that
    # yields 9 values, 9 is the largest distance compare_sequences can return.
    max_penalty_magnitude = torch.tensor(9., dtype=torch.float64)

    decoded_output_symbols = []
    for decoder_output in decoder_outputs:
        _, top_index = decoder_output.data.topk(1)
        decoded_output_symbols.append(eq.index2word[top_index.item()])
    stringified_output = ''.join(decoded_output_symbols)

    if not is_eq_valid(stringified_output):
        return max_penalty_magnitude

    output_sequence = eq_to_seq(stringified_output, 9)
    if np.count_nonzero(output_sequence) < 1:
        return max_penalty_magnitude

    # The target is only decoded once the prediction has passed both checks.
    # reshape(-1) (rather than squeeze()) keeps a single-token target iterable.
    target_indices = target_outputs.cpu().detach().numpy().reshape(-1)
    decoded_target_symbols = [eq.index2word[index] for index in target_indices]
    stringified_target = ''.join(decoded_target_symbols[:-1])
    target_sequence = eq_to_seq(stringified_target, 9)

    return compare_sequences(np.array(output_sequence), np.array(target_sequence))



In [23]:
def training(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01, calc_magnitude = None):
    """Train the encoder/decoder for ``n_iters`` single-pair steps with SGD.

    All training pairs are sampled (with replacement) from the global
    ``pairs`` before the first step. Each step delegates to ``train`` and
    accumulates the returned loss; the mean loss is printed every
    ``print_every`` steps and recorded every ``plot_every`` steps.

    Args:
        encoder: Encoder network; its parameters get their own SGD optimiser.
        decoder: Decoder network; its parameters get their own SGD optimiser.
        n_iters: Number of training steps.
        print_every: Number of steps between progress lines.
        plot_every: Number of steps between recorded average losses.
        learning_rate: Learning rate for both optimisers.
        calc_magnitude: Passed through unchanged as the last argument of ``train``.

    Returns:
        None. The recorded averages are collected but plotting is disabled.
    """
    started_at = time.time()
    averaged_plot_losses = []
    running_print_loss = 0  # cleared after each progress line
    running_plot_loss = 0   # cleared after each recorded average

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    # Draw every sample up front so sampling is not interleaved with training.
    training_pairs = []
    for _ in range(n_iters):
        training_pairs.append(tensorsFromPair(random.choice(pairs), seq, eq))

    criterion = nn.NLLLoss()

    for step, sample in enumerate(training_pairs, start=1):
        source_tensor = sample[0]
        expected_tensor = sample[1]

        step_loss = train(source_tensor, expected_tensor, encoder,
                          decoder, encoder_optimizer, decoder_optimizer, criterion, calc_magnitude)
        running_print_loss += step_loss
        running_plot_loss += step_loss

        if step % print_every == 0:
            mean_print_loss = running_print_loss / print_every
            running_print_loss = 0
            elapsed = timeSince(started_at, step / n_iters)
            print('%s (%d %d%%) %.4f' % (elapsed, step, step / n_iters * 100, mean_print_loss))

        if step % plot_every == 0:
            averaged_plot_losses.append(running_plot_loss / plot_every)
            running_plot_loss = 0

    # Plotting of the recorded averages is currently disabled.
    #showPlot(averaged_plot_losses)

In [27]:
# Build a fresh encoder/decoder pair (hidden state width 256) on the selected
# device. Vocabulary sizes come from the sequence-side and equation-side
# language objects returned by load_data().
hidden_size = 256
encoder = EncoderRNN(seq.n_words, hidden_size).to(device)
decoder = DecoderRNN(hidden_size, eq.n_words).to(device)

# Train for 10000 steps using calc_magnitude as the extra scoring function.
# NOTE(review): the recorded output below logs every 100 steps, which does not
# match print_every=1000 - presumably it is from an earlier run; re-run to confirm.
training(encoder, decoder, 10000, print_every=1000, calc_magnitude = calc_magnitude)

0m 5s (- 4m 20s) (100 2%) 19.6826
0m 9s (- 3m 43s) (200 4%) 14.8882
0m 13s (- 3m 28s) (300 6%) 14.6629
0m 17s (- 3m 20s) (400 8%) 14.5336
0m 21s (- 3m 13s) (500 10%) 14.7804
0m 25s (- 3m 8s) (600 12%) 14.7919
0m 29s (- 3m 3s) (700 14%) 14.3958
0m 34s (- 2m 59s) (800 16%) 14.6135
0m 38s (- 2m 54s) (900 18%) 14.6739
0m 42s (- 2m 50s) (1000 20%) 14.5331
0m 46s (- 2m 45s) (1100 22%) 14.2140
0m 51s (- 2m 42s) (1200 24%) 14.2733
0m 55s (- 2m 38s) (1300 26%) 14.5426
1m 0s (- 2m 34s) (1400 28%) 14.0034
1m 4s (- 2m 29s) (1500 30%) 14.1626
1m 8s (- 2m 24s) (1600 32%) 14.0883
1m 12s (- 2m 20s) (1700 34%) 14.2953
1m 16s (- 2m 15s) (1800 36%) 14.1584
1m 20s (- 2m 10s) (1900 38%) 14.3359
1m 24s (- 2m 6s) (2000 40%) 14.3164
1m 28s (- 2m 1s) (2100 42%) 13.9198
1m 32s (- 1m 57s) (2200 44%) 14.1523
1m 36s (- 1m 53s) (2300 46%) 13.9615
1m 40s (- 1m 49s) (2400 48%) 13.8521
1m 45s (- 1m 45s) (2500 50%) 14.0578
1m 49s (- 1m 40s) (2600 52%) 13.7391
1m 53s (- 1m 36s) (2700 54%) 13.9265
1m 58s (- 1m 33s) (2800

In [40]:
from rnn_utils import tensorFromSentence

# Upper bound on decoding steps (and size of the encoder-output buffer) in evaluate().
MAX_LENGTH = 10
# Aliases so evaluate() can refer to the two vocabularies by role.
input_lang = seq
output_lang = eq
# Special token ids used by evaluate().
# NOTE(review): the PyTorch seq2seq tutorial uses SOS=0 and EOS=1; confirm these
# values match the ids assigned in lang / rnn_utils.
EOS_token = 0
SOS_token = 1

def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedily decode an equation for ``sentence`` and generate its sequence.

    The input is encoded token by token, then the decoder is run from the SOS
    token for at most ``max_length`` steps, feeding its own top-1 prediction
    back in, until it emits the EOS token.

    Args:
        encoder: Trained encoder; must provide ``initHidden()`` and ``hidden_size``.
        decoder: Trained decoder called as ``decoder(input, hidden)``.
        sentence: Input sequence string, tokenised by ``tensorFromSentence``.
        max_length: Maximum number of decoding steps.
            NOTE(review): inputs with more tokens than ``max_length`` overflow
            the encoder-output buffer - confirm callers stay within the bound.

    Returns:
        Tuple ``(decoded_words, output_sequence, attentions)``:
        ``decoded_words`` is the list of decoded symbols, ending with
        ``'<EOS>'`` when decoding terminated on the EOS token;
        ``output_sequence`` is ``eq_to_seq`` applied to the decoded equation
        (without the EOS marker); ``attentions`` is an all-zero placeholder
        with one row per decoding step, since the decoder in use returns no
        attention weights.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        # Collected for an attention decoder; the plain decoder below ignores it.
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        # The decoder starts from the encoder's final hidden state.
        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)
        steps_taken = 0
        reached_eos = False

        for _ in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            steps_taken += 1

            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                reached_eos = True
                break
            decoded_words.append(output_lang.index2word[topi.item()])

            # Feed the greedy prediction back in as the next input.
            decoder_input = topi.squeeze().detach()

        # Strip only the EOS marker. If decoding stopped at max_length without
        # EOS, every decoded word is a real symbol and must be kept.
        equation_symbols = decoded_words[:-1] if reached_eos else decoded_words
        stringified_output = ''.join(equation_symbols)
        output_sequence = eq_to_seq(stringified_output, 9)

        return decoded_words, output_sequence, decoder_attentions[:steps_taken]

In [51]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print model predictions for ``n`` randomly chosen training pairs.

    For each sampled pair four lines are printed, followed by a blank line:
    ``=`` target equation, ``>`` input sequence, ``<`` sequence generated by
    the predicted equation (each value followed by a comma), and ``=`` the
    predicted equation symbols separated by spaces.

    Args:
        encoder: Trained encoder passed to ``evaluate``.
        decoder: Trained decoder passed to ``evaluate``.
        n: Number of pairs to sample (with replacement) from ``pairs``.
    """
    for _ in range(n):
        sample = random.choice(pairs)
        target_equation = sample[1]
        source_sequence = sample[0]
        print('=', target_equation)
        print('>', source_sequence)

        predicted_words, predicted_values, _attentions = evaluate(encoder, decoder, source_sequence)

        predicted_equation = ' '.join(predicted_words)
        rendered_values = [str(value) + ',' for value in predicted_values]
        print('<', ''.join(rendered_values))
        print('=', predicted_equation)
        print('')

In [52]:
# Spot-check the trained model on 10 random pairs (the default n).
evaluateRandomly(encoder, decoder)

= t+9*t-t+5
> 14,23,32,41,50,59,68,77
< 1,32,243,1024,3125,7776,16807,32768,59049,
= t * t * t * t * t <EOS>

= 2*7*5*2*t
> 140,280,420,560,700,840,980,1120
< 1,32,243,1024,3125,7776,16807,32768,59049,
= t * t * t * t * t <EOS>

= t+t*2*t-7
> -4,3,14,29,48,71,98,129
< 0,4,18,48,100,180,294,448,648,
= t * t * t - t * t <EOS>

= 4-9*t-t-3
> -9,-19,-29,-39,-49,-59,-69,-79
< -1,-6,-15,-28,-45,-66,-91,-120,-153,
= t - t * t - t * t <EOS>

= 9+t+t*3*2
> 16,23,30,37,44,51,58,65
< 1,32,243,1024,3125,7776,16807,32768,59049,
= t * t * t * t * t <EOS>

= 2+t+3*7-7
> 17,18,19,20,21,22,23,24
< 2,12,36,80,150,252,392,576,810,
= t * t * t + t * t <EOS>

= t-4*1*t-1
> -4,-7,-10,-13,-16,-19,-22,-25
< -1,-6,-15,-28,-45,-66,-91,-120,-153,
= t - t * t - t * t <EOS>

= 3*4-t-0*t
> 11,10,9,8,7,6,5,4
< -1,-6,-15,-28,-45,-66,-91,-120,-153,
= t - t * t - t * t <EOS>

= 5-t+1*6+1
> 11,10,9,8,7,6,5,4
< -1,-6,-15,-28,-45,-66,-91,-120,-153,
= t - t * t - t * t <EOS>

= 1*t*7+8-t
> 14,20,26,32,38,44,50,56
< 1,32,24