In [None]:
%matplotlib inline

from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import numpy as np
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Number of decoding steps AttnDecoderRNN.forward runs when it is called
# without a target tensor.
MAX_LENGTH = 49

In [None]:
# Reserved token ids; they match the entries pre-populated in Vocab.index2word.
PAD_token = 0
SOS_token = 1  # first input fed to the decoders
EOS_token = 2  # appended to every encoded sentence; evaluate() stops decoding on it

class Vocab:
    """Word <-> index table that also counts how often each word was added.

    Indices 0, 1 and 2 are pre-assigned to "PAD", "SOS" and "EOS" in
    ``index2word``; new words are numbered from 3 upwards in order of
    first appearance.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "PAD", 1: "SOS", 2: "EOS"}
        # The three reserved entries above are already counted.
        self.n_words = len(self.index2word)

    def addSentence(self, sentence):
        """Register every single-space-separated token of `sentence`."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Add `word` to the table, or bump its count if it is already known."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

In [None]:
def prepareData(train_path="tasks_train_addprim_jump.txt",
                test_path="tasks_test_addprim_jump.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

# Load the pairs; input_lang / output_lang come back as empty Vocab objects.
input_lang, output_lang, train_pairs, test_pairs = prepareData()

In [None]:
def indexesFromSentence(vocab, sentence):
    """Return the `vocab.word2index` entry of each single-space-separated word.

    A word that is missing from the vocabulary raises KeyError.
    """
    indexes = []
    for token in sentence.split(' '):
        indexes.append(vocab.word2index[token])
    return indexes

def tensorFromSentence(lang, sentence):
    """Encode `sentence` as a (1, n_words + 1) long tensor on `device`, ending in EOS_token."""
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    encoded = torch.tensor(token_ids, dtype=torch.long, device=device)
    return encoded.view(1, -1)

# Build the input/output vocabularies from the training pairs only:
# element 0 of each pair feeds the input vocabulary, element 1 the output one.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])

import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Dataset over [input, target] sentence pairs that yields index tensors.

    Args:
        dataset: iterable of pairs; element 0 is the input sentence and
            element 1 the target sentence.
        in_voacab: vocabulary used to index input sentences (the spelling is
            kept because it is part of the constructor's signature).
        out_vocab: vocabulary used to index target sentences.
    """

    def __init__(self, dataset, in_voacab, out_vocab):
        self.dataset = dataset
        self.in_vocab = in_voacab
        self.out_vocab = out_vocab

        # Walk the pairs once, then split them into two parallel lists.
        pairs = [(pair[0], pair[1]) for pair in self.dataset]
        self.input_sentences = [source for source, _ in pairs]
        self.target_sentences = [target for _, target in pairs]

    def __len__(self):
        return len(self.input_sentences)

    def __getitem__(self, idx):
        """Return the (input, target) index tensors of pair `idx`, each ending in EOS_token."""
        source = self.input_sentences[idx]
        target = self.target_sentences[idx]

        # Look the words up in the matching vocabulary and terminate with EOS.
        source_ids = indexesFromSentence(self.in_vocab, source) + [EOS_token]
        target_ids = indexesFromSentence(self.out_vocab, target) + [EOS_token]

        source_tensor = torch.LongTensor(source_ids).to(device)
        target_tensor = torch.LongTensor(target_ids).to(device)
        return source_tensor, target_tensor



In [None]:
import torch.nn as nn


class EncoderLSTM(nn.Module):
    """Embeds a batch of token indices and runs it through a single-layer, batch-first LSTM."""

    def __init__(self, input_size, hidden_size, dropout_p=0.1):
        super().__init__()
        self.hidden_size = hidden_size

        # Embedding width and LSTM width are both `hidden_size`.
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.LSTM = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, input):
        """Return the LSTM's (output, hidden) for the dropout-regularised embeddings of `input`."""
        token_vectors = self.embedding(input)
        output, hidden = self.LSTM(self.dropout(token_vectors))
        return output, hidden


In [None]:
class Attention(nn.Module):
    """Additive attention: scores each encoder step against a single hidden vector.

    `forward` expects `hidden` as [batch size, hidden size] and
    `encoder_outputs` as [batch size, sequence length, hidden size], and
    returns softmax weights of shape [batch size, sequence length].
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.attn = nn.Linear(2 * self.hidden_size, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))

    def forward(self, hidden, encoder_outputs):
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)

        # Copy the hidden vector once per encoder step so the two can be concatenated.
        tiled_hidden = hidden.unsqueeze(1).repeat(1, seq_len, 1)
        paired = torch.cat((tiled_hidden, encoder_outputs), dim=2)

        # Energy, moved to [batch size, hidden size, sequence length] for the bmm below.
        energy = torch.tanh(self.attn(paired)).transpose(1, 2)

        # One copy of v per batch element: [batch size, 1, hidden size].
        v = self.v.repeat(batch_size, 1).unsqueeze(1)
        scores = torch.bmm(v, energy).squeeze(1)
        return F.softmax(scores, dim=1)


In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch

class DecoderLSTM(nn.Module):
    """LSTM decoder without attention, started from the encoder's final state."""

    def __init__(self, hidden_size, output_size, dropout_p=0.1):
        super().__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.LSTM = nn.LSTM(hidden_size, hidden_size, num_layers=1, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        """Decode step by step and return log-probabilities for every step.

        With `target_tensor` the decoder is teacher-forced for
        `target_tensor.size(1)` steps; without it, it feeds back its own
        top-1 prediction for a fixed 60 steps.

        Returns:
            (log_probs, final_hidden, None); the trailing None keeps the
            return shape aligned with the attention decoder.
        """
        teacher_forcing = target_tensor is not None
        n_steps = target_tensor.size(1) if teacher_forcing else 60

        # Every sequence in the batch starts from the SOS token.
        batch_size = encoder_outputs.size(0)
        step_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)
        state = encoder_hidden

        step_logits = []
        for step in range(n_steps):
            logits, state = self.forward_step(step_input, state)
            step_logits.append(logits)

            if teacher_forcing:
                # Next input is the ground-truth token of this step.
                step_input = target_tensor[:, step].unsqueeze(1)
            else:
                # Next input is the model's own best guess, cut off from the graph.
                _, best = logits.topk(1)
                step_input = best.squeeze(-1).detach()

        log_probs = F.log_softmax(torch.cat(step_logits, dim=1), dim=-1)
        return log_probs, state, None

    def forward_step(self, input, hidden):
        """Run one decoding step; returns the (unnormalised) vocabulary scores and the new state."""
        embedded = F.relu(self.dropout(self.embedding(input)))
        lstm_out, hidden = self.LSTM(embedded, hidden)
        return self.out(lstm_out), hidden

In [None]:
class BahdanauAttention(nn.Module):
    """Additive attention: score = Va(tanh(Wa(query) + Ua(keys)))."""

    def __init__(self, hidden_size):
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        """Return (context, weights): the weighted sum of `keys` and the softmax weights used."""
        combined = torch.tanh(self.Wa(query) + self.Ua(keys))
        raw_scores = self.Va(combined)

        # Drop the trailing score dimension and add a singleton dimension at
        # position 1 so the weights can be batch-multiplied with the keys.
        raw_scores = raw_scores.squeeze(2).unsqueeze(1)
        weights = F.softmax(raw_scores, dim=-1)

        return torch.bmm(weights, keys), weights
    

class AttnDecoderRNN(nn.Module):
    """LSTM decoder that attends over the encoder outputs at every step."""

    def __init__(self, hidden_size, output_size, dropout_p=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.attention = BahdanauAttention(hidden_size)
        # The LSTM input is the embedded token concatenated with the context vector.
        self.lstm = nn.LSTM(2 * hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        """Decode step by step; returns (log_probs, final_hidden, attention_weights).

        With `target_tensor` the decoder is teacher-forced for
        `target_tensor.size(1)` steps; otherwise it feeds back its own top-1
        prediction for MAX_LENGTH steps.
        """
        batch_size = encoder_outputs.size(0)
        teacher_forcing = target_tensor is not None
        n_steps = target_tensor.size(1) if teacher_forcing else MAX_LENGTH

        # Every sequence in the batch starts from the SOS token.
        step_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)
        state = encoder_hidden

        step_logits = []
        step_weights = []
        for step in range(n_steps):
            logits, state, weights = self.forward_step(step_input, state, encoder_outputs)
            step_logits.append(logits)
            step_weights.append(weights)

            if teacher_forcing:
                step_input = target_tensor[:, step].unsqueeze(1)
            else:
                # Feed back the best guess, detached from the autograd history.
                _, best = logits.topk(1)
                step_input = best.squeeze(-1).detach()

        log_probs = F.log_softmax(torch.cat(step_logits, dim=1), dim=-1)
        attentions = torch.cat(step_weights, dim=1)
        return log_probs, state, attentions

    def forward_step(self, input, hidden, encoder_outputs):
        """Run one decoding step; returns (vocabulary scores, new state, attention weights)."""
        embedded = self.dropout(self.embedding(input))

        # The attention query is the LSTM hidden state h_n, moved to batch-first layout.
        query = hidden[0].permute(1, 0, 2)
        context, attn_weights = self.attention(query, encoder_outputs)

        lstm_out, hidden = self.lstm(torch.cat((embedded, context), dim=2), hidden)
        return self.out(lstm_out), hidden, attn_weights


In [None]:
import torch.nn.utils as torch_utils

def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_norm=5.0):
    """Run a single optimisation step on the first batch drawn from `dataloader`.

    Despite the name, only one batch is consumed per call: a fresh iterator
    is created over `dataloader` and its first (input, target) item is used.

    Args:
        max_norm: gradient-norm clipping threshold, applied to the encoder
            and to the decoder separately.

    Returns:
        The loss of that batch as a Python number.
    """
    input_tensor, target_tensor = next(iter(dataloader))

    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.zero_grad()

    encoder_outputs, encoder_hidden = encoder(input_tensor)
    decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)

    # Flatten to (steps, vocabulary) vs (steps,) for the criterion.
    vocab_size = decoder_outputs.size(-1)
    loss = criterion(decoder_outputs.view(-1, vocab_size), target_tensor.view(-1))
    loss.backward()

    # Clip each model's gradients on its own.
    for model in (encoder, decoder):
        torch_utils.clip_grad_norm_(model.parameters(), max_norm)

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()

In [None]:
import time
import math

def asMinutes(s):
    """Format a duration in seconds as '<minutes>m <seconds>s' (both shown as integers)."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Return '<elapsed> (- <remaining>)' given the start time and the fraction of work done.

    The remaining time is extrapolated linearly: elapsed / percent - elapsed.
    """
    elapsed = time.time() - since
    remaining = elapsed / (percent) - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np

def showPlot(points):
    """Plot `points` as a line chart with y-axis major ticks every 0.2.

    Args:
        points: sequence of values to plot against their index.
    """
    # plt.subplots() creates the figure itself; the plt.figure() call that used
    # to precede it opened a second, empty figure on every call.
    _, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)

In [None]:
def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
               print_every=100, plot_every=100):
    """Call train_epoch `n_epochs` times, logging and plotting windowed average losses.

    Args:
        n_epochs: number of train_epoch calls.
        learning_rate: learning rate of the two Adam optimisers.
        print_every: print the mean loss of the last `print_every` calls.
        plot_every: record the mean loss of the last `plot_every` calls for the final plot.
    """
    start = time.time()

    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)
    # The decoders in this notebook already return log-probabilities; the
    # log-softmax inside CrossEntropyLoss leaves those unchanged.
    criterion = nn.CrossEntropyLoss()

    plot_losses = []
    print_window = 0  # loss accumulated since the last print
    plot_window = 0   # loss accumulated since the last plot point

    for epoch in range(1, n_epochs + 1):
        loss = train_epoch(train_dataloader, encoder, decoder,
                           encoder_optimizer, decoder_optimizer, criterion)
        print_window += loss
        plot_window += loss

        if epoch % print_every == 0:
            window_mean = print_window / print_every
            print_window = 0
            progress = epoch / n_epochs
            print('%s (%d %d%%) %.4f' % (timeSince(start, progress),
                                        epoch, progress * 100, window_mean))

        if epoch % plot_every == 0:
            plot_losses.append(plot_window / plot_every)
            plot_window = 0

    showPlot(plot_losses)

In [None]:
def evaluate(encoder, decoder, test_sentences, input_lang, output_lang):
    """Greedy-decode every test pair and report exact / partial match accuracy.

    Args:
        encoder, decoder: models called as encoder(input) and
            decoder(encoder_outputs, encoder_hidden); callers are expected to
            have put them in eval mode.
        test_sentences: sequence of pairs; element 0 is the input sentence,
            element 1 the expected output sentence.
        input_lang: vocabulary used to index the input sentence.
        output_lang: vocabulary whose index2word maps predictions back to words.

    Returns:
        Exact-match accuracy in percent (0.0 when `test_sentences` is empty).
        Both accuracies are also printed.
    """
    total = len(test_sentences)
    success = 0
    success_partial = 0

    with torch.no_grad():
        for pair in test_sentences:
            input_sentence, target_sentence = pair[0], pair[1]
            input_tensor = tensorFromSentence(input_lang, input_sentence)

            encoder_outputs, encoder_hidden = encoder(input_tensor)
            decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)

            _, topi = decoder_outputs.topk(1)
            # Remove only the top-k and batch dimensions. A bare squeeze()
            # would also collapse a single decoding step into a 0-d tensor,
            # which cannot be iterated.
            decoded_ids = topi.squeeze(-1).squeeze(0)

            # Keep everything up to (not including) the first EOS.
            decoded_words = []
            for idx in decoded_ids:
                if idx.item() == EOS_token:
                    break
                decoded_words.append(output_lang.index2word[idx.item()])

            # Exact match: the predicted words equal the target words.
            if decoded_words == target_sentence.split():
                success += 1
            # Partial match: the target appears somewhere inside the prediction.
            if target_sentence in " ".join(decoded_words):
                success_partial += 1

    # An empty test set scores 0% instead of raising ZeroDivisionError.
    exact_accuracy = success / total * 100 if total else 0.0
    partial_accuracy = success_partial / total * 100 if total else 0.0

    print(f"Exact match accuracy: {exact_accuracy:.2f}%")
    print(f"Partial match accuracy: {partial_accuracy:.2f}%")
    return exact_accuracy

In [None]:
# BASELINE

def prepareData(train_path="tasks_train_addprim_jump.txt",
                test_path="tasks_test_addprim_jump.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

input_lang, output_lang, train_pairs, test_pairs = prepareData()

# Vocabularies are built from the training pairs only.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])

hidden_size = 100
batch_size = 1

train_dataset = CustomDataset(train_pairs, input_vocab, output_vocab)
# Use the configured batch_size rather than repeating the literal.
# NOTE(review): CustomDataset yields variable-length tensors, so values above 1
# presumably need a padding collate_fn — confirm before raising it.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

encoder = EncoderLSTM(input_vocab.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_vocab.n_words).to(device)
#decoder_no_attn = DecoderLSTM(hidden_size, output_vocab.n_words).to(device)

# Each "epoch" of train() is one optimisation step on one batch,
# so this runs 100000 single-batch updates.
train(train_dataloader, encoder, decoder, 100000, print_every=1000, plot_every=5000)

In [None]:
#torch.save(encoder, "encoder_LSTM_exp3_wPad_wAttn")
#torch.save(decoder, "decoder_LSTM_exp3_wPad_wAttn")

In [None]:
# Accuracy of every experiment, in run order; the baseline comes first.
results = []
# Named eval_pairs so the builtin eval() is not shadowed for the rest of the notebook.
eval_pairs = list(test_pairs)
# Switch both models to eval mode before decoding.
encoder.eval()
decoder.eval()
result_baseline = evaluate(encoder, decoder, eval_pairs, input_vocab, output_vocab)
results.append(result_baseline)

In [None]:
#SETUP FOR EACH EXPERIMENT!
#1
def prepareData(train_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_train_addprim_complex_jump_num1_rep1.txt",
                test_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_test_addprim_complex_jump_num1_rep1.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

input_lang, output_lang, train_pairs, test_pairs = prepareData()

# Vocabularies are built from the training pairs only.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])


hidden_size = 100
batch_size = 1

train_dataset = CustomDataset(train_pairs, input_vocab, output_vocab)
# Use the configured batch_size rather than repeating the literal.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

encoder = EncoderLSTM(input_vocab.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_vocab.n_words).to(device)
#decoder_no_attn = DecoderLSTM(hidden_size, output_vocab.n_words).to(device)

# Each "epoch" of train() is one optimisation step on one batch.
train(train_dataloader, encoder, decoder, 100000, print_every=10000, plot_every=5000)

torch.save(encoder, "encoder_LSTM_exp3_wPad_wAttn_1")
torch.save(decoder, "decoder_LSTM_exp3_wPad_wAttn_1")


# Named eval_pairs so the builtin eval() is not shadowed.
eval_pairs = list(test_pairs)
encoder.eval()
decoder.eval()
result_1 = evaluate(encoder, decoder, eval_pairs, input_vocab, output_vocab)
results.append(result_1)


In [None]:
#SETUP FOR EACH EXPERIMENT!
#2
def prepareData(train_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_train_addprim_complex_jump_num2_rep1.txt",
                test_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_test_addprim_complex_jump_num2_rep1.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

input_lang, output_lang, train_pairs, test_pairs = prepareData()

# Vocabularies are built from the training pairs only.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])


hidden_size = 100
batch_size = 1

train_dataset = CustomDataset(train_pairs, input_vocab, output_vocab)
# Use the configured batch_size rather than repeating the literal.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

encoder = EncoderLSTM(input_vocab.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_vocab.n_words).to(device)
#decoder_no_attn = DecoderLSTM(hidden_size, output_vocab.n_words).to(device)

# Each "epoch" of train() is one optimisation step on one batch.
train(train_dataloader, encoder, decoder, 100000, print_every=10000, plot_every=5000)

torch.save(encoder, "encoder_LSTM_exp3_wPad_wAttn_2")
torch.save(decoder, "decoder_LSTM_exp3_wPad_wAttn_2")

# Named eval_pairs so the builtin eval() is not shadowed.
eval_pairs = list(test_pairs)
encoder.eval()
decoder.eval()
result_2 = evaluate(encoder, decoder, eval_pairs, input_vocab, output_vocab)
results.append(result_2)


In [None]:
#SETUP FOR EACH EXPERIMENT!
#4
def prepareData(train_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_train_addprim_complex_jump_num4_rep1.txt",
                test_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_test_addprim_complex_jump_num4_rep1.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

input_lang, output_lang, train_pairs, test_pairs = prepareData()

# Vocabularies are built from the training pairs only.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])


hidden_size = 100
batch_size = 1

train_dataset = CustomDataset(train_pairs, input_vocab, output_vocab)
# Use the configured batch_size rather than repeating the literal.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

encoder = EncoderLSTM(input_vocab.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_vocab.n_words).to(device)
#decoder_no_attn = DecoderLSTM(hidden_size, output_vocab.n_words).to(device)

# Each "epoch" of train() is one optimisation step on one batch.
train(train_dataloader, encoder, decoder, 100000, print_every=10000, plot_every=5000)

torch.save(encoder, "encoder_LSTM_exp3_wPad_wAttn_4")
torch.save(decoder, "decoder_LSTM_exp3_wPad_wAttn_4")

# Named eval_pairs so the builtin eval() is not shadowed.
eval_pairs = list(test_pairs)
encoder.eval()
decoder.eval()
result_4 = evaluate(encoder, decoder, eval_pairs, input_vocab, output_vocab)
results.append(result_4)


In [None]:
#SETUP FOR EACH EXPERIMENT!
#8
def prepareData(train_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_train_addprim_complex_jump_num8_rep1.txt",
                test_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_test_addprim_complex_jump_num8_rep1.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

input_lang, output_lang, train_pairs, test_pairs = prepareData()

# Vocabularies are built from the training pairs only.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])


hidden_size = 100
batch_size = 1

train_dataset = CustomDataset(train_pairs, input_vocab, output_vocab)
# Use the configured batch_size rather than repeating the literal.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

encoder = EncoderLSTM(input_vocab.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_vocab.n_words).to(device)
#decoder_no_attn = DecoderLSTM(hidden_size, output_vocab.n_words).to(device)

# Each "epoch" of train() is one optimisation step on one batch.
train(train_dataloader, encoder, decoder, 100000, print_every=10000, plot_every=5000)

torch.save(encoder, "encoder_LSTM_exp3_wPad_wAttn_8")
torch.save(decoder, "decoder_LSTM_exp3_wPad_wAttn_8")

# Named eval_pairs so the builtin eval() is not shadowed.
eval_pairs = list(test_pairs)
encoder.eval()
decoder.eval()
result_8 = evaluate(encoder, decoder, eval_pairs, input_vocab, output_vocab)
results.append(result_8)


In [None]:
#SETUP FOR EACH EXPERIMENT!
#16
def prepareData(train_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_train_addprim_complex_jump_num16_rep1.txt",
                test_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_test_addprim_complex_jump_num16_rep1.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

input_lang, output_lang, train_pairs, test_pairs = prepareData()

# Vocabularies are built from the training pairs only.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])


hidden_size = 100
batch_size = 1

train_dataset = CustomDataset(train_pairs, input_vocab, output_vocab)
# Use the configured batch_size rather than repeating the literal.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

encoder = EncoderLSTM(input_vocab.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_vocab.n_words).to(device)
#decoder_no_attn = DecoderLSTM(hidden_size, output_vocab.n_words).to(device)

# Each "epoch" of train() is one optimisation step on one batch.
train(train_dataloader, encoder, decoder, 100000, print_every=10000, plot_every=5000)

torch.save(encoder, "encoder_LSTM_exp3_wPad_wAttn_16")
torch.save(decoder, "decoder_LSTM_exp3_wPad_wAttn_16")

# Named eval_pairs so the builtin eval() is not shadowed.
eval_pairs = list(test_pairs)
encoder.eval()
decoder.eval()
result_16 = evaluate(encoder, decoder, eval_pairs, input_vocab, output_vocab)
results.append(result_16)


In [None]:
#SETUP FOR EACH EXPERIMENT!
#32
def prepareData(train_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_train_addprim_complex_jump_num32_rep1.txt",
                test_path="ATNLProject-main/Data/add_prim_split/with_additional_examples/tasks_test_addprim_complex_jump_num32_rep1.txt"):
    """Read the train and test files and split every line into an [input, output] pair.

    Each line is split on "OUT: "; every resulting part is stripped of
    surrounding whitespace and has the "IN: " marker removed.

    Args:
        train_path: file read for the training pairs.
        test_path: file read for the test pairs.

    Returns:
        (input_lang, output_lang, train_pairs, test_pairs). The two Vocab
        objects are returned freshly constructed; no words are added here.
    """
    def read_pairs(path):
        # The context manager closes the handle as soon as the text is read;
        # a bare open(path).read() leaves that to the garbage collector.
        with open(path) as handle:
            lines = handle.read().strip().split("\n")
        return [[re.sub("IN: ", '', part.strip()) for part in line.split("OUT: ")]
                for line in lines]

    print("reading train lines...")
    train_pairs = read_pairs(train_path)

    print("reading test lines...")
    test_pairs = read_pairs(test_path)

    print("Building dictionaries...")
    input_lang = Vocab('in')
    output_lang = Vocab('out')

    print("Data loading complete")

    return input_lang, output_lang, train_pairs, test_pairs

input_lang, output_lang, train_pairs, test_pairs = prepareData()

# Vocabularies are built from the training pairs only.
input_vocab = Vocab('IN')
output_vocab = Vocab('OUT')
for line in train_pairs:
    input_vocab.addSentence(line[0])
    output_vocab.addSentence(line[1])


hidden_size = 100
batch_size = 1

train_dataset = CustomDataset(train_pairs, input_vocab, output_vocab)
# Use the configured batch_size rather than repeating the literal.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

encoder = EncoderLSTM(input_vocab.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_vocab.n_words).to(device)
#decoder_no_attn = DecoderLSTM(hidden_size, output_vocab.n_words).to(device)

# Each "epoch" of train() is one optimisation step on one batch.
train(train_dataloader, encoder, decoder, 100000, print_every=10000, plot_every=5000)

torch.save(encoder, "encoder_LSTM_exp3_wPad_wAttn_32")
torch.save(decoder, "decoder_LSTM_exp3_wPad_wAttn_32")

# Named eval_pairs so the builtin eval() is not shadowed.
eval_pairs = list(test_pairs)
encoder.eval()
decoder.eval()
result_32 = evaluate(encoder, decoder, eval_pairs, input_vocab, output_vocab)
results.append(result_32)


In [None]:
results

In [None]:
import matplotlib.pyplot as plt

# One bar per experiment, labelled with the numN value from that experiment's data file names.
composed_commands = ['1', '2', '4', '8', '16', '32']
# Pair each label with its named result. Slicing results[1:] only lines up
# when every experiment cell has been run exactly once and in order; re-running
# a cell appends a duplicate and shifts the bars.
accuracy_values = [result_1, result_2, result_4, result_8, result_16, result_32]

# Creating the bar plot
# NOTE(review): an earlier cell calls plt.switch_backend('agg'); confirm the
# figure is actually displayed by plt.show() in this environment.
plt.figure(figsize=(10, 6))
plt.bar(composed_commands, accuracy_values, color='lightsteelblue')

# Adding labels and title
plt.xlabel('Number of Composed Commands Used for Training')
plt.ylabel('Accuracy on New Commands (%)')
plt.title('Zero-Shot Generalization After Adding "Jump" Commands')
plt.ylim(0, 100)  # Setting the limit for y-axis to 100% for clarity

# Display the plot
plt.show()