# Overcoming a Theoretical Limitation of Soft-Attention 

Example notebook for the learning experiments on the PALINDROME language.

In [None]:
import sys 
sys.path.append('../')
from src.transformer import StandardTransformer
import torch

## Learning PALINDROME

Define training parameters.

In [None]:
# Symbols available to the model; len(vocab) is later handed to StandardTransformer.
vocab = list("01$&")

# Number of epochs, later handed to the Trainer.
epochs = 50

# Architecture hyperparameters, later forwarded positionally to StandardTransformer.
layers, heads = 2, 1
d_model, d_ffnn = 12, 64

# Remaining StandardTransformer arguments.
# NOTE(review): `eps` looks like a numerical-stability epsilon and `scaled`
# like an attention-scaling switch -- confirm against src/transformer.py.
eps = 1e-5
scaled = False

### Generalization experiment

Initialize the Transformer and its Adam optimizer.

In [None]:
# Build the model. Every argument except `positional` is passed by position,
# so the order below must match StandardTransformer's signature.
transformer = StandardTransformer(
    len(vocab),  # number of symbols in `vocab`
    layers,
    heads,
    d_model,
    d_ffnn,
    scaled,
    eps,
    positional="standard",
)

# Adam over all model parameters with a fixed learning rate of 3e-4.
optim = torch.optim.Adam(transformer.parameters(), lr=3e-4)

Build the training and test datasets, define the model trainer, and train the transformer.

In [None]:
from src.trainer import Trainer
from src.dataset import Dataset

# Keyword settings common to both splits. The keyword `variable_lenght` is
# spelled exactly as the Dataset calls in this notebook spell it.
shared_opts = dict(random_seed=42, data_type='palindrome', variable_lenght=False)

# NOTE(review): the meaning of the three leading positional arguments is not
# visible from this notebook -- confirm in src/dataset.py. Both splits also use
# the same random seed; verify that the `train` flag keeps them distinct.
trainset = Dataset(0, 100, 10, train=True, **shared_opts)
testset = Dataset(0, 100, 100, train=False, **shared_opts)

# NOTE(review): the first Trainer argument (0) is undocumented here -- confirm
# in src/trainer.py.
trainer = Trainer(
    0,
    transformer,
    optim,
    vocab,
    epochs,
    trainset,
    testset,
    verbose=0,
)

# train() returns four values, unpacked here as the train/validation loss and
# accuracy series that the plotting cells consume.
train_l, val_l, train_acc, val_acc = trainer.train()

Plot training and validation loss (log scale).

In [None]:
import matplotlib.pyplot as plt

# One figure with a single axes; both loss curves share the epoch axis.
fig, ax = plt.subplots()
epoch_axis = range(epochs)
loss_curves = (
    (train_l, 'blue', "Train loss"),
    (val_l, 'orange', "Validation loss"),
)
for curve, colour, caption in loss_curves:
    ax.plot(epoch_axis, curve, color=colour, lw=2, label=caption)

# Losses are shown on a logarithmic y-axis.
ax.set_yscale('log')

# Figure-level legend placed at the bottom centre, without a frame.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, frameon=False, loc='lower center', ncol=4)
plt.show()

Plot training and validation accuracy.

In [None]:
# One figure with a single axes; both accuracy curves share the epoch axis.
fig, ax = plt.subplots()
epoch_axis = range(epochs)
accuracy_curves = (
    (train_acc, 'blue', "Train accuracy"),
    (val_acc, 'orange', "Validation accuracy"),
)
for curve, colour, caption in accuracy_curves:
    ax.plot(epoch_axis, curve, color=colour, lw=2, label=caption)

# Fixed y-range with a little headroom above 1.
ax.set_ylim([0, 1.1])

# Figure-level legend placed at the bottom centre, without a frame.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, frameon=False, loc='lower center', ncol=4)
plt.show()