# The Balanced Parentheses Problem
### and a transformer solution (Part 2)

In [1]:
import torch
from turing.translators import Translator

In [2]:
# Build a translator and print, subspace by subspace, the hidden vector that
# tx.h produces for the arguments ("M", "(", 2, 3).
tx = Translator(T=10)
# In the printout below these four arguments show up as st=M, sym1="(",
# pos1=2 and pos2=3; every other subspace is all zeros.
tx.projecto(tx.h("M", "(", 2, 3))

st     tensor([0., 0., 1., 0., 0., 0.]) M
sym1   tensor([1., 0., 0., 0., 0.]) (
sym2   tensor([0., 0., 0., 0., 0.]) UNK
pos1   tensor([0., 0., 1., 0.]) 2
pos2   tensor([0., 0., 1., 1.]) 3
pos3   tensor([0., 0., 0., 0.]) 0
scr1   tensor([0., 0., 0., 0., 0.]) UNK
scr2   tensor([0., 0., 0., 0., 0.]) UNK
scr3   tensor([0., 0., 0., 0.]) 0
scr4   tensor([0., 0., 0.]) None
scr5   tensor([0., 0.]) None


Claims C.4 and C.5

Let's suppose we're about to visit position l=3 at step 5. The last time we visited this position was at step 3.

In [3]:
# Build the first `step` hidden states, filling in only the three position
# subspaces (pos1, pos2, pos3). We'll ignore the other subspaces for now, so
# they stay at zero.
# TODO: clarify 0-indexing for meaning of step, etc.
step = 5
H = torch.zeros(step, tx.w)  # H = (h_0, h_1, ..., h_{step-1})

# Row t stores path[t] in pos2 and path[t + 1] in pos3, so `path` must hold
# exactly one more entry than H has rows.
path = [0, 1, 2, 3, 2, 3]
assert len(path) == step + 1, "path must have step + 1 entries: row t reads path[t + 1]"

for t in range(step):
    print(t)
    # torch.as_tensor with an explicit dtype replaces the legacy
    # torch.Tensor(...) constructor; values are cast to H's dtype either way.
    H[t, tx.pos1_:tx.pos2_] = torch.as_tensor(tx.Bin(t), dtype=H.dtype)
    H[t, tx.pos2_:tx.pos3_] = torch.as_tensor(tx.Bin(path[t]), dtype=H.dtype)
    H[t, tx.pos3_:tx.scr1_] = torch.as_tensor(tx.Bin(path[t + 1]), dtype=H.dtype)
    tx.projecto(H[t, :], subspaces=["pos1", "pos2", "pos3"])


0
pos1   tensor([0., 0., 0., 0.]) 0
pos2   tensor([0., 0., 0., 0.]) 0
pos3   tensor([0., 0., 0., 1.]) 1
1
pos1   tensor([0., 0., 0., 1.]) 1
pos2   tensor([0., 0., 0., 1.]) 1
pos3   tensor([0., 0., 1., 0.]) 2
2
pos1   tensor([0., 0., 1., 0.]) 2
pos2   tensor([0., 0., 1., 0.]) 2
pos3   tensor([0., 0., 1., 1.]) 3
3
pos1   tensor([0., 0., 1., 1.]) 3
pos2   tensor([0., 0., 1., 1.]) 3
pos3   tensor([0., 0., 1., 0.]) 2
4
pos1   tensor([0., 1., 0., 0.]) 4
pos2   tensor([0., 0., 1., 0.]) 2
pos3   tensor([0., 0., 1., 1.]) 3


In [4]:
# Rebinds `tx` to a new translator with T=83; the cells below use this
# instance, not the T=10 one created earlier.
# NOTE(review): T presumably bounds the number of simulated steps — confirm in Translator.
tx = Translator(T=83)
# "B" and "E" presumably mark the beginning and end of the tape — TODO confirm.
tape = "B()((()(()))())E"
# Prints the tape at every step with a state letter and a caret underneath;
# the value displayed for this tape is 'T'.
tx.simulate(tape)

  B()((()(()))())E
I ^                
  B()((()(()))())E
R  ^               
  B()((()(()))())E
R   ^              
  B(*((()(()))())E
M  ^               
  B**((()(()))())E
R   ^              
  B**((()(()))())E
R    ^             
  B**((()(()))())E
R     ^            
  B**((()(()))())E
R      ^           
  B**((()(()))())E
R       ^          
  B**(((*(()))())E
M      ^           
  B**((**(()))())E
R       ^          
  B**((**(()))())E
R        ^         
  B**((**(()))())E
R         ^        
  B**((**(()))())E
R          ^       
  B**((**((*))())E
M         ^        
  B**((**(**))())E
R          ^       
  B**((**(**))())E
R           ^      
  B**((**(***)())E
M          ^       
  B**((**(***)())E
M         ^        
  B**((**(***)())E
M        ^         
  B**((******)())E
R         ^        
  B**((******)())E
R          ^       
  B**((******)())E
R           ^      
  B**((******)())E
R            ^     
  B**((*******())E
M           ^      
  B**((*******())E
M     

'T'

In [5]:
# Show the translator's repr: the tree of submodules and their layer sizes.
tx


WCMTranslator(
  (transition): Transition(
    (linear1): Linear(in_features=59, out_features=89, bias=True)
    (linear2): Linear(in_features=89, out_features=59, bias=True)
  )
  (preprocess_for_adder): PreprocessForAdder(
    (linear): Linear(in_features=59, out_features=66, bias=True)
  )
  (adder_layers): ModuleList(
    (0): FullAdder(
      (halfadder1): HalfAdder(
        (linear_or_and): Linear(in_features=66, out_features=66, bias=True)
        (linear_or_not): Linear(in_features=66, out_features=66, bias=True)
        (half_adder_final): Linear(in_features=66, out_features=66, bias=True)
      )
      (halfadder2): HalfAdder(
        (linear_or_and): Linear(in_features=66, out_features=66, bias=True)
        (linear_or_not): Linear(in_features=66, out_features=66, bias=True)
        (half_adder_final): Linear(in_features=66, out_features=66, bias=True)
      )
      (linear_or1): Linear(in_features=66, out_features=66, bias=True)
      (linear_or2): Linear(in_features=66, ou

In [6]:
# https://stackoverflow.com/questions/49201236/check-the-total-number-of-parameters-in-a-pytorch-model
def numel(m: torch.nn.Module, only_trainable: bool = False):
    """
    Count the scalar parameters held by `m`.

    Parameters whose tensors start at the same memory address (same
    `data_ptr()`) are counted once, so shared parameters are not counted
    twice. With `only_trainable=True`, parameters that have
    `requires_grad = False` are left out of the count.
    """
    size_by_address = {}
    for param in m.parameters():
        if only_trainable and not param.requires_grad:
            continue
        # A later parameter at the same address overwrites the earlier entry,
        # so each address contributes exactly one size to the total.
        size_by_address[param.data_ptr()] = param.numel()
    return sum(size_by_address.values())

# (all parameters, trainable parameters only)
numel(tx, only_trainable=False), numel(tx, only_trainable=True)

(354509, 354509)

In [7]:
# The T attribute of the current `tx` (the instance constructed with T=83).
tx.T

83