In [7]:
#pip install transformers

In [2]:
import torch
from transformers import GPT2Tokenizer

# Logic clauses written as `head :- body.` rules, kept as one raw string.
# The exact text (leading newline and 3-space indents included) is what gets
# tokenized, so whitespace here changes the resulting token ids.
text = """
Clauses:
   parent(A, B) :- mother(A, B).
   parent(A, B) :- father(A, B).
   grandfather(A, B) :- father(A, C), parent(C, B).
   grandparent(A, B) :- parent(A, C), parent(C, B).
"""

# Load the pretrained "gpt2" tokenizer and encode the text into token ids
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokens = tokenizer.encode(text)

# Wrap the token ids in a PyTorch tensor
tensor = torch.tensor(tokens)

# Convert the tensor back into a plain Python list of token ids
tokens_back = tensor.tolist()

# Decode the ids back into a string (round trip: text -> ids -> tensor -> ids -> text)
text_back = tokenizer.decode(tokens_back)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Display the token-id tensor built from `text`
tensor

tensor([  198, 47404,  2664,    25,   198,   220,   220,  2560,     7,    32,
           11,   347,     8,  1058,    12,  2802,     7,    32,    11,   347,
          737,   198,   220,   220,  2560,     7,    32,    11,   347,     8,
         1058,    12,  2988,     7,    32,    11,   347,   737,   198,   220,
          220, 17695,     7,    32,    11,   347,     8,  1058,    12,  2988,
            7,    32,    11,   327,   828,  2560,     7,    34,    11,   347,
          737,   198,   220,   220,  4490,  8000,     7,    32,    11,   347,
            8,  1058,    12,  2560,     7,    32,    11,   327,   828,  2560,
            7,    34,    11,   347,   737,   198])

In [6]:
# Call the method: the bare attribute `tensor.type` only displays the function
# object, not the tensor's type string.
tensor.type()

'torch.LongTensor'

In [4]:
# Display the decoded string to compare it against the original `text`
text_back

'\nClauses:\n   parent(A, B) :- mother(A, B).\n   parent(A, B) :- father(A, B).\n   grandfather(A, B) :- father(A, C), parent(C, B).\n   grandparent(A, B) :- parent(A, C), parent(C, B).\n'

In [16]:
from collections import OrderedDict
# A larger knowledge base held as one string: person/1, father/2 and mother/2
# facts followed by the same four parent/grandfather/grandparent rules used in `text`.
# NOTE(review): the literal opens with a single '[' but ends with '])', so the
# brackets are unbalanced; it looks like a pasted repr with its prefix cut off. Confirm.
# NOTE(review): `value` is not referenced by any cell visible here.
value = '[person(andrew)., person(bernard)., person(cathleen)., person(daphne)., person(edith)., person(fred)., person(george)., person(john)., person(louis)., person(oscar)., person(paul)., person(robert)., person(stephen)., person(sylvia)., person(william)., person(ada)., father(william, sylvia)., father(oscar, louis)., father(oscar, daphne)., father(oscar, cathleen)., father(oscar, fred)., father(oscar, bernard)., father(louis, stephen)., father(louis, andrew)., father(louis, robert)., father(louis, john)., father(george, oscar)., father(paul, edith)., mother(sylvia, stephen)., mother(sylvia, andrew)., mother(sylvia, robert)., mother(sylvia, john)., mother(edith, louis)., mother(edith, daphne)., mother(edith, cathleen)., mother(edith, fred)., mother(edith, bernard)., mother(ada, sylvia)., parent(A, B) :- mother(A, B)., parent(A, B) :- father(A, B)., grandfather(A, B) :- father(A, C), parent(C, B)., grandparent(A, B) :- parent(A, C), parent(C, B).])'

In [18]:
from andante.collections import OrderedSet
from typing import Union

def get_tensor(rules:Union[str,OrderedSet]) -> torch.Tensor:
    """Tokenize logic rules with the pretrained GPT-2 tokenizer and return the ids as a tensor.

    Args:
        rules: Either the rules as a single string, or an ``OrderedSet`` whose
            ``items()`` pairs are rendered as ``"key: value"`` lines joined by
            newlines before tokenizing.

    Returns:
        An integer (``torch.long``) tensor holding the encoded token ids.

    Raises:
        ValueError: If ``rules`` is neither a ``str`` nor an ``OrderedSet``.
    """
    # Validate and normalise the input first so bad input fails fast, before
    # the tokenizer is loaded.
    if isinstance(rules, str):
        # Encode the argument itself. The earlier version read the local name
        # `text` here before it was ever assigned, raising UnboundLocalError
        # for every string input.
        text = rules
    elif isinstance(rules, OrderedSet):
        # NOTE(review): relies on OrderedSet exposing a dict-like items();
        # confirm against andante.collections.OrderedSet.
        text = "\n".join(f"{k}: {v}" for k, v in rules.items())
    else:
        raise ValueError("Input must be either a string or an OrderedSet")

    # Tokenize the text using a transformer tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    tokens = tokenizer.encode(text)

    # Pin the dtype: an empty token list would otherwise produce a float tensor.
    return torch.tensor(tokens, dtype=torch.long)