In [1]:
from lambeq import BobcatParser, TreeReader, TreeReaderMode, spiders_reader, cups_reader, stairs_reader
from lambeq import TensorAnsatz, SpiderAnsatz, MPSAnsatz, AtomicType, IQPAnsatz
from discopy import Dim
from classic_pipeline import *
from quantum_pipeline import *
from utilities import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Short aliases for the lambeq atomic types; the ansatz maps in this notebook are keyed by them.
N, S = AtomicType.NOUN, AtomicType.SENTENCE
C, P = AtomicType.CONJUNCTION, AtomicType.PUNCTUATION
NP, PP = AtomicType.NOUN_PHRASE, AtomicType.PREPOSITIONAL_PHRASE

In [3]:
# Parser / reader declarations.

# Objects that have to be constructed.
bobcat_parser = BobcatParser(verbose="progress")
tree_parser = TreeReader(mode=TreeReaderMode.RULE_ONLY)

# Ready-made lambeq readers, exposed under "*_parser" names.
spider_parser, cups_parser, stairs_parser = spiders_reader, cups_reader, stairs_reader

In [4]:
# Ansatz declarations: every atomic type is mapped to the same size
# (Dim(2) in the tensor ansatz, 2 in the IQP ansatz).
tensor_ansatz = TensorAnsatz({ty: Dim(2) for ty in (N, S, C, P, NP, PP)})
iqp_ansatz = IQPAnsatz(
    {ty: 2 for ty in (N, S, C, P, NP, PP)},
    n_layers=2,
    n_single_qubit_params=3,
)

In [5]:
# Data extraction for the classic pipeline.
pip = ClassicPipeline(bobcat_parser, tensor_ansatz)

# Enable the rewrite rules stored at positions 0, 1 and 4 of SUPPORTED_RULES.
pip.add_rewriter_rules(*(ClassicPipeline.SUPPORTED_RULES[idx] for idx in (0, 1, 4)))

# Circuit/label creation is disabled here; the following cell reads the data back with load_data.
#train_labels, train_circuits = pip.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/GPS_edited.csv", "n")
#test_labels, test_circuits = pip.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/ePurse_edited.csv", "n")

In [6]:
# Saving is disabled; the labels and circuits are read back from the two text files below.
#save_data("train_data.txt", train_labels, train_circuits)
#save_data("test_data.txt", test_labels, test_circuits)

(train_labels, train_circuits), (test_labels, test_circuits) = (
    load_data("train_data.txt"),
    load_data("test_data.txt"),
)

In [None]:
# Data extraction for the quantum pipeline.

qpip = QuantumPipeline(bobcat_parser, iqp_ansatz)

# Register the rewrite rules on the quantum pipeline itself. This cell previously
# called pip.add_rewriter_rules(...), which configured the classic pipeline a
# second time and never configured qpip.
# NOTE(review): the rules are still read from ClassicPipeline.SUPPORTED_RULES;
# confirm that QuantumPipeline accepts the same rule identifiers.
qpip.add_rewriter_rules(ClassicPipeline.SUPPORTED_RULES[0], ClassicPipeline.SUPPORTED_RULES[1], ClassicPipeline.SUPPORTED_RULES[4])

# NOTE(review): absolute, machine-specific dataset paths; consider a configurable data directory.
qtrain_labels, qtrain_circuits = qpip.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/GPS_edited.csv")
qtest_labels, qtest_circuits = qpip.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/ePurse_edited.csv")

In [None]:
# Only the first five circuits of each split are normalised.
new_train_circuits = pip.normalize_diagrams(train_circuits[:5])
new_test_circuits = pip.normalize_diagrams(test_circuits[:5])

In [None]:
# Training block for the classical pipeline (first five labels of each split).

train_set = pip.create_dataset(train_labels[:5], new_train_circuits)
test_set = pip.create_dataset(test_labels[:5], new_test_circuits)

pip.create_trainer(new_train_circuits, new_test_circuits)
pip.train_model(train_set, test_set)
pip.plot()

In [None]:
# Training block for the quantum pipeline.

qtrain_set = qpip.create_dataset(qtrain_labels, qtrain_circuits)
qtest_set = qpip.create_dataset(qtest_labels, qtest_circuits)

qpip.create_trainer(qtrain_circuits, qtest_circuits)
qpip.train_model(qtrain_set, qtest_set)
qpip.plot()

In [22]:
def _circuit_to_tensor(circuit):
    """Build a boolean symbol-by-symbol tensor for one circuit.

    Entry ``[i, j]`` of the underlying matrix is True when some box in
    ``circuit.boxes`` has ``box.dom`` equal to the i-th free symbol and
    ``box.cod`` equal to the j-th free symbol. The matrix is then stacked with
    itself along a new last axis, giving shape ``(n, n, 2)``.

    Args:
        circuit: object exposing ``free_symbols`` and ``boxes``.

    Returns:
        A ``torch.bool`` tensor of shape ``(n, n, 2)``, where ``n`` is the
        number of distinct free symbols of ``circuit``.
    """
    # Freeze the de-duplicated symbols in a list so rows and columns share one ordering.
    symbols = list(set(circuit.free_symbols))
    # Local name on purpose: the notebook-level N holds AtomicType.NOUN and must not be overwritten.
    n_symbols = len(symbols)

    matrix = np.zeros((n_symbols, n_symbols), dtype=bool)
    boxes = circuit.boxes  # looked up once instead of once per (i, j) pair
    # NOTE(review): this compares box.dom / box.cod against free symbols; confirm
    # these can actually be equal, otherwise the matrix is always all-False.
    for i, symbol_i in enumerate(symbols):
        for j, symbol_j in enumerate(symbols):
            if any(box.dom == symbol_i and box.cod == symbol_j for box in boxes):
                matrix[i, j] = True

    return torch.tensor(np.stack([matrix, matrix], axis=-1), dtype=torch.bool)


# One tensor per circuit, in the same order as the input lists.
train_tensors = [_circuit_to_tensor(circuit) for circuit in train_circuits]
test_tensors = [_circuit_to_tensor(circuit) for circuit in test_circuits]

In [48]:
from lambeq import BobcatParser, AtomicType
from lambeq import TensorAnsatz
from discopy import Box, Ty, Dim, Word
# Define atomic types
# NOTE(review): this cell rebinds N, S and tensor_ansatz, replacing the six-type
# tensor_ansatz declared earlier in the notebook; re-running earlier cells
# afterwards will pick up these narrower definitions.
N = AtomicType.NOUN
S = AtomicType.SENTENCE

sentence = 'John walks in the park'

# Get a string diagram and apply the tensor ansatz to it
parser = BobcatParser(verbose='text')
tensor_ansatz = TensorAnsatz({N: Dim(2), S: Dim(2)})
diagram = parser.sentence2diagram(sentence)
tensor_diagram = tensor_ansatz(diagram)

# Padding: the pad word is always the same, so parse and convert it once and
# reuse the result instead of invoking the parser for every pad.
n_pads = 19  # same count as the original range(1, 20)
pad_diagram = tensor_ansatz(parser.sentence2diagram("parola"))
pad_diagrams = [pad_diagram] * n_pads

# Tensor each pad onto the left of the sentence diagram.
new_diagram = tensor_diagram
for pad in pad_diagrams:
    new_diagram = pad @ new_diagram

print(new_diagram)
print(len(new_diagram))
print(len(tensor_diagram))


parola >> Id(Dim(2)) @ parola >> Id(Dim(2, 2)) @ parola >> Id(Dim(2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ parola >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)) @ John >> Id(Dim(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,