In [1]:
import os

from lambeq import BobcatParser, TreeReader, TreeReaderMode, spiders_reader, cups_reader, stairs_reader
from lambeq import TensorAnsatz, SpiderAnsatz, MPSAnsatz, AtomicType
from discopy import Dim
from classic_pipeline import *
from utilities import *

In [2]:
# Short aliases for the members of lambeq's AtomicType enumeration.
# They serve as the keys of the type -> dimension maps given to the ansatze
# declared further down in this notebook.

N = AtomicType.NOUN
S = AtomicType.SENTENCE
C = AtomicType.CONJUNCTION
P = AtomicType.PUNCTUATION
NP = AtomicType.NOUN_PHRASE
PP = AtomicType.PREPOSITIONAL_PHRASE

In [3]:
# Sentence-to-diagram converters available to the pipeline.
# BobcatParser and TreeReader are instantiated in this cell; spiders_reader,
# cups_reader and stairs_reader are objects imported from lambeq that are only
# re-bound to a "*_parser" alias so all five share one naming scheme.
# NOTE(review): in the cells visible here only cups_parser is actually passed
# to ClassicPipeline.

bobcat_parser = BobcatParser(verbose = "progress")
spider_parser = spiders_reader
cups_parser = cups_reader
stairs_parser = stairs_reader
tree_parser = TreeReader(mode=TreeReaderMode.RULE_ONLY)

In [4]:
# Ansatz declarations.
# Each ansatz gets its own freshly built map that assigns Dim(2) to every
# atomic type (N, S, C, P, NP, PP); MPSAnsatz additionally takes bond_dim=3.

tensor_ansatz = TensorAnsatz({atom: Dim(2) for atom in (N, S, C, P, NP, PP)})
spider_ansatz = SpiderAnsatz({atom: Dim(2) for atom in (N, S, C, P, NP, PP)})
mps_ansatz = MPSAnsatz(
    {atom: Dim(2) for atom in (N, S, C, P, NP, PP)},
    bond_dim=3,
)

In [None]:
#data-extraction for classic pipeline (linux)

# Folder that contains the edited CSV datasets. The default is the original
# Linux location; set the QNLP_DATASET_DIR environment variable to point the
# notebook at another checkout (e.g. on a different machine) without editing
# this cell.
DATASET_DIR = os.environ.get(
    "QNLP_DATASET_DIR",
    "/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets",
)

# Build the pipeline from the cups reader and the tensor ansatz, then enable
# entries 0, 1 and 4 of ClassicPipeline.SUPPORTED_RULES as rewrite rules.
pip = ClassicPipeline(cups_parser, tensor_ansatz)
pip.add_rewriter_rules(ClassicPipeline.SUPPORTED_RULES[0], ClassicPipeline.SUPPORTED_RULES[1], ClassicPipeline.SUPPORTED_RULES[4])

# One (labels, circuits) pair per dataset: GPS -> train, CPN -> test,
# ePurse -> eval. The second argument ("n") is forwarded unchanged; its meaning
# is defined by ClassicPipeline.create_circuits_and_labels.
train_labels, train_circuits = pip.create_circuits_and_labels(os.path.join(DATASET_DIR, "GPS_edited.csv"), "n")
test_labels, test_circuits = pip.create_circuits_and_labels(os.path.join(DATASET_DIR, "CPN_edited.csv"), "n")
eval_labels, eval_circuits = pip.create_circuits_and_labels(os.path.join(DATASET_DIR, "ePurse_edited.csv"), "n")

In [5]:
#data-extraction for classic pipeline (win11)

# Pipeline built from the cups reader and the tensor ansatz, with entries
# 0, 1 and 4 of ClassicPipeline.SUPPORTED_RULES enabled as rewrite rules.
pip = ClassicPipeline(cups_parser, tensor_ansatz)
pip.add_rewriter_rules(*(ClassicPipeline.SUPPORTED_RULES[idx] for idx in (0, 1, 4)))

# Extraction calls for the Windows checkout, currently disabled:
#train_labels, train_circuits = pip.create_circuits_and_labels("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets\\GPS_edited.csv", "n")
#test_labels, test_circuits = pip.create_circuits_and_labels("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets\\CPN_edited.csv", "n")
#eval_labels, eval_circuits = pip.create_circuits_and_labels("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets\\ePurse_edited.csv", "n")

In [6]:
# Disabled one-off step: write the labels and circuits of each split to a text
# file (presumably so the extraction above does not have to be repeated —
# confirm against save_data's implementation).
#save_data("train_data.txt", train_labels, train_circuits)
#save_data("test_data.txt", test_labels, test_circuits)
#save_data("eval_data.txt", eval_labels, eval_circuits)

# Read the (labels, circuits) pair of each split back from its file.
# NOTE(review): the file names are relative; presumably they are resolved
# against the kernel's working directory — confirm how load_data opens them.
train_labels, train_circuits = load_data("train_data.txt")
test_labels, test_circuits = load_data("test_data.txt")
eval_labels, eval_circuits = load_data("eval_data.txt")

In [9]:
# Faulty entries, given as half-open index ranges, are cut out of the circuit lists:
#   train-set (GPS):   127:130, 136:139, 147:148, 150:152, 155:157
#   test-set (CPN):    none
#   eval-set (ePurse): 22:23, 34:46
# NOTE(review): only the circuit lists are filtered in this cell; the matching
# label lists are left untouched.

rtrain_circuits = (
    train_circuits[0:127]        # everything before the first faulty entry
    + train_circuits[130:136]    # skips 127..129
    + train_circuits[139:147]    # skips 136..138
    + train_circuits[148:150]    # skips 147
    + train_circuits[152:155]    # skips 150..151
    + train_circuits[157:]       # skips 155..156
)
reval_circuits = (
    eval_circuits[0:22]          # everything before the first faulty entry
    + eval_circuits[23:34]       # skips 22
    + eval_circuits[46:]         # skips 34..45
)



In [None]:
"""
working parser/ansatz combos:
    - cups / (tensor, spider, mps) : works on all requirements    
    - stairs / (tensor) : works on all requirements
    - tree / (tensor) : works on all requirements
    - bobcat / (tensor, spider, mps) : works with 90+% of requirements
    
NB: combos not listed above do not work together; the exception usually raised is AxiomError.
"""


In [7]:
# Training block for the classical pipeline.

# Wrap each split's labels and circuits into a dataset, in train/test/eval order.
train_set, test_set, eval_set = (
    pip.create_dataset(train_labels, train_circuits),
    pip.create_dataset(test_labels, test_circuits),
    pip.create_dataset(eval_labels, eval_circuits),
)

# The trainer is created from the circuit lists themselves, not from the datasets.
pip.create_trainer(train_circuits, test_circuits, eval_circuits)

# Training and plotting are currently disabled:
#pip.train_model(train_set, eval_set)
#pip.plot() 

In [None]:
"""
Choices to test:
    - Loss Function: for binary classification, the recommended loss functions are BCELoss, BCEWithLogitsLoss, HingeEmbeddingLoss
    - Epochs: range of epochs between 10 and 100
    - Optimizer: see torch documentation
"""

In [8]:
# Hyper-parameter search over the number of training epochs.
# Only the name actually used is imported instead of the whole module namespace.
from sklearn.model_selection import GridSearchCV

# get_model_trainer() returns a (model, trainer) pair; only the model is handed
# to the search below.
model, trainer = pip.get_model_trainer()

#"loss_function": [torch.nn.BCELoss, torch.nn.BCEWithLogitsLoss, torch.nn.HingeEmbeddingLoss]
parameters = {
    "epochs": list(range(10, 100))  # candidate values 10..99
}
tuner = GridSearchCV(model, parameters)

# GridSearchCV.fit needs the training samples as its first argument (targets as
# the second); calling it without arguments raises a TypeError.
# NOTE(review): GridSearchCV expects `model` to implement the scikit-learn
# estimator interface (fit / get_params / set_params, plus a score method or an
# explicit `scoring`) and to accept an "epochs" parameter — confirm that the
# object returned by get_model_trainer() does, otherwise wrap it in an adapter.
tuner.fit(train_circuits, train_labels)