In [1]:
from lambeq import BobcatParser, cups_reader, stairs_reader
from lambeq import TensorAnsatz, SpiderAnsatz, MPSAnsatz, AtomicType
from discopy import Dim
from classic_pipeline import *
from utilities import *

In [2]:
# Short aliases for the lambeq atomic types; they are the keys of the
# dimension maps handed to the ansatz constructors.
N, S, C = AtomicType.NOUN, AtomicType.SENTENCE, AtomicType.CONJUNCTION
P, NP, PP = (
    AtomicType.PUNCTUATION,
    AtomicType.NOUN_PHRASE,
    AtomicType.PREPOSITIONAL_PHRASE,
)

In [3]:
# Three candidate ansaetze; each one receives its own freshly built map that
# assigns dimension 2 to every atomic type.
tensor_ansatz = TensorAnsatz(
    {atomic_type: Dim(2) for atomic_type in (N, S, C, P, NP, PP)}
)
spider_ansatz = SpiderAnsatz(
    {atomic_type: Dim(2) for atomic_type in (N, S, C, P, NP, PP)}
)
# The MPS ansatz additionally takes a bond dimension (3 here).
mps_ansatz = MPSAnsatz(
    {atomic_type: Dim(2) for atomic_type in (N, S, C, P, NP, PP)},
    bond_dim=3,
)

In [4]:
# Build the classical pipeline from the stairs reader and the tensor ansatz.
pipeline = ClassicPipeline(stairs_reader, tensor_ansatz)
# The calls below build labels and circuits directly from the CSV datasets.
# They are kept commented out; the notebook loads "gps_data.txt",
# "cpn_data.txt" and "epurse_data.txt" via load_data instead.
# NOTE(review): the CSV paths are absolute and machine-specific, and the
# meaning of the "n" argument is not visible here -- confirm in classic_pipeline.
#gps_labels, gps_circuits = pipeline.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/classical/GPS.csv", "n")
#cpn_labels, cpn_circuits = pipeline.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/classical/CPN.csv", "n")
#epurse_labels, epurse_circuits = pipeline.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/classical/ePurse.csv", "n")

In [5]:
# Kept for reference: presumably these calls produced the files read below
# (confirm against utilities.save_data).
#save_data("gps_data.txt", gps_labels, gps_circuits)
#save_data("cpn_data.txt", cpn_labels, cpn_circuits)
#save_data("epurse_data.txt", epurse_labels, epurse_circuits)

# Load the (labels, circuits) pair of each dataset, in the order GPS, CPN, ePurse.
(
    (gps_labels, gps_circuits),
    (cpn_labels, cpn_circuits),
    (epurse_labels, epurse_circuits),
) = [
    load_data(cache_file)
    for cache_file in ("gps_data.txt", "cpn_data.txt", "epurse_data.txt")
]

In [None]:
from sklearn.model_selection import KFold

# Number of cross-validation folds. Used both to configure KFold and to
# average the per-fold metrics, so the two can never drift apart.
N_SPLITS = 5

# Evaluate on the union of the GPS and CPN datasets.
circ = gps_circuits + cpn_circuits
lab = gps_labels + cpn_labels

# Running sums of the per-fold metrics, kept as plain Python floats so the
# final report prints numbers rather than tensor reprs.
mean_acc = 0.0
mean_prec = 0.0
mean_rec = 0.0
mean_f1 = 0.0

kf = KFold(n_splits = N_SPLITS, shuffle = True, random_state = 42)
for i, (train_index, test_index) in enumerate(kf.split(circ)):
    print(f"Fold {i+1}:")
    x_train = [circ[index] for index in train_index]
    x_test = [circ[index] for index in test_index]
    y_train = [lab[index] for index in train_index]
    y_test = [lab[index] for index in test_index]

    # A fresh model is created for every fold.
    # NOTE(review): it is built from *all* circuits (train and test),
    # presumably so that every symbol gets a parameter -- confirm in classic_pipeline.
    model = pipeline.create_model(circ)
    train_set = pipeline.create_dataset(x_train, y_train)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    # NOTE(review): the meaning of the two positional 10s is not visible here -- confirm in classic_pipeline.
    pipeline.train_model(train_set, 10, 10)

    # Run the forward pass on the held-out fold once, without tracking
    # gradients, and reuse the result for every metric.
    with torch.no_grad():
        test_predictions = model(x_test)
    test_targets = torch.tensor(y_test)

    test_accuracy = accuracy(test_predictions, test_targets)
    print(f"Test accuracy on fold {i+1}: {test_accuracy.item()}")
    mean_acc += test_accuracy.item()

    test_precision = precision(test_predictions, test_targets)
    print(f"Test precision on fold {i+1}: {test_precision.item()}")
    mean_prec += test_precision.item()

    test_recall = recall(test_predictions, test_targets)
    print(f"Test recall on fold {i+1}: {test_recall.item()}")
    mean_rec += test_recall.item()

    test_f1 = f1score(test_predictions, test_targets)
    print(f"Test f1_score on fold {i+1}: {test_f1.item()}")
    mean_f1 += test_f1.item()

# Turn the running sums into means over the folds.
mean_acc /= N_SPLITS
mean_prec /= N_SPLITS
mean_rec /= N_SPLITS
mean_f1 /= N_SPLITS

print(f"Metrics mean:\n\t- accuracy: {mean_acc}\n\t- precision: {mean_prec}\n\t- recall: {mean_rec}\n\t- f1: {mean_f1}")
