In [None]:
from lambeq import BobcatParser, cups_reader, stairs_reader
from lambeq import TensorAnsatz, SpiderAnsatz, MPSAnsatz, AtomicType
from discopy import Dim
from classic_pipeline import *
from utilities import *

In [2]:
# Short aliases for the lambeq atomic types; they are the keys of the
# dimension maps handed to the ansatz constructors.
N, S, C, P, NP, PP = (
    AtomicType.NOUN,
    AtomicType.SENTENCE,
    AtomicType.CONJUNCTION,
    AtomicType.PUNCTUATION,
    AtomicType.NOUN_PHRASE,
    AtomicType.PREPOSITIONAL_PHRASE,
)

In [3]:
# Every atomic type is mapped to Dim(2). Each ansatz receives its own freshly
# built mapping, so no dictionary is shared between them.
atomic_types = (N, S, C, P, NP, PP)

tensor_ansatz = TensorAnsatz({atomic_type: Dim(2) for atomic_type in atomic_types})
spider_ansatz = SpiderAnsatz({atomic_type: Dim(2) for atomic_type in atomic_types})
# The MPS ansatz additionally takes a bond dimension of 3.
mps_ansatz = MPSAnsatz(
    {atomic_type: Dim(2) for atomic_type in atomic_types},
    bond_dim=3,
)

In [4]:
# Pipeline built from the stairs reader and the tensor ansatz; this is the
# object every cross-validation cell below calls into.
pipeline = ClassicPipeline(stairs_reader, tensor_ansatz)
# Disabled: builds labels and circuits from the CSV datasets. The paths are
# absolute and machine-specific, so adjust them before re-enabling.
# NOTE(review): the meaning of the trailing "n" argument is not visible here.
#gps_labels, gps_circuits = pipeline.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/classical/GPS.csv", "n")
#cpn_labels, cpn_circuits = pipeline.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/classical/CPN.csv", "n")
#epurse_labels, epurse_circuits = pipeline.create_circuits_and_labels("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/classical/ePurse.csv", "n")

In [5]:
# Disabled: writes each dataset's labels and circuits to a text file.
#save_data("gps_data.txt", gps_labels, gps_circuits)
#cpn_data_note = None
#save_data("cpn_data.txt", cpn_labels, cpn_circuits)
#save_data("epurse_data.txt", epurse_labels, epurse_circuits)

# Load the labels and circuits of the three datasets from their text files,
# in the order GPS, CPN, ePurse. Each load_data result is unpacked as a
# (labels, circuits) pair.
(
    (gps_labels, gps_circuits),
    (cpn_labels, cpn_circuits),
    (epurse_labels, epurse_circuits),
) = [load_data(path) for path in ("gps_data.txt", "cpn_data.txt", "epurse_data.txt")]

In [None]:
#5-fold cross validation for gps dataset
# Split the GPS circuits and labels with k_split (k = 5).
gps_folds = k_split(gps_circuits, gps_labels, 5)

# The model is created from the circuits of all folds. The folds do not change
# inside the loop, so the list is collected once rather than on every pass.
# Element 0 of each fold entry is taken as the circuit -- presumably entries
# are (circuit, label) pairs; verify against k_split.
circuits = [sample[0] for fold in gps_folds for sample in fold]

for i in range(1, 6):
    print(f"Running fold {i} of 5:")
    # NOTE(review): the second argument is the constant 1 on every pass, while
    # `i` is only used in the message above. If that argument selects the
    # held-out fold, all five passes train and test on the same split and it
    # should depend on `i` -- confirm the argument's meaning and index base in
    # ClassicPipeline.fold_datasets before changing it.
    train_set, test_set = pipeline.fold_datasets(gps_folds, 1)

    # A model and a trainer are created again on every pass.
    model = pipeline.create_model(circuits)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    pipeline.train_model(train_set, test_set, 1, 1)

In [None]:
#5-fold cross validation for cpn dataset
# Split the CPN circuits and labels with k_split (k = 5).
cpn_folds = k_split(cpn_circuits, cpn_labels, 5)

# The model is created from the circuits of all folds. The folds do not change
# inside the loop, so the list is collected once rather than on every pass.
# Element 0 of each fold entry is taken as the circuit -- presumably entries
# are (circuit, label) pairs; verify against k_split.
circuits = [sample[0] for fold in cpn_folds for sample in fold]

for i in range(1, 6):
    print(f"Running fold {i} of 5:")
    # NOTE(review): the second argument is the constant 1 on every pass, while
    # `i` is only used in the message above. If that argument selects the
    # held-out fold, all five passes train and test on the same split and it
    # should depend on `i` -- confirm the argument's meaning and index base in
    # ClassicPipeline.fold_datasets before changing it.
    train_set, test_set = pipeline.fold_datasets(cpn_folds, 1)

    # A model and a trainer are created again on every pass.
    model = pipeline.create_model(circuits)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    pipeline.train_model(train_set, test_set, 1, 1)

In [None]:
#5-fold cross validation for epurse dataset
# Split the ePurse circuits and labels with k_split (k = 5).
epurse_folds = k_split(epurse_circuits, epurse_labels, 5)

# The model is created from the circuits of all folds. The folds do not change
# inside the loop, so the list is collected once rather than on every pass.
# Element 0 of each fold entry is taken as the circuit -- presumably entries
# are (circuit, label) pairs; verify against k_split.
circuits = [sample[0] for fold in epurse_folds for sample in fold]

for i in range(1, 6):
    print(f"Running fold {i} of 5:")
    # NOTE(review): the second argument is the constant 1 on every pass, while
    # `i` is only used in the message above. If that argument selects the
    # held-out fold, all five passes train and test on the same split and it
    # should depend on `i` -- confirm the argument's meaning and index base in
    # ClassicPipeline.fold_datasets before changing it.
    train_set, test_set = pipeline.fold_datasets(epurse_folds, 1)

    # A model and a trainer are created again on every pass.
    model = pipeline.create_model(circuits)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    pipeline.train_model(train_set, test_set, 1, 1)

In [None]:
#5-fold cross validation for cpn_gps dataset
# Split the concatenated CPN + GPS circuits and labels with k_split (k = 5).
cpn_gps_folds = k_split(cpn_circuits + gps_circuits, cpn_labels + gps_labels, 5)

# The model is created from the circuits of all folds. The folds do not change
# inside the loop, so the list is collected once rather than on every pass.
# Element 0 of each fold entry is taken as the circuit -- presumably entries
# are (circuit, label) pairs; verify against k_split.
circuits = [sample[0] for fold in cpn_gps_folds for sample in fold]

for i in range(1, 6):
    print(f"Running fold {i} of 5:")
    # NOTE(review): the second argument is the constant 1 on every pass, while
    # `i` is only used in the message above. If that argument selects the
    # held-out fold, all five passes train and test on the same split and it
    # should depend on `i` -- confirm the argument's meaning and index base in
    # ClassicPipeline.fold_datasets before changing it.
    train_set, test_set = pipeline.fold_datasets(cpn_gps_folds, 1)

    # A model and a trainer are created again on every pass.
    model = pipeline.create_model(circuits)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    pipeline.train_model(train_set, test_set, 1, 1)

In [None]:
#5-fold cross validation for cpn_epurse dataset
# Split the concatenated CPN + ePurse circuits and labels with k_split (k = 5).
cpn_epurse_folds = k_split(cpn_circuits + epurse_circuits, cpn_labels + epurse_labels, 5)

# The model is created from the circuits of all folds. The folds do not change
# inside the loop, so the list is collected once rather than on every pass.
# Element 0 of each fold entry is taken as the circuit -- presumably entries
# are (circuit, label) pairs; verify against k_split.
circuits = [sample[0] for fold in cpn_epurse_folds for sample in fold]

for i in range(1, 6):
    print(f"Running fold {i} of 5:")
    # NOTE(review): the second argument is the constant 1 on every pass, while
    # `i` is only used in the message above. If that argument selects the
    # held-out fold, all five passes train and test on the same split and it
    # should depend on `i` -- confirm the argument's meaning and index base in
    # ClassicPipeline.fold_datasets before changing it.
    train_set, test_set = pipeline.fold_datasets(cpn_epurse_folds, 1)

    # A model and a trainer are created again on every pass.
    model = pipeline.create_model(circuits)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    pipeline.train_model(train_set, test_set, 1, 1)

In [None]:
#5-fold cross validation for gps_epurse dataset
# Split the concatenated GPS + ePurse circuits and labels with k_split (k = 5).
gps_epurse_folds = k_split(gps_circuits + epurse_circuits, gps_labels + epurse_labels, 5)

# The model is created from the circuits of all folds. The folds do not change
# inside the loop, so the list is collected once rather than on every pass.
# Element 0 of each fold entry is taken as the circuit -- presumably entries
# are (circuit, label) pairs; verify against k_split.
circuits = [sample[0] for fold in gps_epurse_folds for sample in fold]

for i in range(1, 6):
    print(f"Running fold {i} of 5:")
    # NOTE(review): the second argument is the constant 1 on every pass, while
    # `i` is only used in the message above. If that argument selects the
    # held-out fold, all five passes train and test on the same split and it
    # should depend on `i` -- confirm the argument's meaning and index base in
    # ClassicPipeline.fold_datasets before changing it.
    train_set, test_set = pipeline.fold_datasets(gps_epurse_folds, 1)

    # A model and a trainer are created again on every pass.
    model = pipeline.create_model(circuits)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    pipeline.train_model(train_set, test_set, 1, 1)

In [None]:
#5-fold cross validation for all datasets
# Split the concatenated CPN + GPS + ePurse circuits and labels with k_split (k = 5).
all_folds = k_split(cpn_circuits + gps_circuits + epurse_circuits, cpn_labels + gps_labels + epurse_labels, 5)

# The model is created from the circuits of all folds. The folds do not change
# inside the loop, so the list is collected once rather than on every pass.
# Element 0 of each fold entry is taken as the circuit -- presumably entries
# are (circuit, label) pairs; verify against k_split.
circuits = [sample[0] for fold in all_folds for sample in fold]

for i in range(1, 6):
    print(f"Running fold {i} of 5:")
    # NOTE(review): the second argument is the constant 1 on every pass, while
    # `i` is only used in the message above. If that argument selects the
    # held-out fold, all five passes train and test on the same split and it
    # should depend on `i` -- confirm the argument's meaning and index base in
    # ClassicPipeline.fold_datasets before changing it.
    train_set, test_set = pipeline.fold_datasets(all_folds, 1)

    # A model and a trainer are created again on every pass.
    model = pipeline.create_model(circuits)
    pipeline.create_trainer(model = model, loss = torch.nn.BCEWithLogitsLoss(), optimizer = torch.optim.AdamW, n_epochs = 100, lr = 3e-2)
    pipeline.train_model(train_set, test_set, 1, 1)