In [17]:
from pathlib import Path

from discopy import Dim, grammar
from lambeq import BobcatParser, TreeReader, TreeReaderMode, spiders_reader, cups_reader, stairs_reader
from lambeq import TensorAnsatz, SpiderAnsatz, MPSAnsatz, AtomicType, IQPAnsatz
from lambeq import SpacyTokeniser

from utilities import *

In [2]:
def find_faults_in_file(file: str):
    """Collect the sentences of a dataset that the Bobcat parser fails on.

    The dataset is loaded through ``extract_data`` (not defined in this
    notebook; presumably provided by ``utilities``), every sentence is
    tokenised, and each tokenised sentence is parsed on its own so that one
    failure does not abort the scan.

    Args:
        file: Path of the dataset, handed unchanged to ``extract_data``.

    Returns:
        A list with the original (untokenised) sentences whose parsing raised
        an exception, in dataset order.
    """
    tokeniser = SpacyTokeniser()
    parser = BobcatParser(verbose = "progress")

    # Only the sentences are parsed; the labels are not needed here.
    _labels, sentences = extract_data(file)
    tokens = tokeniser.tokenise_sentences(sentences)

    faults = []
    for i, sentence_tokens in enumerate(tokens):
        try:
            # The resulting diagram is discarded: only success/failure matters.
            parser.sentence2diagram(sentence_tokens, tokenised = True)
        except Exception:
            # Deliberate best-effort scan: record the sentence and keep going.
            faults.append(sentences[i])
            print(f"fault on sentence {i}")

    return faults
    
def check_fixed_faults(tokens_list):
    """Parse a batch of sentences one by one and report the ones that fail.

    Args:
        tokens_list: The sentences to check. Despite the parameter name they
            are tokenised inside this function before being parsed.

    Returns:
        None. Progress and failures are reported with ``print`` only.
    """
    spacy_tokeniser = SpacyTokeniser()
    bobcat = BobcatParser(verbose = "progress")
    tokenised_sentences = spacy_tokeniser.tokenise_sentences(tokens_list)
    total = len(tokenised_sentences)

    for index, sentence_tokens in enumerate(tokenised_sentences):
        try:
            print(f"parsing sentence {index} of {total}")
            bobcat.sentence2diagram(sentence_tokens, tokenised=True)
        except Exception:
            # A failure is only reported; the remaining sentences are still checked.
            print(f"Error on sentence {index}")

    print("Loop done")

def find_duplicates(filename):
    """Find the lines of a text file that repeat an earlier line.

    Lines are compared without their trailing newline, so a last line that
    has no line terminator is still recognised as a repeat of an identical
    earlier line. Each duplicate is also printed as it is found.

    Args:
        filename: Path of the file to scan.

    Returns:
        A list of ``(line, position)`` tuples, one per repeated line, where
        ``line`` is the text exactly as read from the file (newline included
        when present) and ``position`` is its 1-based line number.
    """
    duplicates = []
    seen = set()
    with open(filename) as f:
        for position, line in enumerate(f, start=1):
            # Compare on the content only: the final line of a file may lack "\n".
            key = line.rstrip("\n")
            if key in seen:
                duplicates.append( (line, position) )
                print(line, position)
            else:
                seen.add(key)

    return duplicates

In [50]:
# Smoke test: tokenise one short phrase, parse it and print the resulting diagram.
tokeniser = SpacyTokeniser()
parser = BobcatParser(verbose = "progress")
# Single-sentence tokenisation (the plural tokenise_sentences is used for whole datasets elsewhere).
token = tokeniser.tokenise_sentence("sample text")
# The input is already a token list, hence tokenised = True.
diagram = parser.sentence2diagram(token, tokenised = True)
print(diagram)

sample >> Id(n @ n.l) @ text >> Id(n) @ Cup(n.l, n)


In [59]:
# Look for repeated lines in the edited GPS dataset.
# NOTE(review): absolute, machine-specific Windows path; this cell only works where that file exists.
dup = find_duplicates("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets\\GPS_edited.csv")
# Displayed as the cell output: how many repeated lines were found.
len(dup)

0

In [64]:
# Print, for each edited dataset, how many sentences the parser fails on.
#
# The notebook is used on two machines, each with its own checkout location.
# Calling find_faults_in_file on both sets of absolute paths unconditionally
# cannot complete on either machine, so only the checkout directory that
# exists on the current machine is processed. The per-machine file order and
# the resulting path strings are the same as before.
_dataset_runs = [
    #windows 11
    (
        Path("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets"),
        ["CPN_edited.csv", "ePurse_edited.csv", "GPS_edited.csv"],
    ),
    #arch linux
    (
        Path("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets"),
        ["GPS_edited.csv", "CPN_edited.csv", "ePurse_edited.csv"],
    ),
]

_available_runs = [
    (dataset_dir, file_names)
    for dataset_dir, file_names in _dataset_runs
    if dataset_dir.is_dir()
]
if not _available_runs:
    # Fail early with a clear message instead of an error from deep inside the loader.
    raise FileNotFoundError(
        "None of the known dataset directories exists on this machine: "
        + ", ".join(str(dataset_dir) for dataset_dir, _ in _dataset_runs)
    )

for _dataset_dir, _file_names in _available_runs:
    for _file_name in _file_names:
        print(len(find_faults_in_file(str(_dataset_dir / _file_name))))

fault on sentence 34
fault on sentence 110
2


In [None]:
""" 
    This section contains the tests for the conversion from diagrams to circuits.
"""

In [38]:
def create_diagrams(dataset: str):
    """Parse every sentence of a dataset into a diagram in one batch.

    Args:
        dataset: Path of the dataset, handed unchanged to ``extract_data``.

    Returns:
        Whatever ``BobcatParser.sentences2diagrams`` returns for the
        tokenised sentences of the dataset.
    """
    spacy_tokeniser = SpacyTokeniser()
    bobcat = BobcatParser(verbose = "progress")

    # extract_data yields a (labels, sentences) pair; the labels are unused here.
    _, sentences = extract_data(dataset)

    return bobcat.sentences2diagrams(
        spacy_tokeniser.tokenise_sentences(sentences),
        tokenised = True,
    )

def create_circuits(diagrams: list):
    """Apply an ``MPSAnsatz`` to each diagram and return the results in order.

    Only the NOUN and SENTENCE atomic types are given a dimension (2 each).
    NOTE(review): the second positional argument ``3`` looks like the bond
    dimension of lambeq's ``MPSAnsatz`` - confirm against the installed version.

    Args:
        diagrams: The diagrams to convert.

    Returns:
        A list with the ansatz output for every input diagram.
    """
    type_dimensions = {AtomicType.NOUN: Dim(2), AtomicType.SENTENCE: Dim(2)}
    mps_ansatz = MPSAnsatz(type_dimensions, 3)

    converted = []
    for diagram in diagrams:
        converted.append(mps_ansatz(diagram))
    return converted
    
def get_faults_from_diagrams(diagrams: list):
    """Return the diagrams that a ``TensorAnsatz`` cannot be applied to.

    Every atomic type listed below is mapped to ``Dim(2)``. Each diagram is
    converted on its own, so one failure does not stop the scan.

    Args:
        diagrams: The diagrams to try to convert.

    Returns:
        A list with the diagrams whose conversion raised an exception, in
        input order.
    """
    mapped_types = (
        AtomicType.NOUN,
        AtomicType.SENTENCE,
        AtomicType.CONJUNCTION,
        AtomicType.PUNCTUATION,
        AtomicType.NOUN_PHRASE,
        AtomicType.PREPOSITIONAL_PHRASE,
    )
    tensor_ansatz = TensorAnsatz({atomic_type: Dim(2) for atomic_type in mapped_types})

    failed = []
    for diagram in diagrams:
        try:
            # The converted result is discarded: only success/failure matters.
            tensor_ansatz(diagram)
        except Exception:
            failed.append(diagram)

    return failed

In [4]:
#windows 11
# Parse the three edited datasets into diagrams, one list per dataset.
# NOTE(review): these absolute paths are machine-specific; the Linux variant is kept disabled in the string below.
cpn_diagrams = create_diagrams("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets\\CPN_edited.csv")
epurse_diagrams = create_diagrams("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets\\ePurse_edited.csv")
gps_diagrams = create_diagrams("C:\\Users\\calif\\Documents\\GitHub\\Tesi-Quantum-NLP\\project\\datasets\\edited_datasets\\GPS_edited.csv")


# The triple-quoted string below is never executed as code: it holds the
# disabled Arch Linux equivalent of the three calls above.
"""#arch linux
cpn_diagrams = create_diagrams("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/CPN_edited.csv")
epurse_diagrams = create_diagrams("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/ePurse_edited.csv")
gps_diagrams = create_diagrams("/home/adriano22_/Documents/GitHub/Tesi-Quantum-NLP/project/datasets/edited_datasets/GPS_edited.csv")"""


# Report how many diagrams were produced for each dataset.
print(f"Diagrams parsed in cpn_diagrams: {len(cpn_diagrams)}")
print(f"Diagrams parsed in epurse_diagrams: {len(epurse_diagrams)}")
print(f"Diagrams parsed in gps_diagrams: {len(gps_diagrams)}")

Tagging sentences:   0%|          | 0/37 [00:00<?, ?it/s]

Parsing tagged sentences:   0%|          | 0/146 [00:00<?, ?it/s]

Parse trees to diagrams:   0%|          | 0/146 [00:00<?, ?it/s]

Tagging sentences:   0%|          | 0/31 [00:00<?, ?it/s]

Parsing tagged sentences:   0%|          | 0/124 [00:00<?, ?it/s]

Parse trees to diagrams:   0%|          | 0/124 [00:00<?, ?it/s]

Tagging sentences:   0%|          | 0/42 [00:00<?, ?it/s]

Parsing tagged sentences:   0%|          | 0/168 [00:00<?, ?it/s]

Parse trees to diagrams:   0%|          | 0/168 [00:00<?, ?it/s]

Diagrams parsed in cpn_diagrams: 146
Diagrams parsed in epurse_diagrams: 124
Diagrams parsed in gps_diagrams: 168


In [39]:
# For each dataset, collect the diagrams the tensor ansatz cannot convert
# and print how many there are (relies on the *_diagrams lists built in an earlier cell).
cpn_faults = get_faults_from_diagrams(cpn_diagrams)
print(len(cpn_faults))

epurse_faults = get_faults_from_diagrams(epurse_diagrams)
print(len(epurse_faults))

gps_faults = get_faults_from_diagrams(gps_diagrams)
print(len(gps_faults))

0
0
0
