In [17]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import re
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import sentence_bleu
from transformers import AutoTokenizer
import spacy
import nltk

# Ensure required packages are installed
!pip install spacy nltk transformers
!python -m spacy download en_core_web_sm
!python -m spacy download fr_core_news_sm


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
      --------------------------------------- 0.3/12.8 MB ? eta -:--:--
     -- ------------------------------------- 0.8/12.8 MB 2.4 MB/s eta 0:00:06
     ---- ----------------------------------- 1.6/12.8 MB 3.1 MB/s eta 0:00:04
     ------- -------------------------------- 2.4/12.8 MB 3.4 MB/s eta 0:00:04
     --------- ------------------------------ 2.9/12.8 MB 3.0 MB/s eta 0:00:04
     --------- ------------------------------ 2.9/12.8 MB 3.0 MB/s eta 0:00:04
     ------------- -------------------------- 4.5/12.8 MB 3.2 MB/s eta 0:00:03
     --------------- ------------------------ 5.0/12.8 MB 3.1 MB/s eta 0:00:03
     ----------------- ---------------------- 5.5/12.8 

In [18]:
# Load dataset
data = pd.read_csv("eng-french.csv", nrows=50)
data.columns = ['english', 'french']

# Function to clean text
def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

# Clean English and French columns
data['english'] = data['english'].apply(clean_text)
data['french'] = data['french'].apply(clean_text)

data.head()


Unnamed: 0,english,french
0,hi,salut
1,run,cours
2,run,courez
3,who,qui
4,wow,a alors


In [19]:
# Sub-word tokenization using pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize_text(text):
    return tokenizer.tokenize(text)

data['english_subtokens'] = data['english'].apply(tokenize_text)
data['french_subtokens'] = data['french'].apply(tokenize_text)

print(data[['english', 'english_subtokens']].head())
print(data[['french', 'french_subtokens']].head())



  english english_subtokens
0      hi              [hi]
1     run             [run]
2     run             [run]
3     who             [who]
4     wow             [wow]
    french  french_subtokens
0    salut       [sal, ##ut]
1    cours       [co, ##urs]
2   courez  [co, ##ure, ##z]
3      qui             [qui]
4  a alors    [a, al, ##ors]


In [20]:
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
nlp1 = spacy.load("fr_core_news_sm")

# Function to extract grammar tree
def extract_grammar_tree(sentence):
    doc = nlp(sentence)
    return [(token.text, token.dep_, token.head.text) for token in doc]

def extract_grammar_tree1(sentence):
    doc = nlp1(sentence)
    return [(token.text, token.dep_, token.head.text) for token in doc]

# Apply grammar tree extraction
data['english_grammar_tree'] = data['english'].apply(extract_grammar_tree)
data['french_grammar_tree'] = data['french'].apply(extract_grammar_tree1)

print(data[['english', 'english_grammar_tree']].head())
print(data[['french', 'french_grammar_tree']].head())


  english english_grammar_tree
0      hi     [(hi, ROOT, hi)]
1     run   [(run, ROOT, run)]
2     run   [(run, ROOT, run)]
3     who   [(who, ROOT, who)]
4     wow   [(wow, ROOT, wow)]
    french                 french_grammar_tree
0    salut              [(salut, ROOT, salut)]
1    cours              [(cours, ROOT, cours)]
2   courez            [(courez, ROOT, courez)]
3      qui                  [(qui, ROOT, qui)]
4  a alors  [(a, ROOT, a), (alors, advmod, a)]


In [21]:
# Function to extract Subject-Verb-Object (SVO) triplets
def extract_svo(doc):
    subject, verb, obj = None, None, None
    for token in doc:
        if token.dep_ == "nsubj":
            subject = token.text
        elif token.dep_ == "ROOT":
            verb = token.text
        elif token.dep_ == "dobj":
            obj = token.text
    return (subject, verb, obj)

# Apply SVO extraction
data['english_svo'] = data['english'].apply(lambda x: extract_svo(nlp(x)))
data['french_svo'] = data['french'].apply(lambda x: extract_svo(nlp(x)))

print(data[['english', 'english_svo']].head())
print(data[['french', 'french_svo']].head())




  english        english_svo
0      hi   (None, hi, None)
1     run  (None, run, None)
2     run  (None, run, None)
3     who  (None, who, None)
4     wow  (None, wow, None)
    french            french_svo
0    salut   (None, salut, None)
1    cours   (None, cours, None)
2   courez  (None, courez, None)
3      qui     (None, qui, None)
4  a alors   (None, alors, None)


In [22]:
import pennylane as qml
from pennylane import numpy as np

In [23]:
# Quantum Circuit Representation
n_qubits = 4
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev)
def encode_grammar_tree(grammatical_data):
    """
    Quantum circuit to encode grammatical relationships into quantum states.
    """
    # Encode grammatical relationships into rotations
    for i, (word, dep, head) in enumerate(grammatical_data[:n_qubits]):
        # Example encoding: rotation based on word length and dependency type
        qml.RX(len(word) * 0.1, wires=i)
        qml.RZ(len(dep) * 0.1, wires=i)
    # Apply entanglement
    qml.templates.BasicEntanglerLayers(weights=np.ones((1, n_qubits)), wires=range(n_qubits))
    return qml.state()

In [24]:
# Example: Apply quantum encoding to one sentence
def process_sentence_for_quantum(sentence, extract_function):
    parsed_data = extract_function(nlp(sentence))
    return encode_grammar_tree(parsed_data)

# Encode a subset of the English grammar tree
data['quantum_encoding'] = data['english_grammar_tree'].apply(
    lambda x: encode_grammar_tree(x)
)
# Encode a subset of the English grammar tree
data['quantum_encoding'] = data['french_grammar_tree'].apply(
    lambda x: encode_grammar_tree(x)
)

# Output results
print(data[['english', 'english_grammar_tree', 'quantum_encoding']].head())
print(data[['french', 'french_grammar_tree', 'quantum_encoding']].head())

  english english_grammar_tree  \
0      hi     [(hi, ROOT, hi)]   
1     run   [(run, ROOT, run)]   
2     run   [(run, ROOT, run)]   
3     who   [(who, ROOT, who)]   
4     wow   [(wow, ROOT, wow)]   

                                    quantum_encoding  
0  [(0.4846698253824738-0.13010060239724272j), (0...  
1  [(0.4846698253824738-0.13010060239724272j), (0...  
2  [(0.4614976850499552-0.1315983254104966j), (0....  
3  [(0.5273249492321096-0.12613416143556644j), (0...  
4  [(0.41035279149178144-0.2805558934729811j), (0...  
    french                 french_grammar_tree  \
0    salut              [(salut, ROOT, salut)]   
1    cours              [(cours, ROOT, cours)]   
2   courez            [(courez, ROOT, courez)]   
3      qui                  [(qui, ROOT, qui)]   
4  a alors  [(a, ROOT, a), (alors, advmod, a)]   

                                    quantum_encoding  
0  [(0.4846698253824738-0.13010060239724272j), (0...  
1  [(0.4846698253824738-0.13010060239724272j), (0...  