**Library Imports, Installation and Downloads:**

In [11]:
#Import
import nltk
import spacy
from nltk.tree import TreePrettyPrinter

#Installation of Dependencies:

!pip install benepar==0.2.0 spacy==3.4.0 protobuf==3.20.3 nltk==3.7
!pip install supar
!pip install benepar
!pip install stanza


import benepar
#Download of SpaCy Models:
!python -m spacy download en_core_web_sm
!python -m spacy download fr_core_news_sm
!python -m spacy download de_core_news_sm
!python -m spacy download it_core_news_sm


2024-01-14 07:59:33.534387: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-14 07:59:33.534460: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-14 07:59:33.536058: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[33mDEPRECATION: https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl#egg=en_core_web_sm==3.4.1 contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found 

**Tree Generator for Natural Language Sentences**

In [12]:
def tree_generator(file, grammar_init, nlp):
    for sent in file:
# Parse the sentence using the provided spaCy NLP model
        parsed_sent = nlp(sent)

# Print the tokens and their part-of-speech tags for the current sentence
        for token in parsed_sent:
            print(token.text, token.pos_)
        print(f"{sent}\n")

# Initialize an empty grammar dictionary to store possible parts of speech and associated words
        grammar = {token.pos_: [f'"{token.text}"'] for token in parsed_sent}
# Iterate over tokens to collect words for each part of speech
        for type, words in grammar.items():
            for token in parsed_sent:
# Append unique words associated with the current part of speech
                if token.pos_ == type and f'"{token.text}"' not in words:
                    words.append(f'"{token.text}"')

# Create grammar rules
            grammar_string = f"{type} -> {' | '.join(words)}\n"
# Append the created grammar rules to the initial grammar string
            grammar_init += grammar_string

# Create an NLTK CFG (Context-Free Grammar) from the initialized grammar string
        nltk_grammar = nltk.CFG.fromstring(grammar_init)
# Create a ChartParser based on the NLTK CFG
        parser = nltk.ChartParser(nltk_grammar)
# Tokenize the sentence using spaCy and extract the tokenized words
        spacy_tokenized = [token.text for token in parsed_sent]

# Parse the tokenized words using the NLTK ChartParser
        trees = list(parser.parse(spacy_tokenized))

# If parse trees are generated, print the first tree and its pretty-printed representation
        if trees:
            print(trees[0])
            print(TreePrettyPrinter(trees[0]).text())

In [13]:
# Second Function for French Language to use Stanza library, as SpaCy does not work for French

def tree_generatorFR(file, grammar_init, nlp):
    for sent in file:
        # Parse the sentence using the provided stanza NLP model
        doc = nlp(sent)
        parsed_sent = doc.sentences[0]  # Access the first sentence in the document

        # Print the tokens and their part-of-speech tags for the current sentence
        for word in parsed_sent.words:
            print(word.text, word.pos)
        print(f"{sent}\n")

        # Initialize an empty grammar dictionary to store possible parts of speech and associated words
        grammar = {word.pos: [f'"{word.text}"'] for word in parsed_sent.words}
        # Iterate over tokens to collect words for each part of speech
        for type, words in grammar.items():
            for word in parsed_sent.words:
                # Append unique words associated with the current part of speech
                if word.pos == type and f'"{word.text}"' not in words:
                    words.append(f'"{word.text}"')

            # Create grammar rules
            grammar_string = f"{type} -> {' | '.join(words)}\n"
            # Append the created grammar rules to the initial grammar string
            grammar_init += grammar_string

        # Create an NLTK CFG (Context-Free Grammar) from the initialized grammar string
        nltk_grammar = nltk.CFG.fromstring(grammar_init)
        # Create a ChartParser based on the NLTK CFG
        parser = nltk.ChartParser(nltk_grammar)
        # Tokenize the sentence using stanza and extract the tokenized words
        stanza_tokenized = [word.text for word in parsed_sent.words]

        # Parse the tokenized words using the NLTK ChartParser
        trees = list(parser.parse(stanza_tokenized))

        # If parse trees are generated, print the first tree and its pretty-printed representation
        if trees:
            print(trees[0])
            print(TreePrettyPrinter(trees[0]).text())


**Context Free Grammar Definition for Sentence Parsing**

In [14]:
grammar = """

# Sentence structure rules
S -> NP VP PUNCT | NP VP | PUNCT NP VP PUNCT

# Noun Phrase rules
NP -> NOUN | NP ADJ | DET NP | ADJ NP

# Verb Phrase rules
VP -> VP NP | VERB | VP ADV | VP PUNCT
"""

***For English***

In [15]:
# Sample input sentence
file = [
    "The black cat sleeps peacefully."
]

# Load spaCy POS tagger for English
nlp = spacy.load("en_core_web_sm")
# Call the tree_generator function with the provided sentence, grammar, and spaCy POS tagger
tree_generator(file, grammar, nlp)

  


The DET
black ADJ
cat NOUN
sleeps VERB
peacefully ADV
. PUNCT
The black cat sleeps peacefully.

(S
  (NP (DET The) (NP (ADJ black) (NP (NOUN cat))))
  (VP (VP (VERB sleeps)) (ADV peacefully))
  (PUNCT .))
               S                              
       ________|___________________________    
      NP                   |               |  
  ____|____                |               |   
 |         NP              VP              |  
 |     ____|___       _____|______         |   
 |    |        NP    VP           |        |  
 |    |        |     |            |        |   
DET  ADJ      NOUN  VERB         ADV     PUNCT
 |    |        |     |            |        |   
The black     cat  sleeps     peacefully   .  



***For French***

In [16]:
# For French
import stanza

# Sample input sentence
file = [
    "Le chien blanc aboie joyeusement."
]

# Download the French model for stanza
stanza.download('fr')

# Load the French model for stanza
nlp = stanza.Pipeline(lang='fr', processors='tokenize,pos,lemma,depparse')

# Call the tree_generator function with the provided sentence, grammar, and stanza POS tagger
tree_generatorFR(file, grammar, nlp)


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.7.0.json:   0%|   …

INFO:stanza:Downloading default packages for language: fr (French) ...
INFO:stanza:File exists: /root/stanza_resources/fr/default.zip
INFO:stanza:Finished downloading models and saved to /root/stanza_resources.
INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.7.0.json:   0%|   …

INFO:stanza:Loading these models for language: fr (French):
| Processor | Package           |
---------------------------------
| tokenize  | combined          |
| mwt       | combined          |
| pos       | combined_charlm   |
| lemma     | combined_nocharlm |
| depparse  | combined_charlm   |

INFO:stanza:Using device: cpu
INFO:stanza:Loading: tokenize
INFO:stanza:Loading: mwt
INFO:stanza:Loading: pos
INFO:stanza:Loading: lemma
INFO:stanza:Loading: depparse
INFO:stanza:Done loading processors!


Le DET
chien NOUN
blanc ADJ
aboie VERB
joyeusement ADV
. PUNCT
Le chien blanc aboie joyeusement.

(S
  (NP (NP (DET Le) (NP (NOUN chien))) (ADJ blanc))
  (VP (VP (VERB aboie)) (ADV joyeusement))
  (PUNCT .))
                S                              
           _____|___________________________    
          NP               |                |  
      ____|_____           |                |   
     NP         |          VP               |  
  ___|____      |      ____|_______         |   
 |        NP    |     VP           |        |  
 |        |     |     |            |        |   
DET      NOUN  ADJ   VERB         ADV     PUNCT
 |        |     |     |            |        |   
 Le     chien blanc aboie     joyeusement   .  



***For German***

In [17]:
# Sample input sentence
file = [
"Der grüne Wald rauscht leise."
]

# Load spaCy POS tagger for German
nlp = spacy.load("de_core_news_sm")
# Call the tree_generator function with the provided sentence, grammar, and spaCy POS tagger
tree_generator(file, grammar, nlp)

Der DET
grüne ADJ
Wald NOUN
rauscht VERB
leise ADV
. PUNCT
Der grüne Wald rauscht leise.

(S
  (NP (DET Der) (NP (ADJ grüne) (NP (NOUN Wald))))
  (VP (VP (VERB rauscht)) (ADV leise))
  (PUNCT .))
               S                          
       ________|_______________________    
      NP                    |          |  
  ____|____                 |          |   
 |         NP               VP         |  
 |     ____|___        _____|____      |   
 |    |        NP     VP         |     |  
 |    |        |      |          |     |   
DET  ADJ      NOUN   VERB       ADV  PUNCT
 |    |        |      |          |     |   
Der grüne     Wald rauscht     leise   .  



***For Italian***

In [18]:
# Sample input sentence
file = [
    "Il gatto nero dorme tranquillamente."

]

# Load spaCy POS tagger for Italian
nlp = spacy.load("it_core_news_sm")
# Call the tree_generator function with the provided sentence, grammar, and spaCy POS tagger
tree_generator(file, grammar, nlp)

Il DET
gatto NOUN
nero ADJ
dorme VERB
tranquillamente ADV
. PUNCT
Il gatto nero dorme tranquillamente.

(S
  (NP (NP (DET Il) (NP (NOUN gatto))) (ADJ nero))
  (VP (VP (VERB dorme)) (ADV tranquillamente))
  (PUNCT .))
               S                                  
           ____|_______________________________    
          NP              |                    |  
      ____|____           |                    |   
     NP        |          VP                   |  
  ___|____     |      ____|_________           |   
 |        NP   |     VP             |          |  
 |        |    |     |              |          |   
DET      NOUN ADJ   VERB           ADV       PUNCT
 |        |    |     |              |          |   
 Il     gatto nero dorme     tranquillamente   .  

