# Implementing Parse Trees from a Given Sentence and Corpus
1. Construct **parse trees** for a given dataset.
2. Implement syntax tree generation for Indigenous / English language structures.
3. Analyse parsing challenges for Indian language datasets.

In [None]:
!pip install nltk stanza spacy
!python -m spacy download xx_ent_wiki_sm

In [None]:
import nltk
from nltk import CFG
import spacy
from spacy import displacy
import stanza

# Fetch the NLTK data packages used by this notebook: the Punkt sentence
# tokenizer models and the averaged-perceptron POS tagger.
for resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(resource)

In [None]:
# Fetch the Marathi ('mr') Stanza models, then build a pipeline from them.
stanza.download(lang='mr')
nlp = stanza.Pipeline(lang='mr')

# Run the pipeline over a sample Marathi sentence; `doc` holds the
# annotated result.
sentence = "मी कॉलेज जात आहे"
doc = nlp(sentence)

In [None]:
# Tokenizer data fetched before nltk.word_tokenize is called below.
nltk.download('punkt_tab')

# Toy context-free grammar for simple Marathi clauses.
#
# Marathi is verb-final (SOV): the object NP precedes the verb, and an
# auxiliary such as 'आहे' follows the main verb. The VP rules therefore place
# the NP first (VP -> NP VP) and allow a trailing auxiliary (VP -> Verb Aux).
# A head-initial rule such as `VP -> Verb NP` cannot derive
# 'मी कॉलेज जात आहे' (Pronoun Noun Verb Aux), so the parser would yield no trees.
marathi_grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Pronoun | Noun
    VP -> NP VP | Verb Aux | Verb
    Pronoun -> 'मी' | 'तू' | 'तो'
    Noun -> 'कॉलेज' | 'घर'
    Verb -> 'जात' | 'आहे'
    Aux -> 'आहे'
""")

parser = nltk.ChartParser(marathi_grammar)
sentence = nltk.word_tokenize('मी कॉलेज जात आहे')

# Materialize the parses so an empty result is reported instead of the cell
# silently printing nothing.
trees = list(parser.parse(sentence))
if not trees:
    print("No parse found for:", " ".join(sentence))
for tree in trees:
    tree.pretty_print()

In [None]:
# Extended toy grammar: same clause structure with a larger lexicon.
#
# Marathi is verb-final (SOV), so the VP takes its object NP *before* the verb
# (VP -> NP VP) and may end in an auxiliary (VP -> Verb Aux). A head-initial
# rule such as `VP -> Verb NP` cannot derive 'मी कॉलेज जात आहे'
# (Pronoun Noun Verb Aux), so the parser would yield no trees.
marathi_grammar_extended = CFG.fromstring("""
    S -> NP VP
    NP -> Pronoun | Noun
    VP -> NP VP | Verb Aux | Verb
    Pronoun -> 'मी' | 'तू' | 'तो' | 'ती'
    Noun -> 'कॉलेज' | 'घर' | 'मुलगा' | 'मुलगी'
    Verb -> 'जात' | 'आहे' | 'करतो' | 'आहेत'
    Aux -> 'आहे' | 'आहेत'
""")

sentence = nltk.word_tokenize("मी कॉलेज जात आहे")
parser = nltk.ChartParser(marathi_grammar_extended)

# Materialize the parses so an empty result is reported instead of the cell
# silently printing nothing.
trees = list(parser.parse(sentence))
if not trees:
    print("No parse found for:", " ".join(sentence))
for tree in trees:
    tree.pretty_print()

In [None]:
# Three Marathi test sentences for the dependency parser.
sentence_1 = "मी कॉलेज जात आहे."  # Subject-Object-Verb (SOV)
sentence_2 = "कॉलेज मी जात आहे."  # Object-Subject-Verb (OSV)
sentence_3 = "घरात मुलगा खेळत आहे."  # Complex structure with adverbial phrases


def _show_dependencies(heading, text):
    """Print *heading*, run the pipeline on *text*, and print its dependencies.

    Returns the document produced by ``nlp`` so the caller can keep it.
    """
    print(heading)
    parsed = nlp(text)
    for parsed_sentence in parsed.sentences:
        parsed_sentence.print_dependencies()
    return parsed


doc1 = _show_dependencies("Sentence 1 parse: ", sentence_1)
doc2 = _show_dependencies("\nSentence 2 parse: ", sentence_2)
doc3 = _show_dependencies("\nSentence 3 parse: ", sentence_3)