# Implementing Parse Trees from a Given Sentence and Corpus
1. Construct **parse trees** for a given dataset.
2. Implement syntax tree generation for indigenous / English language structures.
3. Analyse parsing challenges for Indian language datasets.

In [None]:
# Install the three NLP packages that the import cell below relies on.
!pip install nltk stanza spacy
# Fetch the spaCy model package `xx_ent_wiki_sm`.
# NOTE(review): no visible cell loads this model — confirm it is still needed.
!python -m spacy download xx_ent_wiki_sm

In [None]:
import nltk
from nltk import CFG
import spacy
from spacy import displacy
import stanza

# Fetch the NLTK data packages one after another, in the same order as before.
for resource_name in ('punkt', 'averaged_perceptron_tagger', 'punkt_tab'):
    nltk.download(resource_name)

In [None]:
# Download the Stanza resources for language code 'mr', then build the
# pipeline object (`nlp`) that the later cells call on raw sentences.
language_code = 'mr'
stanza.download(language_code)
nlp = stanza.Pipeline(language_code)

In [None]:
# Run the Stanza pipeline on a single example sentence and print the
# dependency relations of every sentence the pipeline segmented out of it.
sentence = "मी कॉलेज जात आहे"
doc = nlp(sentence)
for parsed_sentence in doc.sentences:
    parsed_sentence.print_dependencies()

In [None]:
# Toy context-free grammar over a small, fixed Marathi vocabulary.
# 'तुम्ही' ("you", plural/polite) is a pronoun, so it is listed under Pronoun
# rather than Verb.
# NOTE(review): Marathi adpositions normally follow the noun they govern,
# whereas the PP rule below places them before the NP — confirm whether the
# rule should be reversed; it is left unchanged here.
marathi_grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Pronoun | Noun | Pronoun Noun | Noun Adj | Noun PP
    VP -> Verb | Verb NP | Verb Adj | Verb PP | Verb VP
    PP -> Preposition NP
    Pronoun -> 'मी' | 'तू' | 'तो' | 'ती' | 'आपण' | 'ते' | 'तुम्ही'
    Noun -> 'कॉलेज' | 'घर' | 'पुस्तक' | 'शाळा' | 'मित्र'
    Verb -> 'जात' | 'आहे' | 'वाचतो' | 'करतो' | 'खात'
    Adj -> 'सुंदर' | 'मोठं' | 'छोटं' | 'दूर' | 'जड'
    Preposition -> 'मध्ये' | 'कडे' | 'वर' | 'खाली'
""")

parser = nltk.ChartParser(marathi_grammar)
sentence = nltk.word_tokenize('मी कॉलेज जात आहे')

# Materialise the parses so that an empty result can be reported instead of
# the cell finishing without any output.
trees = list(parser.parse(sentence))
for tree in trees:
    tree.pretty_print()
if not trees:
    print("No parse found for:", " ".join(sentence))

In [None]:
# Three word-order variants to compare in the dependency parser output.
sentence_1 = "मी कॉलेज जात आहे."  # Subject-Object-Verb (SOV)
sentence_2 = "कॉलेज मी जात आहे."  # Object-Subject-Verb (OSV)
sentence_3 = "घरात मुलगा खेळत आहे."  # Complex structure with adverbial phrases

# Each entry pairs the heading to print with the sentence to parse; the
# heading is printed before the pipeline runs, as in the unrolled version.
labelled_sentences = [
    ("Sentence 1 parse: ", sentence_1),
    ("\nSentence 2 parse: ", sentence_2),
    ("\nSentence 3 parse: ", sentence_3),
]

parsed_docs = []
for heading, text in labelled_sentences:
    print(heading)
    parsed = nlp(text)
    parsed_docs.append(parsed)
    for sent in parsed.sentences:
        sent.print_dependencies()

# Keep the individual document names available to any later cell.
doc1, doc2, doc3 = parsed_docs