In [1]:
!pip install ja-ginza nltk svgling

Collecting ja-ginza
  Downloading ja_ginza-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting svgling
  Downloading svgling-0.5.0-py3-none-any.whl.metadata (7.4 kB)
Collecting sudachipy<0.7.0,>=0.6.2 (from ja-ginza)
  Downloading SudachiPy-0.6.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting sudachidict-core>=20210802 (from ja-ginza)
  Downloading sudachidict_core-20250515-py3-none-any.whl.metadata (2.7 kB)
Collecting ginza<5.3.0,>=5.2.0 (from ja-ginza)
  Downloading ginza-5.2.0-py3-none-any.whl.metadata (448 bytes)
Collecting svgwrite (from svgling)
  Downloading svgwrite-1.4.3-py3-none-any.whl.metadata (8.8 kB)
Collecting plac>=1.3.3 (from ginza<5.3.0,>=5.2.0->ja-ginza)
  Downloading plac-1.4.5-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading ja_ginza-5.2.0-py3-none-any.whl (59.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading svgling-0.5.0-py3-none

In [2]:
!pip install -U ginza spacy



In [3]:
!pip install "https://github.com/megagonlabs/ginza/releases/download/latest/ginza-latest.tar.gz"

Collecting https://github.com/megagonlabs/ginza/releases/download/latest/ginza-latest.tar.gz
  Downloading https://github.com/megagonlabs/ginza/releases/download/latest/ginza-latest.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [4]:
import spacy
import nltk
from nltk import CFG, ChartParser
from nltk.tree import Tree
from spacy.lang.ja import Japanese

nlp = Japanese()

In [56]:
class BinaryJapaneseParser:
    def __init__(self):
        """Initializes parser with binary Japanese grammar"""
        self.grammar = CFG.fromstring("""
            S -> S1 | S2 | S3 | S4 | S5 | S6 | S7

            NP_TOPIC -> NP "は" | NP "も" | NP

            S1 -> NP_TOPIC VP
            S1 -> NP VP
            S2 -> S1 "か"
            S3 -> NP_TOPIC VP_NEG
            S4 -> V_IMP | VP_IMP_NEG
            S5 -> COND VP
            S6 -> NP_PASSIVE V_PASSIVE
            S7 -> NP REL_NOUN

            NP -> DET NOUN
            NP -> NOUN
            NP -> PRONOUN
            NP -> NP_PARTICLE
            NP -> NP_NOUN

            NP_PARTICLE -> NP PARTICLE
            NP_NOUN -> NP "の" NOUN
            NP_PASSIVE -> NP "に"
            REL_NOUN -> VP "の" NOUN

            VP -> V
            VP -> V_AUX
            VP -> V_NP
            VP -> ADJ
            VP -> ADJ_DESU
            VP -> ADV_VP
            VP -> VP_PP
            V -> V_STEM V_INFL | V_WHOLE

            V_AUX -> V AUX
            V_NP -> NP V
            VP_NEG -> VP "ない"
            VP_NEG -> ADJ "ない"
            ADV_VP -> ADV VP
            VP_PP -> VP PP
            VP_IMP_NEG -> V_STEM "なさい"

            V_WHOLE -> "食べる" | "読む" | "いる" | "ある" | "走る" | "降る" | "褒める" | "走れ" | "見る" | "する"
            V_STEM -> "食べ" | "読み" | "い" | "あ" | "走り" | "降り" | "褒め" | "見"
            V_INFL -> "る" | "た" | "て" | "れば" | "ろ" | "ます" | "ません"

            V_IMP -> V_IMP_BASE | V_IMP_SUFFIX
            V_IMP_BASE -> "走れ" | "降れ" | "見ろ"
            V_IMP_SUFFIX -> V_STEM "ろ" | V_TE "ください"
            V_TE -> V_STEM "て"
            V_PASSIVE -> V_STEM "られる"

            ADJ_DESU -> ADJ "です"

            COND -> NP PARTICLE V_COND "ば"
            V_COND -> V_STEM "れ"

            PP -> NP POSTPOSITION

            AUX -> "ます" | "ました" | "たい" | "られる" | "ない"
            ADJ -> "大きい" | "小さい" | "美味しい" | "速い" | "可愛い" | "新しい" | "高い"
            ADV -> "とても" | "速く" | "よく" | "もっと"
            POSTPOSITION -> "で" | "に" | "から" | "まで" | "へ" | "と"

            DET -> "その" | "この" | "あの" | "どの"
            NOUN -> "本" | "猫" | "学生" | "先生" | "公園" | "魚" | "家" | "雨" | "犬" | "声" | "車" | "学校" | "エドゥアルドクリシンスキ"
            PRONOUN -> "私" | "あなた" | "彼" | "彼女" | "誰"
            PARTICLE -> "が" | "を" | "に" | "で" | "へ" | "と" | "から"
        """)
        self.parser = ChartParser(self.grammar)

    def parse_sentence(self, sentence):
        doc = nlp(sentence)
        tokens = []
        for token in doc:
          tokens.append(str(token))
        try:
            trees = list(self.parser.parse(tokens))
            # print([t for t in trees if isinstance(t, Tree)]) #
            return [t for t in trees if isinstance(t, Tree)]
        except (ValueError, IndexError) as e:
            print(f"Parsing error: {e}")
            return []

    def analyze_sentence(self, sentence):
        print(f"\nAnalyzing: 「{sentence}」")
        trees = self.parse_sentence(sentence)
        # print('print(trees)')
        # print(trees) #

        if not trees:
            print("> Sentence doesn't match grammar")
            return False

        print(f"> Found {len(trees)} parse tree{'s' if len(trees) > 1 else ''}:")
        for i, tree in enumerate(trees, 1):
            print(f"\nTree {i}:")
            tree.pretty_print()
            self._analyze_tree(tree)

        return True

    def _analyze_tree(self, tree):
        def get_sentence_type(tree):
            labels = {
                'S1': 'Declarative',
                'S2': 'Question',
                'S3': 'Negative',
                'S4': 'Imperative',
                'S5': 'Conditional',
                'S6': 'Passive',
                'S7': 'Relative'
            }
            first_child = tree[0]  # Gets the first subtree
            first_child_label = first_child.label()
            return labels.get(str(first_child_label))

        print(f"\nSentence type: {get_sentence_type(tree)}")

        def analyze_node(node, depth=0):
            indent = "  " * depth
            if isinstance(node, str):
                print(f"{indent}Token: {node}")
            else:
                print(f"{indent}{node.label()}:")
                for child in node:
                  analyze_node(child, depth+1)

        return analyze_node(tree)


if __name__ == "__main__":
    parser = BinaryJapaneseParser()

    # Test sentences
    test_sentences = [
        "猫は魚を食べる",
        "本を読む",
        "読みなさい",
        "雨が降る",
        "先生に褒められる",
        "私は走る",
        "この本を読みます",
        "公園で走る",
        "彼女は家にいる"
    ]

    print("=== Testing Basic Sentences ===")
    for sentence in test_sentences:
        parser.analyze_sentence(sentence)

    # More complex sentences
    print("\n=== Testing Complex Sentences ===")
    complex_sentences = [
        "本を読むの学生",
        "雨が降れば家にいる",
        "私はとても速く走る"
    ]
    for sentence in complex_sentences:
        parser.analyze_sentence(sentence)

    # Interactive mode
    print("\n=== Interactive Mode ===")
    print("Enter Japanese sentence, or 'q' to quit")
    while True:
        user_input = input("> ").strip()
        if user_input.lower() == 'q':
            break
        if user_input:
            parser.analyze_sentence(user_input)

=== Testing Basic Sentences ===

Analyzing: 「猫は魚を食べる」
print(trees)
[Tree('S', [Tree('S1', [Tree('NP_TOPIC', [Tree('NP', [Tree('NOUN', ['猫'])]), 'は']), Tree('VP', [Tree('V_NP', [Tree('NP', [Tree('NP_PARTICLE', [Tree('NP', [Tree('NOUN', ['魚'])]), Tree('PARTICLE', ['を'])])]), Tree('V', [Tree('V_WHOLE', ['食べる'])])])])])])]
> Found 1 parse tree:

Tree 1:
                   S                               
                   |                                
                   S1                              
        ___________|__________________              
       |                              VP           
       |                              |             
       |                             V_NP          
       |                     _________|________     
       |                    NP                 |   
       |                    |                  |    
    NP_TOPIC           NP_PARTICLE             |   
  _____|______      ________|_________         |    
 |            NP  

KeyboardInterrupt: Interrupted by user

In [57]:
parser = BinaryJapaneseParser()
parser.analyze_sentence("猫は本を見る") # кошка видит книгу


Analyzing: 「猫は本を見る」
print(trees)
[Tree('S', [Tree('S1', [Tree('NP_TOPIC', [Tree('NP', [Tree('NOUN', ['猫'])]), 'は']), Tree('VP', [Tree('V_NP', [Tree('NP', [Tree('NP_PARTICLE', [Tree('NP', [Tree('NOUN', ['本'])]), Tree('PARTICLE', ['を'])])]), Tree('V', [Tree('V_WHOLE', ['見る'])])])])])])]
> Found 1 parse tree:

Tree 1:
                   S                               
                   |                                
                   S1                              
        ___________|__________________              
       |                              VP           
       |                              |             
       |                             V_NP          
       |                     _________|________     
       |                    NP                 |   
       |                    |                  |    
    NP_TOPIC           NP_PARTICLE             |   
  _____|______      ________|_________         |    
 |            NP   NP                 |        V   


True

In [58]:
parser = BinaryJapaneseParser()
parser.analyze_sentence("エドゥアルドクリシンスキは学生を見ます") # Эдуард Клышинский посмотрит на студентов


Analyzing: 「エドゥアルドクリシンスキは学生を見ます」
print(trees)
[Tree('S', [Tree('S1', [Tree('NP_TOPIC', [Tree('NP', [Tree('NOUN', ['エドゥアルドクリシンスキ'])]), 'は']), Tree('VP', [Tree('V_NP', [Tree('NP', [Tree('NP_PARTICLE', [Tree('NP', [Tree('NOUN', ['学生'])]), Tree('PARTICLE', ['を'])])]), Tree('V', [Tree('V_STEM', ['見']), Tree('V_INFL', ['ます'])])])])])])]
> Found 1 parse tree:

Tree 1:
                           S                                              
                           |                                               
                           S1                                             
        ___________________|_________________________                      
       |                                             VP                   
       |                                             |                     
       |                                            V_NP                  
       |                             ________________|___________          
       |                       

True