# Running the MGSMT Parser on Examples from the CoNLL-2022 paper.

***by Sagar Indurkhya (indurks@mit.edu)***

## Prelude: Imports and Setting up the Parser

In [None]:
from IPython.core.display import display, HTML
# Enable the following line to make the Jupyter notebook take up the full width of the browser window.
#display(HTML("<style>.container { width:100% !important; }</style>"))
# Enable the following line if working on a "retina" screen (e.g. a MBP).
%config InlineBackend.figure_format = 'retina'

In [None]:
import json
import pprint as pp

import mgsmt
import mgsmt.experiments.parserexperiment
from mgsmt.experiments.parserexperiment import ParserExperiment
from mgsmt.experiments.parserexperiment import construct_parser_experiment_configurations

In [None]:
def run_parser(inferred_lexicon, 
               experiment_params, 
               evaluation_corpus, 
               extra_lexical_items, 
               include_LF_constraints=True, 
               include_PF_constraints=True,
               extract_all_parses=False,
               max_num_empty_lexical_items=2,
               max_num_movements=4,
               max_num_head_movements=2):
    """Build a parser experiment from the given inputs and run it.

    Only the first configuration returned by
    `construct_parser_experiment_configurations` is used, and its
    'evaluate_intermediate_steps' entry is set to False before the
    experiment is created.

    Args:
        inferred_lexicon, experiment_params, evaluation_corpus,
        extra_lexical_items: forwarded unchanged, in this order, to
            `construct_parser_experiment_configurations`.
        include_LF_constraints, include_PF_constraints, extract_all_parses:
            forwarded as keyword arguments to `ParserExperiment.run`.
        max_num_empty_lexical_items, max_num_movements,
        max_num_head_movements: handed to `ParserExperiment` through its
            `other_args` dictionary under keys of the same name.

    Returns:
        None.
    """
    # Step 1: derive the experiment configurations and keep the first one.
    all_configs = construct_parser_experiment_configurations(
        inferred_lexicon,
        experiment_params,
        evaluation_corpus,
        extra_lexical_items,
    )
    first_config = all_configs[0]
    first_config['evaluate_intermediate_steps'] = False

    # Step 2: collect the display flag and the resource bounds for the parser.
    parser_options = {
        "display.jupyter-widgets": True,
        "max_num_empty_lexical_items": max_num_empty_lexical_items,
        "max_num_movements": max_num_movements,
        "max_num_head_movements": max_num_head_movements,
    }
    experiment = ParserExperiment(params=first_config, other_args=parser_options)

    # Step 3: run the experiment with the requested constraint families.
    experiment.run(
        include_LF_constraints=include_LF_constraints,
        include_PF_constraints=include_PF_constraints,
        extract_all_parses=extract_all_parses,
    )

## Experiments.

### Run the MGSMT parser on each of eight specified pairings of interface conditions.

**Notes:** 
- The first six examples (i.e. A-F, which correspond to ICs $I_1$ to $I_6$) are run using the same parameter configuration, whereas the seventh and eighth examples (i.e. G-H, which correspond to ICs $I_7$ and $I_8$) require additional allocation of both empty categories and instances of head movement. 
- For each example, first the inputs (i.e. the lexicon and the interface conditions) are displayed, and then the outputs (i.e. the MG derivation and the lexical items used to generate that derivation) are shown. 
- When examining the display panel showing the derivation, you can view the conventional SVO depiction of the derivation by going to "Grammar > Options" and then unchecking "Display Head Movement Arrows" and "Display Phrasal Movement Arrows" (displaying these arrows can lead GraphViz to draw the derivation tree in an odd manner as it tries to avoid drawing overlapping edges).
- Although the parser is designed to process a large sequence of pairs of interface conditions, here we are running the parser on one pair of interface conditions at a time. As a consequence, you will see (and can ignore) the input and output display panels showing "Lexicon (A)" repeatedly, even when the parser is run on other lexicons; likewise, the interface conditions are shown as "$I_{0}$" repeatedly, even though we are processing other interface conditions.

In [None]:
def process_example(example_idx, 
                    example_code,
                    extract_all_parses=False,
                    max_num_empty_lexical_items=2,
                    max_num_movements=4,
                    max_num_head_movements=2):
    """Run the parser on one example, identified by an index and a letter.

    The lexicon is read from 'experiment-data/lexicon-<example_code>.json'.
    The interface conditions are the single entry at position `example_idx`
    of the 'input_sequence' list in
    'experiment-data/corpus-of-interface-conditions.json'; the whole content
    of that file is also passed to `run_parser` as the experiment parameters.

    Args:
        example_idx: integer in the range 0..7 selecting the entry of
            'input_sequence' to parse.
        example_code: exactly one of the letters 'A'..'H', selecting the
            lexicon file.
        extract_all_parses, max_num_empty_lexical_items, max_num_movements,
        max_num_head_movements: forwarded unchanged to `run_parser`.

    Returns:
        None.

    Raises:
        AssertionError: if `example_idx` or `example_code` is out of range.
    """
    assert 0 <= example_idx < 8, \
        f'example_idx must be in 0..7, got {example_idx!r}'
    # `code in 'ABCDEFGH'` alone is a substring test, which would also accept
    # '' and multi-letter runs such as 'AB'; insist on exactly one letter so
    # that a bad code is rejected here rather than as a missing lexicon file.
    assert len(example_code) == 1 and example_code in 'ABCDEFGH', \
        f"example_code must be a single letter in 'A'..'H', got {example_code!r}"
    print('-'*29 + f' Processing Example {example_code} ' + '-'*29)
    
    # Load the lexicon
    with open(f'experiment-data/lexicon-{example_code}.json', 'r') as f_in:
        lexicon = json.load(f_in)
    
    # Load the input sequence of paired interface conditions.
    with open('experiment-data/corpus-of-interface-conditions.json', 'r') as f_in:
        experiment_params = json.load(f_in)
        # A one-element slice (not a bare element) keeps the corpus a list.
        evaluation_corpus = experiment_params['input_sequence'][example_idx:example_idx+1]
    
    # Run the parser on the specified interface conditions using the given lexicon.
    run_parser(lexicon, 
               experiment_params, 
               evaluation_corpus, 
               extra_lexical_items=[],
               extract_all_parses=extract_all_parses,
               max_num_empty_lexical_items=max_num_empty_lexical_items,
               max_num_movements=max_num_movements,
               max_num_head_movements=max_num_head_movements)

In [None]:
# Examples A-F are run with the default resource bounds of `process_example`;
# each letter's position in the string is used as its example index.
for i, ex_id in enumerate('ABCDEF'):
    print()
    process_example(example_idx=i, example_code=ex_id)
    print()

In [None]:
# Seventh example (G): run with larger bounds than the defaults.
process_example(example_idx=6,
                example_code='G',
                max_num_empty_lexical_items=6,
                max_num_movements=6,
                max_num_head_movements=4)

In [None]:
# Eighth example (H): run with larger bounds than the defaults.
process_example(example_idx=7,
                example_code='H',
                max_num_empty_lexical_items=6,
                max_num_movements=6,
                max_num_head_movements=4)

### Run the MGSMT parser on an input where only PF interface conditions are specified.

In [None]:
# Read the lexicon used for this experiment.
with open('experiment-data/lexicon-A-extra-complementizer.json', 'r') as f_in:
    lexicon = json.load(f_in)

# Read the experiment parameters, which also hold the interface conditions.
with open('experiment-data/corpus-of-interface-conditions.json', 'r') as f_in:
    experiment_params = json.load(f_in)
# Only the first entry of the input sequence is parsed here.
evaluation_corpus = experiment_params['input_sequence'][0:1]

# PF constraints only: LF constraints are switched off, and every parse is extracted.
run_parser(inferred_lexicon=lexicon,
           experiment_params=experiment_params,
           evaluation_corpus=evaluation_corpus,
           extra_lexical_items=[],
           include_LF_constraints=False,
           include_PF_constraints=True,
           extract_all_parses=True)

### Run the MGSMT parser on an input where only LF interface conditions are specified.

In [None]:
# Read the lexicon used for this experiment.
with open('experiment-data/lexicon-A-extra-complementizer.json', 'r') as f_in:
    lexicon = json.load(f_in)

# Read the experiment parameters, which also hold the interface conditions.
with open('experiment-data/corpus-of-interface-conditions.json', 'r') as f_in:
    experiment_params = json.load(f_in)
# Only the first entry of the input sequence is parsed here.
evaluation_corpus = experiment_params['input_sequence'][0:1]

# LF constraints only: PF constraints are switched off, and every parse is extracted.
run_parser(inferred_lexicon=lexicon,
           experiment_params=experiment_params,
           evaluation_corpus=evaluation_corpus,
           extra_lexical_items=[],
           include_LF_constraints=True,
           include_PF_constraints=False,
           extract_all_parses=True)

# End of Notebook