In [65]:
import pandas as pd
import json

# Derivative Decomposition of Complex Prepositional Phrases

## Step 1: initialize master lexicon

In [66]:
# Seed the master lexicon from atomic_p.json: every top-level key (a
# preposition such as "behind") is copied over together with its full
# feature object.
master_pp_lexicon = dict()
with open('pp_lexicon/atomic_p.json', 'r') as atomic_file:
    atomic_p = json.load(atomic_file)
    master_pp_lexicon.update(atomic_p.items())

In [67]:
# Merge p_lexicon.json into the master lexicon.
# Keys that already exist (loaded from atomic_p.json) are overwritten with the
# p_lexicon.json value; keys that do not exist yet are added. Both cases are a
# plain assignment, so no membership test is needed -- dict.update does exactly
# this.
with open('pp_lexicon/p_lexicon.json', 'r') as f:
    p_lexicon = json.load(f)

master_pp_lexicon.update(p_lexicon)

In [68]:
# Sanity check: number of distinct keys in the merged lexicon.
len(master_pp_lexicon)

107

In [69]:
# Show every key currently held in the merged lexicon.
master_pp_lexicon.keys()

dict_keys(['behind', 'above', 'below', 'beyond', 'in front of', 'inside', 'outside', 'left', 'right', 'against', 'among', 'beside', 'between', 'near', 'next to', 'upon', 'across', 'along', 'around', 'over', 'past', 'through', 'under', 'up', 'down', 'on', 'off', 'in', 'out', 'away', 'top', 'astride', 'corner', 'by', 'edge', 'onto', 'after', 'but', 'end', 'into', 'upside', 'for', 'base', 'higher', 'high', 'bottom', 'via', 'aside', 'back', 'nearest', 'betwixt', 'at', 'ahead', 'astern', 'following', 'center', 'within', 'prior', 'without', 'front', 'before', 'opposite', 'from', 'beneath', 'with', 'rear', 'apart', 'towards', 'except', 'heart', 'afore', 'tween', 'aboard', 'throughout', 'foot', 'nearer', 'nigh', 'alongside', 'virtue', 'next', 'toward', 'amongst', 'side', 'the', 'of', 'far', 'amid', 'adjacent', 'underneath', 'skin', 'flank', 'part', 'plus', 'to', 'board', 'subsequent', 'addition', 'cross', 'surface', 'a', 'amidst', 'underside', 'middle', 'rim', 'face', 'close', 'core'])

## Step 2: add complex preposition list

In [70]:
# Load the candidate preposition list from the WordNet/wiki CSV.
df_pp_wordnet = pd.read_csv('dictionaries/pp_wordnet_wiki_pop.csv', sep=',')

# Drop missing values. pandas represents an empty CSV cell as float NaN, not
# None, so an `x is not None` filter lets every NaN through; Series.dropna()
# removes NaN (and None) before the column is turned into a plain list.
pp_wordnet_wiki = df_pp_wordnet['preposition'].dropna().tolist()


In [71]:
# Complex-preposition candidates: string entries of pp_wordnet_wiki that have
# no entry in the master lexicon. Building a set removes duplicates (and the
# type check discards float NaN); sorted() then gives a stable alphabetical
# list.
unlisted_prepositions = {
    candidate
    for candidate in pp_wordnet_wiki
    if type(candidate) is str and candidate not in master_pp_lexicon
}
complex_pp_list = sorted(unlisted_prepositions)
len(complex_pp_list)

85

In [72]:
# Add the placeholder entries of p_lexicon.json to complex_pp_list.
# An entry counts as a placeholder when all four scalar feature fields are the
# empty string and "spellOutHEAD" is the single-element list [""].
empty_keys = [
    key
    for key, entry in p_lexicon.items()
    if entry["isAtomicMorph"] == ""
    and entry["class"] == ""
    and entry["path_p_morphology"] == ""
    and entry["measure_allowed"] == ""
    and entry["spellOutHEAD"] == [""]
]

# Only append keys that are not in the list yet, so re-running this cell does
# not grow complex_pp_list with duplicates.
already_listed = set(complex_pp_list)
complex_pp_list.extend(key for key in empty_keys if key not in already_listed)

# Number of placeholder entries found in p_lexicon.json.
counter = len(empty_keys)

print(counter)

31


In [73]:
# Persist the current complex-preposition list as pretty-printed JSON.
# NOTE(review): this snapshot is written before 'the' is removed from
# complex_pp_list in the following cell -- confirm that is intended.
with open('pp_lexicon/complex_pp.json', 'w') as out_file:
    out_file.write(json.dumps(complex_pp_list, indent=4))

len(complex_pp_list)

116

In [74]:
# Drop the article 'the' from the candidate list.
# Filtering instead of list.remove keeps this cell re-runnable: list.remove
# raises ValueError when 'the' is no longer (or was never) in the list.
complex_pp_list = [pp for pp in complex_pp_list if pp != 'the']
complex_pp_list[-10:]

['virtue',
 'toward',
 'amongst',
 'adjacent',
 'underneath',
 'plus',
 'subsequent',
 'addition',
 'cross',
 'underside']

In [75]:
# Dictionary to store decomposition results.
decomposed_result = dict()

### Get the empty entries in p_lexicon.json

In [82]:
# Collect the keys of p_lexicon.json entries that are still placeholders:
# the four scalar feature fields are "" and "spellOutHEAD" is [""].
empty_entries = [
    name
    for name, feats in p_lexicon.items()
    if all(
        feats[field] == ""
        for field in ("isAtomicMorph", "class", "path_p_morphology", "measure_allowed")
    )
    and feats["spellOutHEAD"] == [""]
]

print(len(empty_entries))

31


In [None]:
# Copy p_lexicon into a new dictionary, leaving out placeholder entries
# (four scalar feature fields equal to "" and "spellOutHEAD" equal to [""]).
filled_p_lexicon = {
    name: feats
    for name, feats in p_lexicon.items()
    if not (
        feats["isAtomicMorph"] == ""
        and feats["class"] == ""
        and feats["path_p_morphology"] == ""
        and feats["measure_allowed"] == ""
        and feats["spellOutHEAD"] == [""]
    )
}

len(filled_p_lexicon)

# with open('pp_lexicon/p_lexicon.json', 'w') as f:
#     json.dump(filled_p_lexicon, f, indent=4)


## Step 3: Decomposition Logic