23 February 2021

Last updated 26 February 2021

An `ipywidgets`-based Algonquian-English/English-Algonquian translation GUI.

To run in a browser window, use the following command in the command line:

> `voila gui.ipynb`

In [34]:
import pandas
import ipywidgets as widgets
import re

In [67]:
# Sentence table. header=1 tells pandas to take the column names from the
# second line of the file.
entries = pandas.read_csv('entries.csv', header=1)

# English sentence -> Algonquian form (English sentences are expected to be unique).
eng2alg_dict = entries.set_index('English')['Algonquian'].to_dict()

# Algonquian form -> every English sentence that maps to it. Algonquian forms
# are NOT unique, so each value is a list.
alg2eng_dict = {}
for eng, alg in eng2alg_dict.items():
    alg2eng_dict.setdefault(alg, []).append(eng)

# English sentence -> morphological analysis (unique for the same reason as above).
eng2analysis_dict = entries.set_index('English')['Analysis'].to_dict()

# Algonquian form -> analyses, index-aligned with the lists in alg2eng_dict.
alg2analysis_dict = {
    alg: [eng2analysis_dict[eng] for eng in engs]
    for alg, engs in alg2eng_dict.items()
}

# Gloss -> English sentence / Algonquian form.
gloss2eng_dict = entries.set_index('Gloss')['English'].to_dict()
gloss2alg_dict = entries.set_index('Gloss')['Algonquian'].to_dict()

# (analysis, English sentence) pairs in file order; kept as a list of pairs
# rather than a dict so that repeated analyses are all retained.
analysis2eng_tup = list(zip(entries['Analysis'], entries['English']))

In [113]:
# (gloss pattern, Algonquian form) pairs. The pattern is the gloss with its
# leading run of lowercase letters/underscores removed; the negative lookahead
# keeps the removed prefix from ending directly in front of "12ID".
glossstring2alg_tup = [
    (re.sub('^([a-z]|_)+(?!12ID)', '', gloss), alg)
    for gloss, alg in gloss2alg_dict.items()
]

# Just the gloss patterns, in the same order as glossstring2alg_tup.
all_glossstrings = [pattern for pattern, _ in glossstring2alg_tup]

In [36]:
# Stem glossary: one row per English label, with one column of Algonquian
# stems for each of the TA / TI / AI / II columns ('-' marks an empty cell).
glossary = pandas.read_csv('glossary.csv')

# Algonquian stem cell -> English label, filled column by column so that a
# later column overrides an earlier one when the same stem cell recurs.
stems_only_alg2eng = {}
for verb_class in ('TA', 'TI', 'AI', 'II'):
    stems_only_alg2eng.update(
        zip(glossary[verb_class].tolist(), glossary['eng_updated_210129'])
    )
del stems_only_alg2eng['-']

# Every distinct stem cell across the four columns, minus the '-' placeholder ...
alg_stems = list({
    cell
    for verb_class in ('TA', 'TI', 'AI', 'II')
    for cell in glossary[verb_class].tolist()
})
alg_stems.remove('-')
# ... with cells of the form "a/b" split into their individual stems.
alg_stems = [variant for cell in alg_stems for variant in cell.split('/')]

# English stem labels exactly as stored in the glossary (underscore-separated).
eng_stems_original = glossary['eng_updated_210129'].tolist()
# Typeable variants: underscores become spaces, then each pair of spaces is
# squeezed to a single space.
eng_stems_fixed = [re.sub('  ', ' ', e.replace('_', ' ')) for e in eng_stems_original]

# English label -> 'TA' for every label paired with a real (non '-') TA stem.
# The intermediate dict is keyed by stem, so a repeated stem keeps its last label.
eng_transitivity_TA = {
    label: 'TA'
    for stem, label in dict(zip(glossary['TA'].tolist(), glossary['eng_updated_210129'])).items()
    if stem != '-'
}

# Same for the TI column.
eng_transitivity_TI = {
    label: 'TI'
    for stem, label in dict(zip(glossary['TI'].tolist(), glossary['eng_updated_210129'])).items()
    if stem != '-'
}

# Combined lookup; TI wins when a label appears in both.
eng_transitivity = dict(eng_transitivity_TA)
eng_transitivity.update(eng_transitivity_TI)

# Labels containing '__' are split by transitivity: TA labels get "someone"
# filled in, TI labels get "something".
needs_someone = []
needs_something = []
for label in eng_stems_original:
    if '__' not in label:
        continue
    transitivity = eng_transitivity[label]
    if transitivity == 'TA':
        needs_someone.append(label)
    elif transitivity == 'TI':
        needs_something.append(label)

# "<verb> someone <rest>" -> original label; the remaining single underscores
# are turned into spaces in a second pass.
insert_someone = {}
for label in needs_someone:
    insert_someone[re.sub('__', ' someone ', label)] = label
insert_someone = {re.sub('_', ' ', phrase): label for phrase, label in insert_someone.items()}

# "<verb> something <rest>" -> original label, built the same way.
insert_something = {}
for label in needs_something:
    insert_something[re.sub('__', ' something ', label)] = label
insert_something = {re.sub('_', ' ', phrase): label for phrase, label in insert_something.items()}

# Every spelling a user may type -> the glossary's own label.
eng_stems_map = {
    **dict(zip(eng_stems_fixed, eng_stems_original)),
    **insert_someone,
    **insert_something,
}

# Lower-cased English sentence -> the sentence as spelled in the dictionary.
capitalization_map = {sentence.lower(): sentence for sentence in eng2alg_dict}

# Characters removed from user input before lookup.
punctuation = "\"'.,!?"

# Lower-cased words that occur in the English sentences. Words are compared
# against the list as it stood before the current sentence was processed.
eng_words = []
for sentence in eng2alg_dict:
    lowered = [w.lower() for w in sentence.split()]
    eng_words += [w for w in lowered if w not in eng_words]

# Everything accepted as "known" when explaining a failed translation.
valid_words = [*eng_words, *eng2alg_dict.values(), 'will']

In [37]:
def lookup_in_tuple(key, tuple_set):
    """Return the second item of the first pair in *tuple_set* whose first item equals *key*.

    Returns None when no pair matches.
    """
    return next((pair[1] for pair in tuple_set if pair[0] == key), None)

In [66]:
def clean_input(text):
    """Normalise raw user input and classify it against the loaded dictionaries.

    The text is stripped, lower-cased and has every character in
    ``punctuation`` removed. A few fixed rewrites follow ("dont" -> "do not",
    "you guys" -> "yall", " the " -> " that ") and runs of two or more
    whitespace characters collapse to one space. If the result is a key of
    ``capitalization_map`` it is replaced by the mapped spelling.

    Returns:
        ``(text, input_type)`` where ``input_type`` is the first match of
        ``('conjugated', 'algonquian')``, ``('conjugated', 'english')``,
        ``('stem', 'algonquian')``, ``('stem', 'english')``, or
        ``('none', 'none')`` when the text is not recognised.
    """
    cleaned = ''.join(ch for ch in text.strip().lower() if ch not in punctuation)

    # Applied in order; the whitespace squeeze must come last.
    rewrites = (
        ('dont', "do not"),
        ('you guys', 'yall'),
        (' the ', ' that '),
        (r'\s\s+', ' '),
    )
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Restore the dictionary's own capitalisation when there is one.
    cleaned = capitalization_map.get(cleaned, cleaned)

    if cleaned in alg2eng_dict:
        return cleaned, ('conjugated', 'algonquian')
    if cleaned in eng2alg_dict:
        return cleaned, ('conjugated', 'english')
    if cleaned in alg_stems:
        return cleaned, ('stem', 'algonquian')
    if cleaned in eng_stems_map:
        return cleaned, ('stem', 'english')
    return cleaned, ('none', 'none')

In [147]:
# The whole word "we"/"us", but not the start of "we-inc"/"us-inc". Used with
# .search() so that the word is found anywhere in the sentence.
_EXCLUSIVE_WE = re.compile(r'(?:^|\s)we\b(?!-)')
_EXCLUSIVE_US = re.compile(r'(?:^|\s)us\b(?!-)')


def _inclusive_pronoun_warning(text):
    """Return a hint about "we-inc"/"us-inc" for the English sentence *text*, or None.

    No hint is given when the sentence mentions "you"/"yall". When both "we"
    and "us" occur, the "us" hint is the one returned.
    """
    lowered = text.lower()
    if 'you' in lowered or 'yall' in lowered:
        return None

    warning = None
    if _EXCLUSIVE_WE.search(lowered):
        warning = 'Heads up! It looks like you\'ve used the word "we." If you mean "we' + \
                  ' (including the person I\'m talking to)", then try using "we-inc" instead.'
    if _EXCLUSIVE_US.search(lowered):
        warning = 'Heads up! It looks like you\'ve used the word "us." If you mean "us' + \
                  ' (including the person I\'m talking to)", then try using "us-inc" instead.'
    return warning


def _replacement_hint(word, options):
    """Return the "try replacing this word" message for *word* listing *options*."""
    return 'It looks like you used the word "' + word + \
           '." Try replacing this word with one of the following:\n' + \
           '\n\t'.join(options)


def _explain_unknown_phrase(text):
    """Return the message shown when *text* is not an entry of either dictionary."""
    # Whole-word test, so that e.g. "willow" is not mistaken for the future tense.
    if 'will' in text.split():
        return 'You may be trying to translate a future-tense sentence. Currently, the dictionary only ' + \
               'supports present tense.\nBut you can easily get the future tense by putting the word "mus" ' + \
               'in front of the present tense! For example:\n' + \
               '\t"I see you."\t\t==> "kunáwush"\n' + \
               '\t"I will see you."\t==> "mus kunáwush"'

    for word in text.split(' '):
        if word in valid_words and word != 'something':
            continue

        # First problematic word found: prefer a targeted hint when the text
        # contains one of the words the dictionary deliberately avoids.
        for vague in ('it', 'thing', 'something'):
            if ' ' + vague in text or text.startswith(vague):
                return _replacement_hint(
                    vague,
                    ['\tthat NA', 'those NAs', 'that NI', 'those NIs',
                     'a NA', 'some NAs', 'a NI', 'some NIs'])

        if word in ('she', 'they', 'him', 'her', 'them', 'he'):
            return _replacement_hint(word, ['\tthat NA', 'those NAs', 'a NA', 'some NAs'])

        message = "The word \"" + word + '" is not recognized.\n'
        if 'a' in word or 'o' in word:
            message += "Try double checking the spelling of special characters (á, ô)!"
        return message

    return "This phrase was not found in the dictionary."


def translate(text):
    """Translate a conjugated English or Algonquian phrase and describe the result.

    Args:
        text: raw user input; it is normalised with ``clean_input`` first.

    Returns:
        A printable string. For a dictionary entry this is the source
        language and phrase, the number of translations, and each translation
        with its analysis, optionally preceded by a "we-inc"/"us-inc" hint.
        Otherwise it is a message explaining why nothing was found (bare verb
        stem, future tense, unsupported pronoun, unknown word, ...).
    """
    text, input_type = clean_input(text)

    # Defensive: make sure no "y'all" spelling survives normalisation.
    text = re.sub("y'all", "yall", text)

    if input_type[0] not in ('conjugated', 'none'):
        return "This looks like a single verb stem.\nTry using the Search function instead."

    text = capitalization_map.get(text, text)

    warning = None
    if text in alg2eng_dict:
        language = 'Algonquian'
        translations = alg2eng_dict[text]
        analyses = alg2analysis_dict[text]
    elif text in eng2alg_dict:
        language = 'English'
        warning = _inclusive_pronoun_warning(text)
        translations = [eng2alg_dict[text]]
        analyses = [eng2analysis_dict[text]]
    else:
        return _explain_unknown_phrase(text)

    body = '\n\n'.join(
        "Translation:\t" + translation + '.\n' + "Break it down:\t" + analysis
        for translation, analysis in zip(translations, analyses)
    )

    count = len(translations)
    if count == 1:
        middle = 'is 1 possible translation'
    else:
        middle = 'are ' + str(count) + ' possible translations'
    header = 'There ' + middle + ' for "' + text + '":\n\n'

    output = language + ':\t' + text + '.\n\n' + header + body
    if warning:
        output = warning + '\n\n' + output

    return output
        
def on_button_clicked(b):
    """Click handler for the Translate button.

    Translates the current contents of ``w_text`` and replaces whatever is
    shown in ``w_output`` with the result. *b* (the clicked button) is unused.
    """
    # Compute first, so the old output is only cleared once a result exists.
    result_text = translate(w_text.value)
    w_output.clear_output()
    with w_output:
        print(result_text)
        
def get_all_forms(text):
    """List every dictionary entry built on a given verb stem.

    Args:
        text: an English verb (optionally preceded by "to", or by
            "is"/"am"/"are") or an Algonquian verb stem; it is normalised
            with ``clean_input`` first.

    Returns:
        A printable string: the number of forms found followed by one
        tab-separated line per form, or a message when the input looks like a
        conjugated form or the stem is not in the glossary.
    """
    text, input_type = clean_input(text)

    if input_type[0] not in ('stem', 'none'):
        return "This looks like a conjugated form. Try searching for just the verb stem.\nOr, try " + \
               "using the Translate function instead."

    # Reduce common ways of typing an English verb to the glossary spelling.
    text = re.sub('^to ', '', text)
    text = re.sub('^(is|am|are) ', 'be ', text)
    if 'be ' + text in eng_stems_map:
        text = 'be ' + text
    if text == 'has':
        text = 'have'

    if text in eng_stems_map:
        # Glosses of this stem start with "<label>.". A plain prefix test is
        # used instead of a regex so that characters in the label are never
        # interpreted as regex syntax.
        gloss_prefix = eng_stems_map[text] + '.'
        all_forms = [eng for gloss, eng in gloss2eng_dict.items()
                     if gloss.startswith(gloss_prefix)]
        translations = [eng2alg_dict[eng] for eng in all_forms]

    elif text in alg_stems:
        # Analyses of this stem start with the stem, optionally preceded by a
        # ku+/nu+/wu+ prefix, and continue with "+..." or end right there.
        # The stem is escaped so it is always matched literally.
        r_stem = re.compile(r"(?:(?:ku|nu|wu)\+)?" + re.escape(text) + r"(?:\+|$)")

        # English sentences whose analysis uses the stem ...
        translations = [eng for analysis, eng in analysis2eng_tup if r_stem.match(analysis)]
        # ... and the Algonquian form of each of them.
        all_forms = [eng2alg_dict[eng] for eng in translations]

    else:
        return '"' + text + '" was not found in the dictionary.'

    num_forms = len(all_forms)
    output = '\n'.join(form + '\t\t' + other for form, other in zip(all_forms, translations))
    message = str(num_forms) + ' forms of "' + text + '" were found:\n\n' + output

    return message
        
def on_button_clicked2(b):
    """Click handler for the verb-forms Search button.

    Looks up the current contents of ``w_text2`` with ``get_all_forms`` and
    replaces whatever is shown in ``w_output2`` with the result. *b* (the
    clicked button) is unused.
    """
    # Compute first, so the old output is only cleared once a result exists.
    result_text = get_all_forms(w_text2.value)
    w_output2.clear_output()
    with w_output2:
        print(result_text)
        
        
def find_same_pattern(text):
    """Find other Algonquian forms that share a gloss pattern with *text*.

    Args:
        text: an Algonquian form; it is normalised with ``clean_input`` first.

    Returns:
        A printable string with one newline-separated group of forms per gloss
        pattern of *text*, groups separated by blank lines (empty when *text*
        has no gloss pattern), or a message when *text* is not recognised as
        Algonquian.
    """
    text, input_type = clean_input(text)

    # Only Algonquian input is supported; report that instead of failing.
    if input_type[1] != 'algonquian':
        return 'This search only works for Algonquian forms.\n' + \
               'Try entering a conjugated Algonquian verb.'

    # Every gloss pattern under which this exact form is listed.
    glosses = [pattern for pattern, alg in glossstring2alg_tup if alg == text]

    groups = []
    for gloss in glosses:
        same_pattern = [alg for pattern, alg in glossstring2alg_tup
                        if pattern == gloss and alg != text]
        groups.append('\n'.join(same_pattern))

    return '\n\n'.join(groups)
        
def on_button_clicked3(b):
    """Click handler for the same-pattern Search button.

    Looks up the current contents of ``w_text3`` with ``find_same_pattern``
    and replaces whatever is shown in ``w_output3`` with the result. *b* (the
    clicked button) is unused.
    """
    # Compute first, so the old output is only cleared once a result exists.
    result_text = find_same_pattern(w_text3.value)
    w_output3.clear_output()
    with w_output3:
        print(result_text)

# Page title. Heading elements are closed with their matching end tags.
w_mainheader = widgets.HTML('<h1><b>Algonquian Dictionary</b></h1>')

# Section 1: translate a whole phrase.
w_header = widgets.HTML('<h2><b>Translate</b></h2>')
w_text = widgets.Textarea(placeholder='Write something (English or Algonquian)!', layout=widgets.Layout(width ='80%'))
w_button = widgets.Button(description='Translate')
w_button.on_click(on_button_clicked)
w_output = widgets.Output()

# Section 2: list every form of a verb stem.
w_header2 = widgets.HTML('<h2><b>See all forms of a verb</b></h2>')
w_text2 = widgets.Textarea(placeholder='Search for an English verb or an Algonquian verb stem',
                              layout=widgets.Layout(width='30%'))
w_button2 = widgets.Button(description='Search')
w_button2.on_click(on_button_clicked2)
w_output2 = widgets.Output()

# Section 3: find forms that share a gloss pattern.
w_header3 = widgets.HTML('<h2><b>Find words with the same pattern</b></h2>')
w_text3 = widgets.Textarea(placeholder='Search for a conjugated Algonquian verb or English sentence', layout=widgets.Layout(width='80%'))
w_button3 = widgets.Button(description='Search')
w_button3.on_click(on_button_clicked3)
w_output3 = widgets.Output()

# Stack everything vertically, centred, and render it.
ui_items = [w_mainheader,w_header,w_text,w_button, w_output,
            w_header2,w_text2,w_button2,w_output2,
            w_header3,w_text3,w_button3,w_output3]
w_ui = widgets.VBox(ui_items, layout=widgets.Layout(align_items='center'))
display(w_ui)

VBox(children=(HTML(value='<h1><b>Algonquian Dictionary</b></h>'), HTML(value='<h2><b>Translate</b></h>'), Tex…