Last updated 27 February 2021

An `ipywidgets`-based Algonquian-English/English-Algonquian translation GUI.

To run it in a browser window, enter the following command on the command line:

> `voila gui.ipynb`

In [1]:
import pandas
import ipywidgets as widgets
import re

In [2]:
# Gather all the entries from entries.csv

class Entry:
    """One dictionary record pairing an Algonquian form with its English sentence.

    Attributes:
        alg: the Algonquian form.
        eng: the English sentence, with its original capitalization.
        gloss: the gloss string for the entry.
        analysis: the breakdown string for the entry.
        verb_type: optional verb class label; None when not supplied.
        eng_lower: lowercased copy of ``eng``.
    """

    def __init__(self, alg, eng, gloss, analysis, verb_type=None):
        self.alg, self.eng = alg, eng
        self.gloss, self.analysis = gloss, analysis
        self.verb_type = verb_type

        # Lowercased once up front so lookups can ignore capitalization.
        self.eng_lower = eng.lower()
    
# entries.csv is read with header=1, i.e. its second line supplies the column names.
entries_df = pandas.read_csv('entries.csv',header=1)

# Build one Entry per row from the Algonquian/English/Gloss/Analysis columns.
entries = [
    Entry(row.Algonquian, row.English, row.Gloss, row.Analysis)
    for row in entries_df.itertuples()
]

# Parallel lookup lists used to classify user input.
eng_entries = [e.eng_lower for e in entries]
alg_entries = [e.alg for e in entries]

In [3]:
# Gather all verb stems from glossary.csv

def insert_some(text,end):
    """Expand underscore placeholders in a glossary English stem.

    Every double underscore becomes ' some<end> ' (for example ' someone '
    when ``end`` is 'one'); any underscore left over becomes a single space.
    """
    filler = ' some' + end + ' '
    with_filler = re.sub('__', filler, text)
    return re.sub('_', ' ', with_filler)

class Stem():
    """A verb stem from the glossary: one English gloss plus up to four Algonquian forms.

    Parameters:
        eng: English gloss, using underscores as placeholders (see ``insert_some``).
        TA, TI, AI, II: the stem's form for each of the four glossary columns;
            ``None`` or the placeholder ``'-'`` means the form is absent.

    Attributes:
        eng: the English gloss as given.
        TA, TI, AI, II: the given forms, with ``'-'`` normalized to ``None``.
        alg: the distinct forms that are present (order not significant).
        eng_w_just_space: ``eng`` with each run of underscores replaced by one space.
        eng_w_some: ``eng`` with placeholders filled by "someone" when a TA form
            exists, otherwise by "something".
        eng_w_spaces: the distinct values of the two spellings above.
    """
    def __init__(self,eng,TA=None,TI=None,AI=None,II=None):
        self.eng = eng
        self.TA = TA if TA != '-' else None
        self.TI = TI if TI != '-' else None
        self.AI = AI if AI != '-' else None
        self.II = II if II != '-' else None

        # Filter the absent forms out instead of calling list.remove(None), which
        # raises ValueError when all four forms are present.
        self.alg = list({form for form in (self.TA,self.TI,self.AI,self.II)
                         if form is not None})

        self.eng_w_just_space = re.sub('_+',' ',self.eng)

        # A stem with a TA form takes "someone"; every other stem takes "something".
        self.eng_w_some = insert_some(eng,'one' if self.TA else 'thing')
        self.eng_w_spaces = list(set([self.eng_w_just_space,self.eng_w_some]))
        
glossary_df = pandas.read_csv('glossary.csv')

# Build one Stem per glossary row; '-' in a column marks a missing form.
stems = []
for row in glossary_df.itertuples():
    eng = row.eng_updated_210129
    TA = row.TA if row.TA != '-' else None
    TI = row.TI if row.TI != '-' else None
    AI = row.AI if row.AI != '-' else None
    II = row.II if row.II != '-' else None
    new_stem = Stem(eng,TA,TI,AI,II)
    stems.append(new_stem)

# Every English spelling of every stem, flattened into one list.
eng_stems_w_spaces = [spelling for stem in stems for spelling in stem.eng_w_spaces]

# Distinct Algonquian stem forms, skipping any form that contains "/".
# The slash test has to be applied to each form string: stem.alg is a list, so
# testing `"/" not in stem.alg` only looks for an element equal to "/" and never
# filters anything. Non-string cells (e.g. NaN from an empty CSV cell) are
# skipped as well, since they can never equal user input.
# NOTE(review): assumes the intent was to drop individual slash forms rather
# than the whole stem - confirm against glossary.csv.
alg_stems = list({form for stem in stems for form in stem.alg
                  if isinstance(form, str) and "/" not in form})
alg_stems.extend(['mir','mis'])

In [4]:
# define a function to clean up and classify the input
punctuation = "\"'.,!?"

def clean_input(text):
    """Normalize raw user input and classify it against the loaded dictionary data.

    Cleaning: strip and lowercase, drop the characters in ``punctuation``, rewrite
    'dont' -> 'do not', 'you guys' -> 'yall', ' the ' -> ' that ', and collapse
    runs of whitespace to a single space.

    Classification is tried in this order: conjugated Algonquian entry,
    Algonquian stem, conjugated English entry, English stem.

    Returns:
        A tuple ``(text, input_type)`` where ``text`` is the cleaned input and
        ``input_type`` is ``(form, language)``: form is 'conjugated' or 'stem',
        language is 'Algonquian' or 'English'; unrecognized input yields
        ``('invalid', 'invalid')``.
    """
    text = text.strip().lower()
    text = ''.join([c for c in text if c not in punctuation])
    text = re.sub('dont',"do not", text)
    text = re.sub('you guys','yall',text)
    text = re.sub(' the ',' that ',text)
    text = re.sub(r'\s\s+',' ',text)

    # Candidate English stem spelling, normalized the same way get_all_forms does.
    # Each step must build on the previous one: recomputing from `text` would
    # throw away the leading-"to" strip.
    fixed_text = re.sub('^to ','',text)
    fixed_text = re.sub('^(is|am|are) ','be ',fixed_text)

    if 'be '+fixed_text in eng_stems_w_spaces:
        fixed_text = 'be '+fixed_text
    if fixed_text == 'has':
        fixed_text = 'have'

    if text in alg_entries:
        input_type = ('conjugated', 'Algonquian')

    elif text in alg_stems:
        input_type = ('stem', 'Algonquian')

    elif text in eng_entries:
        input_type = ('conjugated','English')

    elif text in eng_stems_w_spaces or fixed_text in eng_stems_w_spaces:
        input_type = ('stem', 'English')

    else:
        input_type = ('invalid','invalid')

    return text, input_type

In [5]:
# Lowercased English sentence -> sentence as written in the data, for display.
eng_format_map = dict((e.eng_lower, e.eng) for e in entries)

# Collect the words used by the English entries, skipping words already
# gathered from earlier sentences.
eng_words = []
for sentence in eng_entries:
    eng_words += [word for word in sentence.split() if word not in eng_words]

# Vocabulary that handle_invalid_sentence treats as known.
valid_words = [*eng_words, *alg_entries, 'will']

In [6]:
def handle_invalid_sentence(text):
    """Build a help message for input that is not in the dictionary.

    Checks, in order: a future-tense "will", then for the first word that is
    unknown (or is "something") a disallowed word (it/thing/something), a
    third-person pronoun, or a plain unrecognized word. Falls back to a generic
    not-found message.

    Parameters:
        text: the already-cleaned input string.

    Returns:
        The message to show to the user.
    """
    # Nothing to diagnose word by word; avoid reporting the empty string as a word.
    if not text.strip():
        return "This phrase was not found in the dictionary."

    words = text.split(' ')

    # Match "will" as a whole word so that e.g. "willow" is not taken for future tense.
    if 'will' in words:
        return 'You may be trying to translate a future-tense sentence. Currently, the dictionary only ' + \
               'supports present tense.\nBut you can easily get the future tense by putting the word "mus" ' + \
               'in front of the present tense! For example:\n' + \
               '\t"I see you."\t\t==> "kunáwush"\n' + \
               '\t"I will see you."\t==> "mus kunáwush"'

    not_allowed = ['it','thing','something']
    pronouns = ['she','they','him','her','them','he']

    for word in words:
        if word in valid_words and word != 'something':
            continue

        # These checks look at the whole text, not only at the offending word.
        for w in not_allowed:
            if ' '+w in text or text.startswith(w):
                return 'It looks like you used the word "'+w+'." Try replacing this word with one of the following:\n' +\
                        '\n\t'.join(['\tthat NA','those NAs','that NI','those NIs',
                                   'a NA','some NAs','a NI','some NIs'])

        if word in pronouns:
            return 'It looks like you used the word "'+word+'." Try replacing this word with one of the following:\n' +\
                    '\n\t'.join(['\tthat NA','those NAs','a NA','some NAs'])

        message = "The word \""+word+'" is not recognized.\n'
        if 'a' in word or 'o' in word:
            message += "Try double checking the spelling of special characters (á, ô)!"

        return message
    return "This phrase was not found in the dictionary."

In [7]:
def translate(text):
    """Translate a conjugated Algonquian form or a full English sentence.

    Parameters:
        text: raw user input.

    Returns:
        A printable message. For recognized conjugated input it lists every
        matching translation with its breakdown, preceded by a heads-up when an
        English sentence uses a bare "we" or "us" without "you"/"yall". Lone
        stems get a pointer to the Search function; anything else is delegated
        to ``handle_invalid_sentence``.
    """
    # Clean first, then classify the cleaned text.
    text = clean_input(text)[0]
    text = re.sub("y'all","yall",text)
    text_type, language = clean_input(text)[1]

    if text_type == 'stem':
        return "This looks like a lone verb stem.\nTry using the Search function instead."

    if text_type != 'conjugated':
        return handle_invalid_sentence(text)

    warnings = []

    if language == 'Algonquian':
        trans_analys = [(entry.eng,entry.analysis) for entry in entries if entry.alg == text]

    else:  # English
        trans_analys = [(entry.alg,entry.analysis) for entry in entries if entry.eng_lower == text]

        # A bare "we"/"us" is only ambiguous when the listener is not mentioned.
        # Search the whole sentence with word boundaries: an anchored match would
        # miss a mid-sentence "we" and would fire on words such as "were".
        if 'you' not in text and 'yall' not in text:
            if re.search(r'\bwe\b(?!-)', text):
                warnings.append('Heads up! It looks like you\'ve used the word "we." If you mean "we'+ \
                                ' (including the person I\'m talking to)", then try using "we-inc" instead.\n\n')

            if re.search(r'\bus\b(?!-)', text):
                warnings.append('Heads up! It looks like you\'ve used the word "us." If you mean "us'+ \
                                ' (including the person I\'m talking to)", then try using "us-inc" instead.\n\n')

        # Show the sentence with its original capitalization.
        text = eng_format_map[text]+'.'

    def format_result(trans,analys):
        return '\nTranslation:\t'+trans + \
               '\nBreak it down:\t'+analys

    num_translations = len(trans_analys)

    message = language+':\t'+text + \
             '\n\n{} translation(s) of "{}" were found:\n'.format(num_translations,text) + \
             '\n'.join([format_result(tr,an) for tr,an in trans_analys])

    # Warnings go in front of the result text.
    return ''.join(warnings) + message

In [8]:
def on_button_clicked(b):
    """Click handler for the Translate button: show the translation of w_text in w_output."""
    # Compute the result before clearing, so the old output is replaced in one step.
    translation = translate(w_text.value)
    w_output.clear_output()
    with w_output:
        print(translation)

In [9]:
def get_all_forms(text):
    """List every dictionary entry that belongs to a verb stem.

    Parameters:
        text: raw user input, expected to be an English verb or an Algonquian
            verb stem.

    Returns:
        A printable message: the matching (form, translation) pairs, a hint to
        use Translate when the input is a conjugated form, or a not-found note.
    """
    text = clean_input(text)[0]
    text_type = clean_input(text)[1][0]

    if text_type == 'conjugated':
        return "This looks like a conjugated form. Try searching for just the verb stem.\nOr, try " + \
               "using the Translate function instead."

    # Normalize English verbs to the spelling used for stems.
    text = re.sub('^to ','',text)
    text = re.sub('^(is|am|are) ','be ',text)

    if 'be '+text in eng_stems_w_spaces:
        text = 'be '+text
    if text == 'has':
        text = 'have'

    # The language reported below follows the branch taken here, so the message
    # always agrees with the results that are listed.
    if text in eng_stems_w_spaces:
        language = 'English'
        eng = [stem.eng for stem in stems if text in stem.eng_w_spaces][0]
        # The stem is literal text, not a pattern: escape it before matching.
        r_eng = re.compile(re.escape(eng))
        forms_transl = [(entry.eng,entry.alg) for entry in entries if r_eng.match(entry.gloss)]

    elif text in alg_stems:
        language = 'Algonquian'
        # The analysis must start with one or more ku/nu/wu prefixes, a '+', then the stem.
        r_analys = re.compile(r'^(ku|nu|wu)+\+'+re.escape(text))
        forms_transl = [(entry.alg,entry.eng) for entry in entries if r_analys.match(entry.analysis)]

    else:
        return '"' + text + '" was not found in the dictionary.'

    num_forms = len(forms_transl)

    message = '"{}" is a(n) {} verb stem.\n'.format(text,language) + \
              '{} forms of "{}" were found\n\n'.format(num_forms,text) + \
              '\n'.join([f+'\t\t'+t for f,t in forms_transl])

    return message

In [10]:
def on_button_clicked2(b):
    """Click handler for the first Search button: show all forms of the stem in w_text2."""
    # Compute the result before clearing, so the old output is replaced in one step.
    all_forms = get_all_forms(w_text2.value)
    w_output2.clear_output()
    with w_output2:
        print(all_forms)

In [11]:
def find_same_pattern(text):
    """Find the entries whose gloss shares the input's pattern.

    The pattern of an entry is its gloss with the leading run of lowercase
    letters and underscores removed.

    Parameters:
        text: raw user input, expected to be a conjugated Algonquian verb or a
            full English sentence.

    Returns:
        A printable table of the entries with the same pattern, a hint when the
        input is a bare stem, or the result of ``handle_invalid_sentence``.
    """
    text = clean_input(text)[0]
    text_type, language = clean_input(text)[1]

    if text_type == 'stem':
        return 'It looks like this is a verb stem.\n' + \
               'Try searching for a conjugated Algonquian verb or a full English sentence.'

    if text_type != 'conjugated':
        return handle_invalid_sentence(text)

    # Compute every entry's pattern once instead of once per comparison.
    r_stem = re.compile('^([a-z]|_)+(?!12ID)')
    patterns = [(r_stem.sub('',entry.gloss),entry) for entry in entries]

    if language == 'Algonquian':
        # One result group per distinct pattern of the matching entries, in
        # first-seen order, so an identical group is not printed twice.
        wanted = list(dict.fromkeys(pattern for pattern,entry in patterns if entry.alg == text))
        results = [[(entry.alg,entry.analysis,entry.eng+'.') for pattern,entry in patterns
                    if pattern == gloss]
                   for gloss in wanted]

        title = 'Algonquian\t\tAnalysis\t\tEnglish\n'+\
                '----------\t\t--------\t\t-------\n'

    else:  # English
        # Only the first matching entry's pattern is used for English input.
        gloss = [pattern for pattern,entry in patterns if entry.eng_lower == text][0]
        results = [[(entry.eng+'.',entry.alg,entry.analysis) for pattern,entry in patterns
                    if pattern == gloss]]

        title = 'English\t\t\tAlgonquian\t\tAnalysis\n'+\
                '-------\t\t\t----------\t\t--------\n'

    # Columns are emitted in tuple order, which matches the title chosen above.
    output = '\n\n'.join(['\n'.join([a+'\t\t'+b+'\t\t'+c for a,b,c in result]) for result in results])

    return title + output
    
        
def on_button_clicked3(b):
    """Click handler for the second Search button: show same-pattern entries for w_text3."""
    # Compute the result before clearing, so the old output is replaced in one step.
    same_pattern = find_same_pattern(w_text3.value)
    w_output3.clear_output()
    with w_output3:
        print(same_pattern)

In [16]:
# Page title. Heading tags are closed with their matching </h1> / </h2>.
w_mainheader = widgets.HTML('<h1><b>Algonquian Dictionary</b></h1>')

# Section 1: translate a conjugated form or a sentence.
w_header = widgets.HTML('<h2><b>Translate</b></h2>')
w_text = widgets.Textarea(placeholder='Write something (English or Algonquian)!', layout=widgets.Layout(width ='40%'))
w_button = widgets.Button(description='Translate')
w_button.on_click(on_button_clicked)
w_output = widgets.Output()

# Section 2: list all forms of a verb stem.
w_header2 = widgets.HTML('<h2><b><br>See all forms of a verb</b></h2>')
w_text2 = widgets.Textarea(placeholder='Search for an English verb or an Algonquian verb stem',
                              layout=widgets.Layout(width='40%'))
w_button2 = widgets.Button(description='Search')
w_button2.on_click(on_button_clicked2)
w_output2 = widgets.Output()

# Section 3: find entries that share a pattern with the input.
w_header3 = widgets.HTML('<h2><b><br>Find words with the same pattern</b></h2>')
w_text3 = widgets.Textarea(placeholder='Search for a conjugated Algonquian verb or English sentence', layout=widgets.Layout(width='40%'))
w_button3 = widgets.Button(description='Search')
w_button3.on_click(on_button_clicked3)
w_output3 = widgets.Output()

# Stack everything vertically, centered, and render it.
ui_items = [w_mainheader,w_header,w_text,w_button, w_output,
            w_header2,w_text2,w_button2,w_output2,
            w_header3,w_text3,w_button3,w_output3]
w_ui = widgets.VBox(ui_items, layout=widgets.Layout(align_items='center'))
display(w_ui)

VBox(children=(HTML(value='<h1><b>Algonquian Dictionary</b></h>'), HTML(value='<h2><b>Translate</b></h>'), Tex…