In [53]:
import pandas as pd
pd.set_option('display.max_rows', 500)

def get_sort(lemma):
    '''Takes a Welsh word and returns a code that can be sorted accurately due to the difference between alphabetical order in Welsh and English.'''
    
    def remove_accent(char) -> str :
        '''Removes accent if character is a vowel with an accent, otherwise returns character as it is.'''
        accents = {
            'âáàä': 'a',
            'êéèë': 'e',
            'îíìï': 'i',
            'ôóòö': 'o',
            'ûúùü': 'u',
            'ŵ': 'w',
            'ŷ': 'y'
        }
        
        for k in accents.keys():
            if char in k:
                return accents[k]
        
        return char

    welsh_alphabet = ['-', 'a', 'b', 'c', 'ch', 'd', 'dd', 'e', 'f', 'ff', 'g', 'ng', 'h', 'i', 'j', 'k', 'l', 'll',
                    'm', 'n', 'o', 'p', 'ph', 'r', 'rh', 's', 't', 'th', 'u', 'w', 'y']
    sort_codes = [str(x) for x in list(range(10))] + list('abcdefghijklmnopqrstuvwxyz')
    letter_sort = {k:v for k,v in zip(welsh_alphabet,sort_codes[:len(welsh_alphabet)])}
    output = ''
    i = 0
    lemma = lemma.lower()
    while i < len(lemma):
        if lemma[i:i+2] in welsh_alphabet:
            output += letter_sort[lemma[i:i+2]]
            i += 2
        else:
            char = remove_accent(lemma[i])
            if char in welsh_alphabet:
                output += letter_sort[char]
            else:
                output += '0'
            i += 1

    return output

In [56]:
def filter_df(list_of_values, column, df=df):
    pd.set_option('display.max_rows', 500)
    output = []

    for i,row in df.iterrows():
        for word in list_of_values:
            if word == row[column]:
                output.append(row.to_dict())

    return pd.DataFrame(output)


In [158]:
pd.set_option('display.max_rows', 5000)
df = pd.read_csv('abridged_dictionary.csv', index_col=0)

df = df[df['number'] != 'pl']
df.to_csv('abridged_dictionary.csv')

In [None]:
def get_word_details(lemma):
    df = pd.read_csv('geiriadur.csv', index_col=0)
    df = df[df['lemma'] == lemma].drop(['sort', 'gender','pos','extra','clar'], axis=1)
    df = df.sort_values('number').sort_values('tense')
    tenses = {'pres': 'Present',
              'past': 'Past',
              'fut': 'Future',
              'imper': 'Imperative (Commands)',
              'imperf': 'Imperfect',
              'pluperf': 'Pluperfect',
              'subj': 'Subjunctive'}
    number = ['1s', '2s', '3s','1p', '2p', '3p', '0', '\\N']
    pronoun_dict = {k:v for k,v in zip(number[:-2],['i', 'di', 'fe/fo/hi', 'ni', 'chi', 'nhw'])}
    english = df['enlemma'].unique()[0]

    impersonal_forms = df[df['number'] == '0']

    st.write('Word:', lemma)
    st.write('English translation:', english)
    st.write('Word type:', 'verb')

    for tense in tenses:
        st.write('\n\n'+tenses[tense]+':')
        filtered = df[df['tense']==tense].drop(['tense','lemma','enlemma'],axis=1)
        for pronoun in pronoun_dict:
            for i,row in filtered.iterrows():
                if row_info := return_row_info(row,pronoun,pronoun_dict):
                    st.write(row_info)
        try:
            impersonal_form = list(impersonal_forms[impersonal_forms['tense']==tense]['surface'])[0]
            st.write('---')
            st.write(f'Impersonal form: {impersonal_form}')
        except IndexError:
            pass

def return_row_info(row,pronoun,pronoun_dict):
    if row['number'] == pronoun:
        if row['surface'][-1] == 't' and pronoun == '2s':
            output =  f'{row['surface']} ti'
        elif row['surface'][-1] == 't' and pronoun == '3p':
            output =  f'{row['surface']} hwy'
        else:
            output = f'{row['surface']} {pronoun_dict[pronoun]}'
        if row['notes'] == 'spoken':
            output = '_' + output + ' (Spoken form)_' # type: ignore
        return output


get_word_details('yswirio')

Word: yswirio
English translation: insure
Word type: verb


Present:
yswiriaf i
yswiri di
yswiria fe/fo/hi
yswiriwn ni
yswiriwch chi
yswiriant hwy
_yswirian nhw (Spoken form)_
---
Impersonal form: yswirir


Past:
yswiriais i
yswiriaist ti
yswiriodd fe/fo/hi
yswiriasom ni
_yswirion ni (Spoken form)_
yswiriasoch chi
_yswirioch chi (Spoken form)_
yswiriasant hwy
_yswirion nhw (Spoken form)_
---
Impersonal form: yswiriwyd


Future:
_yswirith fe/fo/hi (Spoken form)_
_yswiriff fe/fo/hi (Spoken form)_


Imperative (Commands):
yswiria di
yswiried fe/fo/hi
yswiriwn ni
yswiriwch chi
_yswirien nhw (Spoken form)_
yswirient hwy
---
Impersonal form: yswirier


Imperfect:
yswiriwn i
_yswiriet ti (Spoken form)_
yswirit ti
yswiriai fe/fo/hi
yswiriem ni
yswiriech chi
yswirient hwy
---
Impersonal form: yswirid


Pluperfect:
yswiriaswn i
yswiriasit ti
yswiriasai fe/fo/hi
yswiriasem ni
yswiriasech chi
yswiriasent hwy
_yswiriasen nhw (Spoken form)_
---
Impersonal form: yswiriasid


Subjunctive:
yswiriwyf i


In [67]:
df

Unnamed: 0,surface,lemma,enlemma,number,tense,notes
210532,yswirith,yswirio,insure,3s,fut,spoken
210531,yswiriff,yswirio,insure,3s,fut,spoken
210508,yswiried,yswirio,insure,3s,imper,
210502,yswirier,yswirio,insure,0,imper,
210513,yswiriwn,yswirio,insure,1p,imper,
210510,yswirien,yswirio,insure,3p,imper,spoken
210509,yswirient,yswirio,insure,3p,imper,
210521,yswiria,yswirio,insure,2s,imper,
210484,yswiriwch,yswirio,insure,2p,imper,
210519,yswirid,yswirio,insure,0,imperf,
