In [1]:
import pandas as pd
import json
import regex as re

In [2]:
# Load the conversion table; a later cell reads its 'Transliteration' and 'Regular' columns.
conversions = pd.read_excel('../data/conversions.xlsx')
conversions

In [3]:
# Load the word list; later cells use its 'root', 'type' and 'phyrexian' columns.
words = pd.read_csv('../data/phrx_words.csv', encoding='utf-8')
words

In [4]:
# Parse the comma-separated 'root' column: missing values stay NaN, non-empty
# strings become lists of their comma-separated parts, anything else is kept as-is.
# A comprehension is used on purpose: a `for roots in ...` statement would leave a
# global named `roots` behind, clobbering the `roots` list that a later cell defines
# whenever this cell is re-run.
new_root = [
    float('nan') if pd.isna(value) else (value.split(',') if value else value)
    for value in words['root']
]
new_root

In [5]:
# Overwrite the 'root' column with the parsed values held in `new_root`.
words['root'] = new_root
words

In [6]:
# Load the tab-separated raw samples; later cells read its 'Text' and 'Name' columns.
raw = pd.read_csv('../data/phrx_raw.tsv', sep='\t')
raw

In [7]:
def parse_translit(raw:str):
    """Split a transliteration string into a list of tokens.

    Every character starts as its own token, then two merge rules are applied
    while scanning left to right:

    * a prefix character ("-" or "+") is joined with the character after it;
    * a suffix character ("e", "w", "v", "r", "d", "q", "x", "y") is appended
      to the token before it, so several suffixes in a row stack on one token.

    A prefix character with nothing after it, or a suffix character with
    nothing before it, is kept as a stand-alone token instead of raising
    IndexError or wrapping around to the other end of the list.

    Args:
        raw: the transliterated text.

    Returns:
        The list of tokens, in their original order.
    """
    prefix_chars = ("-", "+")
    suffix_chars = ("e", "w", "v", "r", "d", "q", "x", "y")

    tokens = list(raw)
    pos = 0
    while pos < len(tokens):
        current = tokens[pos]
        if current in prefix_chars and pos + 1 < len(tokens):
            # Glue the prefix onto the following character; the merged token
            # is two characters long, so it can never match either rule again.
            tokens[pos] = current + tokens.pop(pos + 1)
        elif current in suffix_chars and pos > 0:
            tokens[pos - 1] += tokens.pop(pos)
            # The element now at `pos` has not been examined yet.
            continue
        pos += 1
    return tokens

def translit_to_font(letters:list, conversion_sheet:dict):
    """Map each token through `conversion_sheet` and concatenate the results.

    Tokens that have no entry in `conversion_sheet` are copied through unchanged.
    """
    return ''.join(conversion_sheet.get(token, token) for token in letters)

In [8]:
# Transliteration -> 'Regular' lookup taken row by row from the conversion table,
# followed by two hand-written entries.
wiz_conv = {
    conversions.loc[row, 'Transliteration']: conversions.loc[row, 'Regular']
    for row in conversions.index
}
wiz_conv.update({'\\': '^', '|': '-'})

In [9]:
# Convert every sample's 'Text' value: tokenise it, then map the tokens through wiz_conv.
wiz_list = [
    translit_to_font(parse_translit(raw.loc[row, 'Text']), wiz_conv)
    for row in raw.index
]

In [10]:
# Attach the converted strings as a new 'Regular' column (wiz_list is in raw's row order).
raw['Regular'] = wiz_list
raw

In [11]:
# Drop the source 'Text' column and rebind `raw` to the result.
# NOTE: because `raw` is rebound, re-running this cell (or any cell that reads
# raw['Text']) without reloading `raw` first raises KeyError.
raw = raw.drop(columns=['Text'])

In [12]:
# Vowel inventory, plus a regex fragment matching any (possibly empty) run of those vowels.
vowels = 'aeiouyøɒə'
vowels_re = '[aeiouyøɒə]*'

# 'phyrexian' spellings grouped by the entry's 'type'.
roots = words.loc[words['type'].isin(['root', 'name', 'unclear']), 'phyrexian'].tolist()
composites = words.loc[words['type'] == 'composite', 'phyrexian'].tolist()
moods = words.loc[words['type'] == 'marker', 'phyrexian'].tolist()

In [13]:
def unvowel(query):
    """Return `query` with every character found in the global `vowels` string removed."""
    return ''.join(ch for ch in query if ch not in vowels)

In [14]:
def raw_split(raw_sample):
    """Split a converted sample into its word tokens.

    Trailing '.' characters and leading '^' characters are removed first; the
    remainder is split on the two-character sequence '.^' and on '-', ' ' and ':'.

    Args:
        raw_sample: the sample text as a string.

    Returns:
        The list of pieces (empty strings are possible when separators touch).
    """
    trimmed = raw_sample.rstrip('.').lstrip('^')
    # Raw string: the pattern text is unchanged, but Python no longer sees
    # invalid string escapes such as '\.' or '\:' (a SyntaxWarning today).
    return re.split(r'\.\^|\-|\ |\:', trimmed)

In [15]:
def unroot_raw(raw_thing):
    """Strip known roots out of every word of a sample and return what is left.

    The sample is split on '.^', '-', ' ' and ':'. For each word, every entry of
    the global `roots` list whose vowel-less form occurs inside the vowel-less
    word is turned into a regex (its characters joined by `vowels_re`), and the
    text that regex matches is removed from the word.

    Args:
        raw_thing: the sample text as a string.

    Returns:
        The non-empty remainders, one per word that was not fully consumed.
    """
    result = []
    for word in re.split(r'\.\^|\-|\ |\:', raw_thing):
        bare_word = unvowel(word)
        # Vowel-less root skeletons present in this word; an empty skeleton
        # would match nothing useful, so it is skipped.
        skeletons = [
            skeleton
            for skeleton in (unvowel(root) for root in roots)
            if skeleton and skeleton in bare_word
        ]
        stripped = word
        for skeleton in skeletons:
            if stripped == '':
                break
            # Escape each character so regex metacharacters are matched literally.
            pattern = vowels_re.join(re.escape(ch) for ch in skeleton)
            found = re.search(pattern, stripped)
            # An earlier removal may already have consumed this root.
            if found is not None:
                # group(0) is the matched text; Match.string would be the whole
                # searched string and would blank the word entirely.
                stripped = stripped.replace(found.group(0), '')
        if stripped != '':
            result.append(stripped)
    return result

In [16]:
all_matches = []
for idx in words.index:
    matches = []

    for raw_idx in raw.index:
        name = raw.loc[raw_idx, 'Name']
        raw_listed = raw_split(raw.loc[raw_idx, 'Regular'])
        for word in raw_listed:
            match words.loc[idx, 'type']:
                case 'root' | 'composite' | 'unclear':
                    if re.search(vowels_re.join(words.loc[idx, 'phyrexian']), word):
                        if name not in matches:
                            matches.append(name)
                case 'name' | 'number':
                    if words.loc[idx, 'phyrexian'] in word:
                        if name not in matches:
                            matches.append(name)
                case 'marker':
                    if words.loc[idx, 'phyrexian'] in word:
                        if word.replace(words.loc[idx, 'phyrexian'], '') in moods + ['']:
                            if name not in matches:
                                matches.append(name)
    all_matches.append(matches)

In [17]:
# Display the per-entry lists of matching sample names for inspection.
all_matches

In [18]:
# Store each entry's list of matching sample names in a new 'examples' column.
words['examples'] = all_matches

In [19]:
def _nan_to_none(value):
    """Return a copy of `value` with every float NaN replaced by None.

    Dicts and lists/tuples are walked recursively (tuples come back as lists,
    which is also how JSON would write them); everything else is returned as-is.
    """
    if isinstance(value, dict):
        return {key: _nan_to_none(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_nan_to_none(item) for item in value]
    if isinstance(value, float) and value != value:  # NaN is the only float unequal to itself
        return None
    return value


# Assemble the output document. NaN is converted to None *before* serialising so
# it is written as `null`; rewriting 'NaN' in the finished text would also alter
# any string value that happens to contain those three letters.
final = {}
final['dictionary'] = _nan_to_none(words.to_dict(orient='index'))
final['samples'] = _nan_to_none(raw.to_dict(orient='index'))
final_json = json.dumps(final, indent = 4, ensure_ascii=False)

In [20]:
# Write the JSON document; the context manager closes the file even if write() raises.
with open('../PhyrexianAPI', 'w', encoding='utf-8') as out:
    out.write(final_json)