# Process Dictionaries

Turn the dictionary CSV files into Markdown tables (which can then be rendered as HTML).

In [103]:
import os
import pandas as pd
import re


In [104]:
# Location of the dictionary CSV: <dictionary_dir>/<dictionary_name>.csv
dictionary_name = "technical-physics-research-journey"
dictionary_dir = "./dictionaries"
dictionary_file = os.path.join(dictionary_dir, dictionary_name + ".csv")

# Load the dictionary and, in the same expression, replace every missing
# cell with the '???' placeholder so gaps stay visible in the output.
df = pd.read_csv(dictionary_file).fillna('???')
df.head()

Unnamed: 0,en_US.word,en_US.type,en_US.definition,en_US.wikipedia,en_US.wiktionary,en_US.sentence,en_US.notes,en_US.image,fi.word,fi.type,...,fi.notes,fi.image,ru_RU.word,ru_RU.type,ru_RU.definition,ru_RU.wikipedia,ru_RU.wiktionary,ru_RU.sentence,ru_RU.notes,ru_RU.image
0,research,noun,???,???,???,???,???,???,tutkimus,???,...,???,???,исследование,???,???,???,???,???,???,???
1,to research,verb,???,???,???,???,???,???,tutkia,???,...,???,???,изучать,???,???,???,https://en.wiktionary.org/wiki/%D0%B8%D0%B7%D1...,???,???,???
2,research journey,noun,???,???,???,???,???,???,tutkimusseikkailu,???,...,???,???,???,???,???,???,???,???,???,???
3,graduate student,noun,???,???,???,???,???,???,jatko-opiskelija,???,...,???,???,аспирант,???,???,???,???,???,???,???
4,Doctoral Researcher (Ph.D.),noun,???,???,???,???,???,???,Väitöskirjatutkija,???,...,???,???,???,???,???,???,???,???,???,???


In [105]:
# Keep only the per-language "word" columns (e.g. `en_US.word`, `fi.word`).
word_columns = df.filter(like='word')

# Remove columns that contain no entries at all.
# Every NaN was already replaced with the '???' placeholder when the CSV was
# loaded, so `dropna(axis=1, how='all')` could never match anything here.
# Treat the placeholder (and any NaN that might still be present) as missing
# so that empty columns really are dropped.
has_entry = word_columns.ne('???') & word_columns.notna()
df = word_columns.loc[:, has_entry.any(axis=0)]

df.head()


Unnamed: 0,en_US.word,fi.word,ru_RU.word
0,research,tutkimus,исследование
1,to research,tutkia,изучать
2,research journey,tutkimusseikkailu,???
3,graduate student,jatko-opiskelija,аспирант
4,Doctoral Researcher (Ph.D.),Väitöskirjatutkija,???


In [106]:

# Display name for each language code used in the column prefixes.
language_codes = [
    {'en_US': 'English (American)'},
    {'fi': 'Finnish (Suomi)'},
    {'ru_RU': 'Russian (Русский)'}
]

# Build the '<code>.word' -> display-name mapping from the table above
# instead of spelling each pair out a second time.
column_rename = {
    f"{code}.word": display_name
    for entry in language_codes
    for code, display_name in entry.items()
}

# Columns that are not present in the frame are simply left out of the rename.
df = df.rename(columns=column_rename)
df.head()

Unnamed: 0,English (American),Finnish (Suomi),Russian (Русский)
0,research,tutkimus,исследование
1,to research,tutkia,изучать
2,research journey,tutkimusseikkailu,???
3,graduate student,jatko-opiskelija,аспирант
4,Doctoral Researcher (Ph.D.),Väitöskirjatutkija,???


In [107]:
from pytablewriter import MarkdownTableWriter

# Render the dictionary DataFrame as a Markdown table.
writer = MarkdownTableWriter(dataframe=df)

# First pass: write to the writer's current stream so the table appears as
# this cell's output.
writer.write_table()

# Second pass: write the same table to <dictionary_dir>/<dictionary_name>.md.
# The table contains Finnish and Cyrillic text, so the file encoding is pinned
# to UTF-8 rather than left to the platform default, which may be unable to
# encode those characters.
output_path = os.path.join(dictionary_dir, f"{dictionary_name}.md")
previous_stream = writer.stream
try:
    with open(output_path, "w", encoding="utf-8") as f:
        writer.stream = f
        writer.write_table()
finally:
    # Re-attach the earlier stream so the writer is not left pointing at a
    # closed file if it is used again in a later cell.
    writer.stream = previous_stream

|        English (American)         |            Finnish (Suomi)             |    Russian (Русский)    |
|-----------------------------------|----------------------------------------|-------------------------|
|research                           |tutkimus                                |исследование             |
|to research                        |tutkia                                  |изучать                  |
|research journey                   |tutkimusseikkailu                       |???                      |
|graduate student                   |jatko-opiskelija                        |аспирант                 |
|Doctoral Researcher (Ph.D.)        |Väitöskirjatutkija                      |???                      |
|Computer Science                   |Tietojenkäsittelytiede                  |Информатика              |
|Software Engineer                  |Ohjelmistokehittäjä                     |Инженер-программист      |
|company                            |yhtiö             