In [1]:
import json
import pandas as pd
import os
import re
from lapp.dbms import init_db, inserts, modify, delete, find_by_attr, insert
from lapp.tables import Unit, Vocabulary, GrammarRule, Language, CalligraphyCharacter


In [2]:
language_id = "zh"

## Create a Language

In [3]:
engine, session = init_db()

if not os.path.exists("../db"):
    os.makedirs("../db")
    print(f"Created directory for database: ../db")

language = Language(
    id=language_id.upper(),
    name="Chinois",
    native_name="中文",
    level="A1",
    flag="🇨🇳"
)
insert(session, language)

session.close()

2025-07-09 18:44:07,737 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-09 18:44:07,737 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-09 18:44:07,737 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,738 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("language")
2025-07-09 18:44:07,738 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,738 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-09 18:44:07,738 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,738 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("unit")
2025-07-09 18:44:07,739 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,739 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-09 18:44:07,739 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,739 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("calligraphy_character")
2025-07-09 18:44:07,739 INFO sqlalchemy.en

## Add Units

In [4]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db()

# Initialize the database
with open(f'../data/{language_id}/units.json', 'r', encoding="utf8") as f:
    units_array = json.load(f)
    
units = []
for idx, unit_data in enumerate(units_array):
    units.append(
        Unit(
            id = f"{language_id.upper()}_{idx}",
            title = unit_data['title'],
            description = unit_data['description'],
            level = unit_data['level'],
            parent = language,
        )
    )
inserts(session, units)
session.close()

2025-07-09 18:44:07,763 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-09 18:44:07,763 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-09 18:44:07,763 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,763 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-09 18:44:07,764 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,764 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-09 18:44:07,764 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,764 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-09 18:44:07,764 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,764 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-09 18:44:07,765 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,765 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-09 18:44:07,765 INFO sqlalchemy.engi

  session.commit()


## Add Vocabulary

In [5]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db()

directory_path = f'../data/{language_id}/vocabulary'

if not os.path.exists(directory_path):
    raise FileNotFoundError(f"No vocabulary found for {language_id}.")

vocs = []

# List all elements (files and directories) in the specified directory and get their full paths
elements_paths = [os.path.join(directory_path, element) for element in os.listdir(directory_path)]
for voc_file in elements_paths:
    if os.path.isfile(voc_file) and voc_file.endswith('.csv'):
        unit_id = re.sub("[^0-9]", "", os.path.basename(voc_file))
        df = pd.read_csv(voc_file)
        for idx, row in df.iterrows():
            vocs.append( 
                Vocabulary(
                    id=f"{language_id.upper()}_{unit_id}_V{idx}",
                    word = row['word'],
                    translation = row["translation"],
                    phonetic = row["pinyin"],
                    example_sentence = row.get("example_sentence", ""),
                    type = row["type"],
                    parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
                )
            )
inserts(session, vocs)
session.close()

2025-07-09 18:44:07,798 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-09 18:44:07,798 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-09 18:44:07,798 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,798 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-09 18:44:07,798 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,799 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-09 18:44:07,799 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,799 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-09 18:44:07,799 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,799 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-09 18:44:07,799 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:07,800 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-09 18:44:07,800 INFO sqlalchemy.engi

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()


2025-07-09 18:44:07,967 INFO sqlalchemy.engine.Engine [cached since 0.1642s ago] ('ZH_15', 1, 0)
2025-07-09 18:44:07,968 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-09 18:44:07,968 INFO sqlalchemy.engine.Engine [cached since 0.1649s ago] ('ZH_15', 1, 0)
2025-07-09 18:44:07,968 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-09 18:44:07,969 INFO sqlalchemy.engine.Engine [cached since 0.1656s ago] ('ZH_15', 1, 0)
2025-07-09 18:44:07,969 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, 

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()


2025-07-09 18:44:08,004 INFO sqlalchemy.engine.Engine [cached since 0.2015s ago] ('ZH_10', 1, 0)
2025-07-09 18:44:08,005 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-09 18:44:08,005 INFO sqlalchemy.engine.Engine [cached since 0.2021s ago] ('ZH_10', 1, 0)
2025-07-09 18:44:08,005 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-09 18:44:08,006 INFO sqlalchemy.engine.Engine [cached since 0.2026s ago] ('ZH_10', 1, 0)
2025-07-09 18:44:08,006 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, 

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  session.commit()


2025-07-09 18:44:08,171 INFO sqlalchemy.engine.Engine [cached since 0.02776s ago] ('ZH_2',)
2025-07-09 18:44:08,172 INFO sqlalchemy.engine.Engine INSERT INTO vocabulary (id, word, translation, phonetic, example_sentence, type, score, last_seen, unit_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
2025-07-09 18:44:08,172 INFO sqlalchemy.engine.Engine [cached since 0.02974s ago] ('ZH_2_V20', '个', 'Classification universelle', 'Gè', '', 'Divers', 0, '2025-07-09', 'ZH_2')
2025-07-09 18:44:08,172 INFO sqlalchemy.engine.Engine COMMIT
2025-07-09 18:44:08,173 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-09 18:44:08,173 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
2025-07-09 18:44:08,173 INFO sqlalchemy.engine.Engine [cached since 0.0293s ago] ('ZH_2',)
2025-07-09 18:44:08,173 INFO sqlalchemy.engine.En

## Add Grammar Rules

In [6]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db()

directory_path = f'../data/{language_id}/grammar'

if not os.path.exists(directory_path):
    raise FileNotFoundError(f"No grammar files found for {language_id}.")

grammars = []

# List all elements (files and directories) in the specified directory and get their full paths
elements_paths = [os.path.join(directory_path, element) for element in os.listdir(directory_path)]
for grammar_file in elements_paths:
    if os.path.isfile(grammar_file) and grammar_file.endswith('.json'):
        unit_id = re.sub("[^0-9]", "", os.path.basename(grammar_file))
        with open(grammar_file, 'r', encoding="utf8") as f:
            grammar_data = json.load(f)
        for idx, row in enumerate(grammar_data):
            grammars.append( 
                GrammarRule(
                    id=f"{language_id.upper()}_{unit_id}_G{idx + 1}",
                    title=row['title'],
                    explanation=row["content"],
                    parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
                )
            )
inserts(session, grammars)
session.close()

2025-07-09 18:44:09,666 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-09 18:44:09,667 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-09 18:44:09,671 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,673 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-09 18:44:09,673 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,674 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-09 18:44:09,674 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,674 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-09 18:44:09,674 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,675 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-09 18:44:09,675 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,675 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-09 18:44:09,675 INFO sqlalchemy.engi

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  session.commit()


## Add Characters

In [7]:
# Initialize the database connection and create db file if it doesn't exist
from lapp.api.main import unit


engine, session = init_db()

directory_path = f'../data/{language_id}/character'

if not os.path.exists(directory_path):
    raise FileNotFoundError(f"No character files found for {language_id}.")

characters = []

# List all elements (files and directories) in the specified directory and get their full paths
elements_paths = [os.path.join(directory_path, element) for element in os.listdir(directory_path)]
for character_file in elements_paths:
    if os.path.isfile(character_file) and character_file.endswith('.csv'):
        unit_id = re.sub("[^0-9]", "", os.path.basename(character_file))
        print(f"Processing: {unit_id}")
        df = pd.read_csv(character_file)
        for idx, row in enumerate(df.itertuples(index=False)):
            characters.append(
                CalligraphyCharacter(
                    id=f"{language_id.upper()}_{unit_id}_C{idx + 1}",
                    character=row.character,
                    components=row.components,
                    phonetic=row.phonetic,
                    meaning=row.meaning,
                    example_word=row.example_word,
                    parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
                )
            )
inserts(session, characters)
session.close()

2025-07-09 18:44:09,977 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-09 18:44:09,977 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-09 18:44:09,978 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,978 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-09 18:44:09,978 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,979 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-09 18:44:09,979 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,979 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-09 18:44:09,980 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,980 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-09 18:44:09,980 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-09 18:44:09,981 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-09 18:44:09,981 INFO sqlalchemy.engi

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()

2025-07-09 18:44:10,186 INFO sqlalchemy.engine.Engine [cached since 0.202s ago] ('ZH_13', 1, 0)
2025-07-09 18:44:10,187 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-09 18:44:10,187 INFO sqlalchemy.engine.Engine [cached since 0.2029s ago] ('ZH_13', 1, 0)
2025-07-09 18:44:10,188 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-09 18:44:10,188 INFO sqlalchemy.engine.Engine [cached since 0.2035s ago] ('ZH_13', 1, 0)
2025-07-09 18:44:10,188 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, u

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  session.commit()


2025-07-09 18:44:10,397 INFO sqlalchemy.engine.Engine INSERT INTO calligraphy_character (id, character, components, phonetic, meaning, example_word, score, last_seen, unit_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
2025-07-09 18:44:10,398 INFO sqlalchemy.engine.Engine [cached since 0.1692s ago] ('ZH_4_C16', '因', '口 (clef : enceinte) + 大', 'yīn', 'Raison', '因为 yīnwèi : Parce que', 0, '2025-07-09', 'ZH_4')
2025-07-09 18:44:10,399 INFO sqlalchemy.engine.Engine COMMIT
2025-07-09 18:44:10,401 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-09 18:44:10,403 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
2025-07-09 18:44:10,404 INFO sqlalchemy.engine.Engine [cached since 0.1734s ago] ('ZH_4',)
2025-07-09 18:44:10,406 INFO sqlalchemy.engine.Engine INSERT INTO calligraphy_character (id, character, compo