In [1]:
import json
import pandas as pd
import os
import re
from lapp.dbms import init_db, inserts, modify, delete, find_by_attr, insert
from lapp.tables import Unit, Vocabulary, GrammarRule, Language


In [2]:
language_id = "zh"

## Create a Language

In [3]:
engine, session = init_db()

if not os.path.exists("../db"):
    os.makedirs("../db")
    print(f"Created directory for database: ../db")

language = Language(
    id=language_id.upper(),
    name="Chinois",
    native_name="中文",
    level="A1",
    flag="🇨🇳"
)
insert(session, language)

session.close()

2025-07-07 18:21:51,582 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-07 18:21:51,582 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-07 18:21:51,582 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,582 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("language")
2025-07-07 18:21:51,582 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,583 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-07 18:21:51,583 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,583 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("unit")
2025-07-07 18:21:51,583 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,583 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-07 18:21:51,583 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,584 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("calligraphy_character")
2025-07-07 18:21:51,584 INFO sqlalchemy.en

## Add Units

In [4]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db()

# Initialize the database
with open(f'../data/{language_id}/units.json', 'r', encoding="utf8") as f:
    units_array = json.load(f)
    
units = []
for idx, unit_data in enumerate(units_array):
    units.append(
        Unit(
            id = f"{language_id.upper()}_{idx}",
            title = unit_data['title'],
            description = unit_data['description'],
            level = unit_data['level'],
            parent = language,
        )
    )
inserts(session, units)
session.close()

2025-07-07 18:21:51,607 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-07 18:21:51,607 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-07 18:21:51,608 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,609 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-07 18:21:51,609 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,609 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-07 18:21:51,609 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,610 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-07 18:21:51,610 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,610 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-07 18:21:51,610 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,610 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-07 18:21:51,610 INFO sqlalchemy.engi

  session.commit()


## Add Vocabulary

In [5]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db()

directory_path = f'../data/{language_id}/vocabulary'

if not os.path.exists(directory_path):
    raise FileNotFoundError(f"No vocabulary found for {language_id}.")

vocs = []

# List all elements (files and directories) in the specified directory and get their full paths
elements_paths = [os.path.join(directory_path, element) for element in os.listdir(directory_path)]
for voc_file in elements_paths:
    if os.path.isfile(voc_file) and voc_file.endswith('.csv'):
        unit_id = re.sub("[^0-9]", "", os.path.basename(voc_file))
        df = pd.read_csv(voc_file)
        for idx, row in df.iterrows():
            vocs.append( 
                Vocabulary(
                    id=f"{language_id.upper()}_{unit_id}_V{idx}",
                    word = row['word'],
                    translation = row["translation"],
                    phonetic = row["pinyin"],
                    example_sentence = row.get("example_sentence", ""),
                    type = row["type"],
                    parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
                )
            )
inserts(session, vocs)
session.close()

2025-07-07 18:21:51,647 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-07 18:21:51,647 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-07 18:21:51,647 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,649 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-07 18:21:51,649 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,650 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-07 18:21:51,651 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,651 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-07 18:21:51,652 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,652 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-07 18:21:51,652 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:51,652 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-07 18:21:51,652 INFO sqlalchemy.engi

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()


2025-07-07 18:21:51,812 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-07 18:21:51,813 INFO sqlalchemy.engine.Engine [cached since 0.1563s ago] ('ZH_15', 1, 0)
2025-07-07 18:21:51,813 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-07 18:21:51,813 INFO sqlalchemy.engine.Engine [cached since 0.157s ago] ('ZH_15', 1, 0)
2025-07-07 18:21:51,814 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM uni

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()


2025-07-07 18:21:51,858 INFO sqlalchemy.engine.Engine [cached since 0.2017s ago] ('ZH_14', 1, 0)
2025-07-07 18:21:51,858 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-07 18:21:51,859 INFO sqlalchemy.engine.Engine [cached since 0.2023s ago] ('ZH_14', 1, 0)
2025-07-07 18:21:51,859 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
 LIMIT ? OFFSET ?
2025-07-07 18:21:51,859 INFO sqlalchemy.engine.Engine [cached since 0.2029s ago] ('ZH_14', 1, 0)
2025-07-07 18:21:51,861 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, 

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  session.commit()


2025-07-07 18:21:52,041 INFO sqlalchemy.engine.Engine INSERT INTO vocabulary (id, word, translation, phonetic, example_sentence, type, score, last_seen, unit_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
2025-07-07 18:21:52,041 INFO sqlalchemy.engine.Engine [cached since 0.02087s ago] ('ZH_2_V8', '九', 'Neuf', 'Jiǔ', '', 'Chiffres et nombres', 0, '2025-07-07', 'ZH_2')
2025-07-07 18:21:52,042 INFO sqlalchemy.engine.Engine COMMIT
2025-07-07 18:21:52,043 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-07 18:21:52,043 INFO sqlalchemy.engine.Engine SELECT unit.id AS unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level, unit.score AS unit_score, unit.language_id AS unit_language_id 
FROM unit 
WHERE unit.id = ?
2025-07-07 18:21:52,043 INFO sqlalchemy.engine.Engine [cached since 0.01974s ago] ('ZH_2',)
2025-07-07 18:21:52,044 INFO sqlalchemy.engine.Engine INSERT INTO vocabulary (id, word, translation, phonetic, example_sentence, type, score, last_se

## Add Grammar Rules

In [6]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db()

directory_path = f'../data/{language_id}/grammar'

if not os.path.exists(directory_path):
    raise FileNotFoundError(f"No grammar files found for {language_id}.")

grammars = []

# List all elements (files and directories) in the specified directory and get their full paths
elements_paths = [os.path.join(directory_path, element) for element in os.listdir(directory_path)]
for grammar_file in elements_paths:
    if os.path.isfile(grammar_file) and grammar_file.endswith('.json'):
        unit_id = re.sub("[^0-9]", "", os.path.basename(grammar_file))
        with open(grammar_file, 'r', encoding="utf8") as f:
            grammar_data = json.load(f)
        for idx, row in enumerate(grammar_data):
            grammars.append( 
                GrammarRule(
                    id=f"{language_id.upper()}_{unit_id}_G{idx + 1}",
                    title=row['title'],
                    explanation=row["content"],
                    parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
                )
            )
inserts(session, grammars)
session.close()

2025-07-07 18:21:53,491 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-07 18:21:53,492 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("language")
2025-07-07 18:21:53,492 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:53,493 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-07 18:21:53,493 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:53,494 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-07 18:21:53,494 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:53,495 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-07 18:21:53,521 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:53,522 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-07 18:21:53,522 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-07 18:21:53,523 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-07 18:21:53,523 INFO sqlalchemy.engi

  parent=session.query(Unit).filter(Unit.id == f"{language_id.upper()}_{unit_id}").first()
  session.commit()
