In [1]:
import json
import pandas as pd
import os
import re
from lapp.dbms import init_db, inserts, modify, delete, find_by_attr
from lapp.tables import Unit, Vocabulary, GrammarRule


In [2]:
language_name = "zh"

## Add Units

In [3]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db(language_name)

if not os.path.exists(f'../data/{language_name}'):
    raise FileNotFoundError(f"Dataset for {language_name} not found.")

# Initialize the database
with open(f'../data/{language_name}/units.json', 'r') as f:
    units_array = json.load(f)
    
units = []
for idx, unit_data in enumerate(units_array):
    units.append(
        Unit(
            unit_id = f"{language_name.upper()}_{idx}",
            title = unit_data['title'],
            description = unit_data['description'],
            level = unit_data['level']
        )
    )
inserts(session, units)
session.close()

2025-07-05 18:42:11,197 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-05 18:42:11,198 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-05 18:42:11,198 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,198 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("unit")
2025-07-05 18:42:11,198 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,198 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-05 18:42:11,199 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,199 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("calligraphy_character")
2025-07-05 18:42:11,199 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,199 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-05 18:42:11,199 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,199 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("grammar_rule")
2025-07-05 18:42:11,200 INFO sqlal

## Add Vocabulary

In [4]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db(language_name)

directory_path = f'../data/{language_name}/vocabulary'

if not os.path.exists(directory_path):
    raise FileNotFoundError(f"No vocabulary found for {language_name}.")

vocs = []

# List all elements (files and directories) in the specified directory and get their full paths
elements_paths = [os.path.join(directory_path, element) for element in os.listdir(directory_path)]
for voc_file in elements_paths:
    if os.path.isfile(voc_file) and voc_file.endswith('.csv'):
        unit_id = re.sub("[^0-9]", "", os.path.basename(voc_file))
        df = pd.read_csv(voc_file)
        for idx, row in df.iterrows():
            vocs.append( 
                Vocabulary(
                    learn_id=f"{language_name.upper()}_{unit_id}_V{idx}",
                    word = row['word'],
                    translation = row["translation"],
                    phonetic = row["pinyin"],
                    example_sentence = row.get("example_sentence", ""),
                    type = row["type"],
                    parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
                )
            )
inserts(session, vocs)
session.close()

2025-07-05 18:42:11,235 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-05 18:42:11,235 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-05 18:42:11,235 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,236 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-05 18:42:11,236 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,236 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-05 18:42:11,236 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,237 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-05 18:42:11,237 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,237 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-05 18:42:11,237 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:11,237 INFO sqlalchemy.engine.Engine COMMIT
2025-07-05 18:42:11,241 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2

  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()


2025-07-05 18:42:11,443 INFO sqlalchemy.engine.Engine [cached since 0.2014s ago] ('ZH_14', 1, 0)
2025-07-05 18:42:11,444 INFO sqlalchemy.engine.Engine SELECT unit.unit_id AS unit_unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level 
FROM unit 
WHERE unit.unit_id = ?
 LIMIT ? OFFSET ?
2025-07-05 18:42:11,444 INFO sqlalchemy.engine.Engine [cached since 0.202s ago] ('ZH_14', 1, 0)
2025-07-05 18:42:11,444 INFO sqlalchemy.engine.Engine SELECT unit.unit_id AS unit_unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level 
FROM unit 
WHERE unit.unit_id = ?
 LIMIT ? OFFSET ?
2025-07-05 18:42:11,444 INFO sqlalchemy.engine.Engine [cached since 0.2026s ago] ('ZH_14', 1, 0)
2025-07-05 18:42:11,445 INFO sqlalchemy.engine.Engine SELECT unit.unit_id AS unit_unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level 
FROM unit 
WHERE unit.unit_id = ?
 LIMIT ? OFFSET ?
2025-07-05 

  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  session.commit()


2025-07-05 18:42:11,671 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-05 18:42:11,671 INFO sqlalchemy.engine.Engine SELECT unit.unit_id AS unit_unit_id, unit.title AS unit_title, unit.description AS unit_description, unit.level AS unit_level 
FROM unit 
WHERE unit.unit_id = ?
2025-07-05 18:42:11,671 INFO sqlalchemy.engine.Engine [cached since 0.03541s ago] ('ZH_3',)
2025-07-05 18:42:11,672 INFO sqlalchemy.engine.Engine INSERT INTO vocabulary (learn_id, word, translation, phonetic, example_sentence, type, score, last_seen, unit_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
2025-07-05 18:42:11,672 INFO sqlalchemy.engine.Engine [cached since 0.03813s ago] ('ZH_3_V3', 'ÊòüÊúü', 'Semaine', 'Xƒ´ngqƒ´', '', 'Date', 0, '2025-07-05', 'ZH_3')
2025-07-05 18:42:11,672 INFO sqlalchemy.engine.Engine COMMIT
2025-07-05 18:42:11,672 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-05 18:42:11,673 INFO sqlalchemy.engine.Engine SELECT unit.unit_id AS unit_unit_id, unit.title AS unit_title, unit.d

## Add Grammar Rules

In [5]:
# Initialize the database connection and create db file if it doesn't exist
engine, session = init_db(language_name)

directory_path = f'../data/{language_name}/grammar'

if not os.path.exists(directory_path):
    raise FileNotFoundError(f"No grammar files found for {language_name}.")

grammars = []

# List all elements (files and directories) in the specified directory and get their full paths
elements_paths = [os.path.join(directory_path, element) for element in os.listdir(directory_path)]
for grammar_file in elements_paths:
    if os.path.isfile(grammar_file) and grammar_file.endswith('.json'):
        unit_id = re.sub("[^0-9]", "", os.path.basename(grammar_file))
        with open(grammar_file, 'r') as f:
            grammar_data = json.load(f)
        for idx, row in enumerate(grammar_data):
            grammars.append( 
                GrammarRule(
                    learn_id=f"{language_name.upper()}_{unit_id}_G{idx + 1}",
                    title=row['title'],
                    explanation=row["content"],
                    parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
                )
            )
inserts(session, grammars)
session.close()

2025-07-05 18:42:13,053 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-05 18:42:13,053 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("unit")
2025-07-05 18:42:13,053 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:13,053 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calligraphy_character")
2025-07-05 18:42:13,053 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:13,053 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("grammar_rule")
2025-07-05 18:42:13,054 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:13,054 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("vocabulary")
2025-07-05 18:42:13,054 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:13,054 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("exercises")
2025-07-05 18:42:13,055 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-05 18:42:13,055 INFO sqlalchemy.engine.Engine COMMIT
2025-07-05 18:42:13,057 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2

  parent=session.query(Unit).filter(Unit.unit_id == f"{language_name.upper()}_{unit_id}").first()
  session.commit()
