# Database Access Template

Use this notebook to connect to the GermanVerbMaster database and explore it with ad-hoc SQL queries.

In [1]:
# Optional: install the notebook's dependencies into the current kernel's environment.
# The packages are not pinned, so the versions you get depend on when this cell is run.
%pip install --quiet psycopg[binary] python-dotenv pandas

Note: you may need to restart the kernel to use updated packages.


In [31]:
from pathlib import Path
import os
import json

import psycopg
from psycopg.rows import dict_row

try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None

# Locate the project root relative to the notebook's working directory and load
# settings (notably DATABASE_URL) from its .env file when that is possible.
project_root = Path("..").resolve()
env_path = project_root / ".env"
if load_dotenv is None:
    print("python-dotenv is not installed; skipping .env loading.")
elif not env_path.exists():
    print(f"No .env file found at {env_path}; relying on existing environment.")
else:
    load_dotenv(env_path)
    print(f"Loaded environment variables from {env_path}")

database_url = os.environ.get("DATABASE_URL")
if not database_url:
    raise RuntimeError("DATABASE_URL is not set. Update your environment or .env file before continuing.")

# Re-running this cell would otherwise open a new connection while the one from
# the previous run stays open. Close the old one first so reruns do not leak
# database sessions.
_previous_connection = globals().get("connection")
if _previous_connection is not None and not getattr(_previous_connection, "closed", True):
    _previous_connection.close()

# autocommit=True: statements are not wrapped in an implicit transaction.
# row_factory=dict_row: each fetched row is a dict keyed by column name.
connection = psycopg.connect(database_url, autocommit=True, row_factory=dict_row)
print("Connected to database.")

Loaded environment variables from C:\Projects\GermanVerbMaster\.env
Connected to database.


In [4]:
# pandas is optional: when the import fails, `pd` is set to None so that
# query_df can detect the missing dependency and raise a clear error.
try:
    import pandas as pd
except ImportError:
    pd = None
    print("pandas is not installed; DataFrame helpers will be unavailable until you install it.")

def run_query(sql: str, params: dict | None = None, *, fetch: str = "all", size: int | None = None):
    """Execute a SQL statement and return the requested result set."""
    params = params or {}
    with connection.cursor() as cur:
        cur.execute(sql, params)
        if cur.description is None:
            return None
        if fetch == "one":
            return cur.fetchone()
        if fetch == "many":
            return cur.fetchmany(size)
        return cur.fetchall()

def query_df(sql: str, params: dict | None = None, *, fetch: str = "all", size: int | None = None):
    """Execute a SQL statement and return the results as a pandas DataFrame."""
    if pd is None:
        raise ImportError("Install pandas to use query_df (e.g. run `%pip install pandas`).")
    rows = run_query(sql, params=params, fetch=fetch, size=size)
    return pd.DataFrame(rows)

# Visible confirmation that this cell ran and the helper functions are defined.
print("Helper utilities ready.")

Helper utilities ready.


In [5]:
# Smoke test: fetch a single row to confirm the connection works.
# Replace the query below with your own SQL.
run_query("SELECT NOW() AS connected_at;", fetch="one")

{'connected_at': datetime.datetime(2025, 10, 18, 10, 44, 54, 777795, tzinfo=datetime.timezone.utc)}

In [119]:
# Rows of `words` with the given part of speech that are missing at least one of:
# English gloss, plural, gender, or either example sentence.
# NOTE: there is no ORDER BY, so which rows fall inside the LIMIT is up to the database.
POS_FILTER = "N"  # presumably the tag for nouns, judging by the rows returned -- confirm
MAX_ROWS = 205
df = query_df(
    """
    SELECT lemma, pos, english, plural, gender, example_de, example_en
    FROM words
    WHERE pos = %(pos)s
      AND (
            example_en IS NULL
         OR example_de IS NULL
         OR english IS NULL
         OR gender IS NULL
         OR plural IS NULL
      )
    LIMIT %(limit)s
    """,
    params={"pos": POS_FILTER, "limit": MAX_ROWS},
)

In [120]:
# df = df.rename(columns={
#     "example_de": "exampleDe",
#     "example_en": "exampleEn"
# })

In [121]:
# Preview the first rows of the query result.
df.head()

Unnamed: 0,lemma,pos,english,plural,gender,example_de,example_en
0,Rückfahrt,N,return trip,Rückfahrten,die,,
1,Zuschauerin,N,,,die,,
2,Arbeiter,N,,,der,,
3,Mobilbox,N,,,die,,
4,Absender,N,sender,Absender,die,,


In [122]:
# Number of rows fetched; the query's LIMIT caps this value.
len(df)

205

In [123]:
# Rows whose plural form is still missing.
df.loc[df["plural"].isna()]

Unnamed: 0,lemma,pos,english,plural,gender,example_de,example_en
1,Zuschauerin,N,,,die,,
2,Arbeiter,N,,,der,,
3,Mobilbox,N,,,die,,
6,Beamte,N,,,"der, die",,
7,Deutsche,N,,,"die, der",,
...,...,...,...,...,...,...,...
159,Durst,N,thirst,,der,,
169,Fleisch,N,flesh,,das,,
174,Erwachsene,N,,,"die, der",,
176,Fahrer,N,,,der,,


In [124]:
# df.to_json('temp.json', orient="records", indent=2)

In [125]:
# Turn the DataFrame into plain dicts, one per row. pandas can represent missing
# values as float NaN, which the json module would write as the bare token `NaN`
# (not valid JSON), so map NaN to None (serialized as null) first.
records = [
    {
        column: (None if isinstance(value, float) and value != value else value)  # NaN != NaN
        for column, value in row.items()
    }
    for row in df.to_dict(orient="records")
]

# Wrap the rows into the target structure.
wrapped = {
    "providerId": "manual",
    "providerLabel": "Community Import",
    "mode": "approved",
    "applyMode": "merge",
    "entries": records
}

# Serialize before opening the file so a serialization error cannot leave a
# truncated file behind. ensure_ascii=False keeps non-ASCII characters readable;
# allow_nan=False fails loudly on any remaining non-finite float instead of
# emitting invalid JSON.
payload = json.dumps(wrapped, ensure_ascii=False, indent=2, allow_nan=False)

# Export to formatted multi-line JSON file
with open("../temp.json", "w", encoding="utf-8") as f:
    f.write(payload)