# Import / Config

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from pathlib import Path
import sqlglot
from edurel.utils.duck_utils import *

# Absolute base path of the edurel tree (machine-specific; adjust when running
# the notebook elsewhere).
BASE_DIR = "/home/basis/work/github/edurel"
# Directory containing the databases. NOTE: the value keeps a trailing "/",
# so child names can be appended with plain string concatenation.
DB_DIR = BASE_DIR + "/databases/"



# FileDB

In [5]:
# Open the db-adw-oltp database through the project's file-connection helper.
# DB_DIR already ends with "/", so the path is joined with pathlib rather than
# an f-string with another "/" (which produced ".../databases//db-adw-oltp").
# The helper still receives a plain string.
con = duckdb_file_con(str(Path(DB_DIR) / "db-adw-oltp"))

In [7]:
# Fetch the schema description for the open connection, then display it.
file_db_schema = duckdb_schema(con)
print(file_db_schema)

Table: Address (AddressID INTEGER NOT NULL, AddressLine1 VARCHAR NOT NULL, AddressLine2 VARCHAR NULL, City VARCHAR NOT NULL, StateProvinceID INTEGER NOT NULL, PostalCode VARCHAR NOT NULL, SpatialLocation VARCHAR NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: AddressType (AddressTypeID INTEGER NOT NULL, Name VARCHAR NOT NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: BillOfMaterials (BillOfMaterialsID INTEGER NOT NULL, ProductAssemblyID INTEGER NULL, ComponentID INTEGER NOT NULL, StartDate TIMESTAMP NOT NULL, EndDate TIMESTAMP NULL, UnitMeasureCode VARCHAR NOT NULL, BOMLevel SMALLINT NOT NULL, PerAssemblyQty DECIMAL(8,2) NOT NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: BusinessEntity (BusinessEntityID INTEGER NOT NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT NULL)
Table: BusinessEntityAddress (BusinessEntityID INTEGER NOT NULL, AddressID INTEGER NOT NULL, AddressTypeID INTEGER NOT NULL, rowguid VARCHAR NULL, ModifiedDate TIMESTAMP NOT N

# MemDB

In [None]:
# Sample query run against the db-company_en database.
sql_tsql = """
select * from employee;
"""

# Open db-company_en through the project's in-memory connection helper, show
# its schema and print the result of the sample query.
con = duckdb_mem_con(DB_DIR + "db-company_en")
try:
    print(duckdb_schema(con))
    sql_print(con, sql_tsql)
finally:
    # Always release the connection, even when the schema dump or the query
    # raises; otherwise a failed run leaves it open in the kernel.
    con.close()

Table: EmpProj (EID INTEGER NOT NULL, PID INTEGER NOT NULL, NoOfHoursPerWeek INTEGER NOT NULL)
Table: Employee (EID INTEGER NOT NULL, OUID INTEGER NOT NULL, LastName VARCHAR NOT NULL, Hiredate DATE NOT NULL, Salary DECIMAL(9,2) NOT NULL, Bonus DECIMAL(9,2) NULL)
Table: OrgUnit (OUID INTEGER NOT NULL, Head INTEGER NULL, SuperUnit INTEGER NULL, Name VARCHAR NOT NULL)
Table: Project (PID INTEGER NOT NULL, Title VARCHAR NOT NULL, Budget DECIMAL(13,2) NULL)
Foreign Key: EmpProj(EID) -> Employee(EID)
Foreign Key: EmpProj(PID) -> Project(PID)
Foreign Key: Employee(OUID) -> OrgUnit(OUID)

┌───────┬───────┬──────────┬────────────┬──────────────┬──────────────┐
│  EID  │ OUID  │ LastName │  Hiredate  │    Salary    │    Bonus     │
│ int32 │ int32 │ varchar  │    date    │ decimal(9,2) │ decimal(9,2) │
├───────┼───────┼──────────┼────────────┼──────────────┼──────────────┤
│   101 │    11 │ Patil    │ 2000-05-01 │    180000.00 │         NULL │
│   102 │    12 │ Durmaz   │ 2005-07-01 │    120000.

# adw-oltp

In [None]:
# Read the T-SQL schema script for db-adw-oltp (default text encoding, as before).
sql_tsql = (Path(DB_DIR) / "db-adw-oltp" / "schema1.sql").read_text()

# Translate from the T-SQL dialect to DuckDB's dialect. transpile() returns a
# list with one SQL string per statement, without statement terminators.
sql_duckdb = sqlglot.transpile(sql_tsql, read="tsql", write="duckdb")

# Join with ";\n" so the combined script stays valid when it contains more
# than one statement; a bare "\n" join would run the statements together and
# the script could not be executed as a whole.
sql_duckdb_str = ";\n".join(sql_duckdb)
print(sql_duckdb_str)


In [None]:

# Open db-adw-oltp through the in-memory connection helper, run the transpiled
# DuckDB script (sql_duckdb_str, built in the transpile cell) and print the
# resulting schema.
con = duckdb_mem_con(DB_DIR + "db-adw-oltp")
try:
    con.execute(sql_duckdb_str)
    print(duckdb_schema(con))
finally:
    # Close the connection even if the script fails to execute, so a broken
    # schema does not leave an open connection behind in the kernel.
    con.close()

# CSV

In [12]:
from pathlib import Path
import duckdb
from edurel.utils.duck_utils import *


In [None]:
# Working directories below db-adw-oltp/tmp. DB_DIR already ends with "/", so
# the paths are assembled with pathlib to avoid the "//" that the previous
# f-strings produced; the helper still receives plain strings.
adw_tmp_dir = Path(DB_DIR) / "db-adw-oltp" / "tmp"
csv_dir = str(adw_tmp_dir / "csv")
tsv_dir = str(adw_tmp_dir / "tsv")
parquet_dir = str(adw_tmp_dir / "parquet")

# Reader options handed to db_file_op as-is: no header row, "+|" as delimiter.
# NOTE(review): presumably forwarded to DuckDB's CSV reader — confirm in duck_utils.
spec = "header=false, delim = '+|'"

# Process every "*.csv" file in csv_dir, with parquet_dir as the output path.
db_file_op(csv_dir, "*", ".csv", spec, show=True, out_path=parquet_dir)