# Clone database

This example reads every table of a source database and inserts its rows into a corresponding heap table in the target database. A database is more than just heaps of data (indexes, constraints and nullability are not carried over here), so this is far from replication.

In [None]:
!pip install pyodbc

In [None]:
import getpass
import itertools
import os

import arrow_bcp as ab
import pyodbc

Provide the database connection details for the source (`SRC`) and target (`TGT`) servers before running the cells below.

In [None]:
def _setting(name, secret=False):
    """Return the connection setting ``name``.

    The value is taken from the environment variable of the same name. When
    that variable is unset or empty the user is prompted instead; secrets are
    read with ``getpass`` so they are not echoed.

    Args:
        name: Name of the setting and of the environment variable to look up.
        secret: When true, prompt without echoing the typed value.

    Returns:
        The setting as a string.
    """
    value = os.environ.get(name)
    if value:
        return value
    prompt = f"{name}: "
    return getpass.getpass(prompt) if secret else input(prompt)


# Connection details are never written into the notebook itself: export the
# variables below before starting Jupyter, or type them in when prompted.
SQL_URL_SRC = _setting("SQL_URL_SRC")
SQL_URL_TGT = _setting("SQL_URL_TGT")
SQL_DB_SRC = _setting("SQL_DB_SRC")
SQL_DB_TGT = _setting("SQL_DB_TGT")
SQL_LOGIN_SRC = _setting("SQL_LOGIN_SRC")
SQL_LOGIN_TGT = _setting("SQL_LOGIN_TGT")
SQL_PWD_SRC = _setting("SQL_PWD_SRC", secret=True)
SQL_PWD_TGT = _setting("SQL_PWD_TGT", secret=True)

In [None]:
def _odbc_value(value):
    """Brace-enclose an ODBC connection-string value.

    Inside braces characters such as ';' are taken literally; a literal '}'
    has to be doubled.
    """
    return "{" + str(value).replace("}", "}}") + "}"


def _odbc_connection_string(server, database, login, password):
    """Build the ODBC Driver 17 connection string for one server/database."""
    return (
        "DRIVER={ODBC Driver 17 for SQL Server};"
        f"SERVER={server};DATABASE={database};"
        f"UID={_odbc_value(login)};PWD={_odbc_value(password)}"
    )


def _bcp_arguments(server, database, login, password):
    """Build the bcp argument list for one server/database.

    The list is assembled element by element instead of splitting a formatted
    string on whitespace, so a login or password that contains spaces stays a
    single argument.
    """
    return [
        "-S", f"tcp:{server},1433",
        "-d", str(database),
        "-U", str(login),
        "-P", str(password),
    ]


# pyodbc connections are used for metadata queries and DDL.
pyodbc_con_src = pyodbc.connect(
    _odbc_connection_string(SQL_URL_SRC, SQL_DB_SRC, SQL_LOGIN_SRC, SQL_PWD_SRC)
)
pyodbc_con_tgt = pyodbc.connect(
    _odbc_connection_string(SQL_URL_TGT, SQL_DB_TGT, SQL_LOGIN_TGT, SQL_PWD_TGT)
)

# arrow_bcp connections are used for the bulk data transfer.
bcp_con_src = ab.ConnectionInfo(
    _bcp_arguments(SQL_URL_SRC, SQL_DB_SRC, SQL_LOGIN_SRC, SQL_PWD_SRC)
)
bcp_con_tgt = ab.ConnectionInfo(
    _bcp_arguments(SQL_URL_TGT, SQL_DB_TGT, SQL_LOGIN_TGT, SQL_PWD_TGT)
)

Read the column types from the source database; they are used to replicate the table schemas in the target database. Alternatively, one could generate the create scripts from the schemas of the Arrow tables.

In [None]:
# One row per column of every table in the source database:
# (SchemaName, TableName, ColumnName, is_nullable, DataType, max_length, precision, scale).
# Temporal history tables (temporal_type = 1) and hidden columns are left out.
# Rows are ordered by schema, table and column id, so all columns of a table
# are adjacent and appear in their declared order.
column_metadata_sql = """
    SELECT 
        s.name AS SchemaName, t.name AS TableName,
        c.name AS ColumnName, c.is_nullable, ty.name AS DataType,
        c.max_length, c.precision, c.scale
    FROM sys.columns c
    JOIN sys.tables t ON c.object_id = t.object_id
    JOIN sys.schemas s ON t.schema_id = s.schema_id
    JOIN sys.types ty ON c.user_type_id = ty.user_type_id
    WHERE t.temporal_type <> 1
    AND c.is_hidden = 0
    ORDER BY s.name, t.name, c.column_id;
"""
schema = pyodbc_con_src.execute(column_metadata_sql).fetchall()

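Loop over the tables: generate each table's create script, recreate the table in the target database, and finally read the data from the source and write it to the target using arrow_bcp. Note that an existing target table with the same name is dropped first.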

In [None]:
def _quote_identifier(identifier):
    """Return ``identifier`` as a bracket-delimited T-SQL name, doubling any ']'."""
    return "[" + str(identifier).replace("]", "]]") + "]"


def _column_definition(name, dtype, max_length, precision, scale):
    """Build the definition of one column of a target table.

    Args:
        name: Column name.
        dtype: Type name as reported by ``sys.types``.
        max_length: ``sys.columns.max_length``; -1 means ``(max)``.
        precision: ``sys.columns.precision``, used for decimal/numeric.
        scale: ``sys.columns.scale``, used for decimal/numeric.

    Returns:
        A ``[name] type(size) NULL`` fragment for a CREATE TABLE statement.
    """
    size = ""
    if dtype in ("char", "varchar", "binary", "varbinary"):
        size = "(max)" if max_length == -1 else f"({max_length})"
    elif dtype in ("nchar", "nvarchar"):
        # max_length is in bytes; these types use two bytes per character.
        size = "(max)" if max_length == -1 else f"({max_length // 2})"
    elif dtype in ("decimal", "numeric"):
        size = f"({precision},{scale})"
    # Every column is created nullable, whatever the source column allows.
    return f"{_quote_identifier(name)} {dtype}{size} NULL"


# Schemas already verified or created in the target, so each is checked once.
schemas_ready = set()

# `schema` is ordered by schema and table name, so groupby yields one group per table.
for iteration, ((schema_name, table_name), cols) in enumerate(
    itertools.groupby(schema, lambda row: row[0:2])
):
    coldefs = [
        _column_definition(name, dtype, max_length, precision, scale)
        for _, _, name, _is_nullable, dtype, max_length, precision, scale in cols
    ]
    quoted_schema = _quote_identifier(schema_name)
    quoted_table = f"{quoted_schema}.{_quote_identifier(table_name)}"
    create = (
        f"CREATE TABLE {quoted_table} (\n    "
        + ",\n    ".join(coldefs)
        + "\n);"
    )

    # Create the schema only when it is missing, instead of attempting the
    # CREATE and swallowing every ProgrammingError (which would also hide
    # genuine failures such as missing permissions).
    if schema_name not in schemas_ready:
        schema_row = pyodbc_con_tgt.execute(
            "SELECT 1 FROM sys.schemas WHERE name = ?", schema_name
        ).fetchone()
        if schema_row is None:
            pyodbc_con_tgt.execute(f"CREATE SCHEMA {quoted_schema}")
            pyodbc_con_tgt.commit()
        schemas_ready.add(schema_name)

    # Start from an empty table: any existing target table of this name is dropped.
    pyodbc_con_tgt.execute(f"DROP TABLE IF EXISTS {quoted_table}")
    pyodbc_con_tgt.commit()
    pyodbc_con_tgt.execute(create)
    pyodbc_con_tgt.commit()

    print(f"{iteration} finished setup: {schema_name}.{table_name}, proceeding to copy data")

    # NOTE(review): arrow_bcp is still given the undelimited name, as before;
    # confirm how it handles names that need delimiting.
    arrow_table = bcp_con_src.download_arrow_table(f"{schema_name}.{table_name}")
    bcp_con_tgt.insert_arrow(f"{schema_name}.{table_name}", arrow_table)