# Extract all tables in the `sales` schema

In [1]:
%load_ext autoreload
%autoreload 2

## Control variables

Constants

In [5]:
# SCHEMA names both the source schema queried over JDBC and the target schema
# under CATALOG that the extracted tables are written to.
SCHEMA = 'sales'
CATALOG = 'antonio_junior_raw'

# Secret scope holding the SQL Server connection details. Declared once so the
# lookups below cannot drift apart.
SECRET_SCOPE = "antonio_junior_adw"

# Password, host and port are read from the secret scope instead of being
# hardcoded in the notebook.
DB_PASSWORD = dbutils.secrets.get(scope=SECRET_SCOPE, key="pswd_mssql")
DB_HOST = dbutils.secrets.get(scope=SECRET_SCOPE, key="ip_mssql")
DB_PORT = dbutils.secrets.get(scope=SECRET_SCOPE, key="port_mssql")

# NOTE(review): "SA" is SQL Server's built-in administrator login; a read-only,
# least-privilege account would be safer for extraction — confirm with the DBA.
DB_USER = "SA"
DATABASE = "AdventureWorks"

Connection variables

In [10]:
# Credentials and driver class handed to every spark.read.jdbc call.
connection_properties = dict(
    user=DB_USER,
    password=DB_PASSWORD,
    driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
)

# JDBC URL of the source SQL Server database, assembled piece by piece.
# NOTE(review): encrypt=true combined with trustServerCertificate=true appears
# to skip server certificate validation — confirm this is acceptable here.
jdbc_url = (
    f"jdbc:sqlserver://{DB_HOST}:{DB_PORT};"
    f"databaseName={DATABASE};"
    "encrypt=true;trustServerCertificate=true;"
)

## SQL queries

List tables in schema

In [7]:
# Query returning the names of the base tables in the source schema.
# INFORMATION_SCHEMA.TABLES lists views as well as tables, so TABLE_TYPE is
# restricted to 'BASE TABLE' to keep views from being extracted as tables.
list_tables = f"""
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = '{SCHEMA}'
  AND TABLE_TYPE = 'BASE TABLE'
"""

Create schema

In [9]:
# DDL creating the target schema. It is built from SCHEMA (not a hardcoded
# name) so it always matches the schema the extracted tables are written to.
create_schema = f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{SCHEMA}"

## Extraction steps

Get the list of tables in the source schema.

In [11]:
# Fetch the table names of the source schema over JDBC. The query is wrapped
# in parentheses with an alias so it can be passed where a table name is
# expected.
try:
    tables_list = spark.read.jdbc(url=jdbc_url,
                                  table=f"({list_tables}) AS tmp",
                                  properties=connection_properties)
    print("List of tables in the 'sales' schema retrieved successfully.")
except Exception as e:
    print(f"Failed to retrieve tables: {e}")
    # Re-raise so the notebook stops here: continuing would leave `tables_list`
    # undefined (fresh kernel) or stale (earlier run) for the extraction step.
    raise


List of tables in the 'sales' schema retrieved successfully.


Extract all data from each table.

In [None]:
# Make sure the target schema exists before any table is written into it.
# The statement uses IF NOT EXISTS, so re-running this cell is harmless.
spark.sql(create_schema)

# Copy every listed source table into a Delta table of the same name.
for table in tables_list.toLocalIterator():
    table_name = table.TABLE_NAME
    print(table_name)
    # Read the whole source table over JDBC.
    df = spark.read.jdbc(url=jdbc_url,
                         table=f'{SCHEMA}.{table_name}',
                         properties=connection_properties)
    # Full refresh: overwrite the data and, via overwriteSchema, the table
    # schema too, so source column changes do not break the write.
    df.write \
      .format("delta") \
      .mode("overwrite") \
      .option("overwriteSchema", "true") \
      .saveAsTable(f"{CATALOG}.{SCHEMA}.{table_name}")