# Simple ETL Execution with a source and destination database

In [None]:
# For use in Jupyter Only
# IPython magic (not valid in a plain Python script): sets the completer's
# `use_jedi` option to False, i.e. tab completion will not use Jedi

%config Completer.use_jedi = False

In [None]:
# Import Libraries

import os
import pandas as pd
import sqlalchemy
import getpass # For use in masked password inputs

## Setup Source Database Connection

In [None]:
# Notebook-only setup: collect the Teradata connection parameters interactively
# Do not use in production scripts

TERADATA_HOST = ''  # Required
TERADATA_PORT = '1025'  # Default
TERADATA_DATABASE = ''  # Default

# The user name is typed in clear text; getpass masks the password input
TERADATA_USER = input("User Name:")
TERADATA_PASSWORD = getpass.getpass("Password: ")

In [None]:
# Setup Teradata connection parameters with environment variables
# Required for production scripts
# ! Never hardcode sensitive information such as usernames and passwords !

# Required settings: a missing variable raises KeyError right here
TERADATA_HOST = os.environ['TERADATA_HOST']
TERADATA_USER = os.environ['TERADATA_USER']
TERADATA_PASSWORD = os.environ['TERADATA_PASSWORD']

# Optional settings: fall back to the documented defaults when the variable is unset
TERADATA_PORT = os.environ.get('TERADATA_PORT', '1025')
TERADATA_DATABASE = os.environ.get('TERADATA_DATABASE', '')

In [None]:
# Generate sqlalchemy connection string using the Teradata dialect
# The Teradata dialect for sqlalchemy is part of the `teradatasqlalchemy` library

from urllib.parse import quote

# Percent-encode the credentials: characters such as '@', ':' or '/' in a
# user name or password would otherwise be parsed as URL delimiters.
# The credentials are expected in raw form (not already percent-encoded).
_td_user = quote(TERADATA_USER, safe='')
_td_password = quote(TERADATA_PASSWORD, safe='')

# Minimum url requirements
td_url = f'teradatasql://{_td_user}:{_td_password}@{TERADATA_HOST}'

# Alternative full url string
# NOTE: this assignment replaces the minimum url above; keep only the one you need
td_url = f'teradatasql://{_td_user}:{_td_password}@{TERADATA_HOST}:{TERADATA_PORT}/{TERADATA_DATABASE}'

In [None]:
# Create database connection to Teradata using a sqlalchemy engine
# `td_url` is the connection string generated in the previous cell
td_engine = sqlalchemy.create_engine(td_url)

## Setup Destination Database Connection

In [None]:
# Notebook-only setup: collect the SQL Server connection parameters interactively
# Do not use in production scripts

SQL_SERVER_HOST = ''  # Required
SQL_SERVER_PORT = '1433'  # Default
SQL_SERVER_DATABASE = ''  # Usually Required

# The user name is typed in clear text; getpass masks the password input
SQL_SERVER_USER = input("User Name:")
SQL_SERVER_PASSWORD = getpass.getpass("Password: ")

In [None]:
# Setup SQL Server connection parameters with environment variables
# Required for production scripts
# ! Never hardcode sensitive information such as usernames and passwords !

# Required settings: a missing variable raises KeyError right here
SQL_SERVER_HOST = os.environ['SQL_SERVER_HOST']
SQL_SERVER_DATABASE = os.environ['SQL_SERVER_DATABASE']
SQL_SERVER_USER = os.environ['SQL_SERVER_USER']
SQL_SERVER_PASSWORD = os.environ['SQL_SERVER_PASSWORD']

# Optional setting: fall back to the documented default when the variable is unset
SQL_SERVER_PORT = os.environ.get('SQL_SERVER_PORT', '1433')

In [None]:
# Generate sqlalchemy connection string using the MS-SQL Server dialect
# The `mssql+pymssql` prefix selects sqlalchemy's MS-SQL Server dialect with the `pymssql` driver

from urllib.parse import quote

# Percent-encode the credentials: characters such as '@', ':' or '/' in a
# user name or password would otherwise be parsed as URL delimiters.
# The credentials are expected in raw form (not already percent-encoded).
_mssql_user = quote(SQL_SERVER_USER, safe='')
_mssql_password = quote(SQL_SERVER_PASSWORD, safe='')

# Minimum url requirements
mssql_url = f'mssql+pymssql://{_mssql_user}:{_mssql_password}@{SQL_SERVER_HOST}/{SQL_SERVER_DATABASE}'

# Alternative full url string
# NOTE: this assignment replaces the minimum url above; keep only the one you need
mssql_url = f'mssql+pymssql://{_mssql_user}:{_mssql_password}@{SQL_SERVER_HOST}:{SQL_SERVER_PORT}/{SQL_SERVER_DATABASE}'

In [None]:
# Create database connection to SQL Server using a sqlalchemy engine
# `mssql_url` is the connection string generated in the previous cell
mssql_engine = sqlalchemy.create_engine(mssql_url)

## Set Source Script

In [None]:
# Set the ETL Script as a multiline string
# This is recommended for simple SQL statements
# Replace the placeholder line between the triple quotes with the SQL
# statement to run against the source database

source_query = """
(instert query statement here)
"""

In [None]:
# Or, import the ETL Script from a file
# This is recommended for large SQL statements

# `etl.sql` is resolved relative to the current working directory
# The context manager closes the file handle as soon as the script has been read
with open('etl.sql', 'r') as sql_file:
    source_query = sql_file.read()

## Extract Data from Source

In [None]:
# Run the source query through the Teradata engine and load the result set
# into a pandas DataFrame

input_data = pd.read_sql(sql=source_query, con=td_engine)

## Transform DataFrame (if necessary)

In [None]:
# Placeholder transform: the extracted data is passed through unchanged
# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# in-place changes to `output_data` also change `input_data`
output_data = input_data

## Load Data into Destination

In [None]:
# Use pandas to_sql function to load data into destination

# Pushing data into a database with pandas can be an error prone process that requires debugging and iteration

# There are more precise and complex mechanisms to load data with SQLAlchemy, but pandas.to_sql is a good starting point

# For more information on how to use this function see
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html

# Name of the destination table; it must be filled in before running this cell
table_name = ''  # Required

# Fail early with a clear message instead of sending an empty table name to the database
if not table_name:
    raise ValueError('table_name must be set to the name of the destination table')

# With the defaults used here, to_sql also writes the DataFrame index as a column
# and raises if the table already exists (see the `index` and `if_exists` options)
# NOTE(review): `schema` receives the database name - confirm the destination
# really has a schema with that name (SQL Server commonly uses `dbo`)
output_data.to_sql(table_name, mssql_engine, schema=SQL_SERVER_DATABASE)