# 🗄️ Databases

In [7]:
# Oracle Database to Pandas Dataframe

from urllib.parse import quote_plus

import pandas as pd
import yaml
from sqlalchemy import create_engine

# Note: assumes the cx_Oracle driver and its dependencies are already installed;
# installing them may require external downloads.
# See the following: https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html

# create a sample configuration file containing database credentials
# (inlined here for demonstration only; the top-level key names the database entry)
yaml_config = """
database_name:
    username: "username"
    password: "password123"
    hostname: "somewhere"
    port: "12345"
    database: "database_name"
"""

# open yaml config as dict; BaseLoader constructs only basic objects and keeps
# every scalar (including the port) as a plain string
config = yaml.load(yaml_config, Loader=yaml.BaseLoader)["database_name"]

# oracle connection string which will have values replaced based on credentials
# provided from above yaml config.
# The username and password are percent-encoded first: characters such as "@",
# ":" or "/" inside a credential would otherwise be read as URL delimiters when
# SQLAlchemy parses the connection string.
oracle_connection_string = (
    "oracle+cx_oracle://{username}:{password}@{hostname}:{port}/{database}"
).format(
    username=quote_plus(config["username"]),
    password=quote_plus(config["password"]),
    hostname=config["hostname"],
    port=config["port"],
    database=config["database"],
)

# create the database connection engine using sqlalchemy
engine = create_engine(oracle_connection_string)

# prepare a database select statement (TABLENAME is a placeholder table name)
sql_stmt = "SELECT DISTINCT * FROM TABLENAME"

# run select statement against database using pre-prepared SQLAlchemy engine
# and collect the result set into a pandas DataFrame
df = pd.read_sql(sql_stmt, engine)

# show the head of our dataframe result which should show the first few rows of the database table data
df.head()