# Oracle SQL Data Exploration Notebook

An example notebook for exploring data in an Oracle database, using SQLAlchemy to connect and run SQL and pandas DataFrames to hold and analyze the results.


In [None]:
import json
import os
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlfluff
from box import Box
from dateutil.relativedelta import relativedelta
from sqlalchemy import create_engine

from cupyopt.schema import avro_schema, avro_schema_to_file, infer_df_avro_schema
from cupyopt.validation import df_avro_validate

In [None]:
# key inside the YAML configuration that selects which database's settings to use
database_name = "db"
# location of the YAML configuration file holding the connection details
config_filepath = "file.yaml"

In [None]:
# standard-library helper used to percent-encode the credentials below
from urllib.parse import quote

# load the YAML configuration file and select the section holding the
# connection details for the chosen oracle database
config = Box.from_yaml(filename=config_filepath)[database_name]

# oracle connection string template; the placeholders are filled in with the
# values read from the YAML configuration above
oracle_connection_string = (
    "oracle+cx_oracle://{username}:{password}@{hostname}:{port}/{database}"
)
# percent-encode the username and password so that characters which are special
# inside a URL (such as "@", ":" or "/") cannot corrupt the connection string;
# the configuration file is expected to hold the raw, unescaped values
live_uri = oracle_connection_string.format(
    username=quote(str(config["username"]), safe=""),
    password=quote(str(config["password"]), safe=""),
    hostname=config["hostname"],
    port=config["port"],
    database=config["database"],
)
# create the database connection engine using the sqlalchemy library and the
# formatted connection URI
engine = create_engine(live_uri)

In [None]:
# build a SQL statement and run it through the SQL linter before executing it
# (the query below is a placeholder: substitute real schema, table and column names)
sql_stmt = """
select distinct fs.uuid, fo1.flag, fo2.code from funky.schema fs
join funky.object1 fo1 on
fs.uuid = fo1.uuid
and fs.code = fo1.code
join funky.object2 fo2 on
fs.code = fo2.code
where fo1.flag = False
and fo2.prog = 'thing'
and fs.uuid is not null
"""
# NOTE(review): the statement is linted with the "ansi" dialect although it is
# run against Oracle - confirm whether an Oracle-specific dialect should be used
sql_stmt_fixed = sqlfluff.fix(sql_stmt, dialect="ansi")
# show the linted statement so it can be reviewed before it is executed
print(sql_stmt_fixed)

In [None]:
# execute the linted statement through the engine and load the result set
# into a dataframe
df = pd.read_sql(sql=sql_stmt_fixed, con=engine)
# preview the first five rows of the result
df.head(5)

In [None]:
# optional clean-up step: rename columns first, then cast the renamed columns
# to the desired types (extend both mappings as needed)
df = df.rename(columns={"colname": "new_colname"}).astype(
    {"new_colname": "str"}
)