This python file serves as an orchestration program to be run **after** *civiActivityReport.ipynb*.
Components:
1. make copies of the two output tables from civiActivityReport.ipynb under generic names, so that the subsequent stored procedures can always read from the same table names. 
**original name convention**: 
- mem_status_(2-digit month, 2-digit day); ex. "mem_status_0406" 
- mem_type_(2-digit month, 2-digit day); ex. "mem_type_0406"

    Copying (rather than renaming) preserves the original import tables under their original names; those originals should be deleted manually later

2. run each stored procedure (*stored_procedure_create_type_tables*, *stored_procedure_create_status_table*). Each one inserts the new records (the output of the new CIVI import processed by *civiActivityReport.ipynb*) into a new version of the cumulative type and status tables of the db; consolidated table names in db: consolidated_mem_type, consolidated_mem_status
3. conduct QA on the new version of the two consolidated output tables from the stored procedure: *consolidated_mem_type_temp2* and *consolidated_mem_status_temp2*
4. if QA from #3 passes, replace the two prod *consolidated* tables
5. call the stored procedure to create the stack_job table: *stored_procedure_create_stack_job.sql*

In [41]:
import os
import pandas as pd
import numpy as np
import re
import datetime
import itertools
import json
import sqlalchemy

In [42]:
# DEFINE THE DATABASE CREDENTIALS
# Each value can be overridden through an environment variable so that real
# credentials do not have to be written into this file; when a variable is
# not set, the original hard-coded value is used.
user = os.environ.get('MEMBERSHIP_DB_USER', 'root')
password = os.environ.get('MEMBERSHIP_DB_PASSWORD', 'baeldung')
host = os.environ.get('MEMBERSHIP_DB_HOST', '172.17.0.2')
# environment values are strings; keep the port an int as before
port = int(os.environ.get('MEMBERSHIP_DB_PORT', '3306'))
database = os.environ.get('MEMBERSHIP_DB_NAME', 'membership')

def get_connection():
	"""Return a SQLAlchemy Engine for the MySQL database described above.

	The connection URL is assembled with ``URL.create`` rather than string
	formatting so that a user name or password containing characters such
	as ``@``, ``/`` or ``:`` is escaped correctly instead of corrupting
	the URL.

	Reads the module-level ``user``, ``password``, ``host``, ``port`` and
	``database`` values. No connection is opened here: the engine connects
	lazily on first use.
	"""
	return sqlalchemy.create_engine(
		sqlalchemy.engine.URL.create(
			drivername="mysql+pymysql",
			username=user,
			password=password,
			host=host,
			port=port,
			database=database,
		)
	)

if __name__ == '__main__':

	try:
		# GET THE CONNECTION OBJECT (ENGINE) FOR THE DATABASE
		# working w/engines: https://docs.sqlalchemy.org/en/20/core/engines_connections.html
		# the engine should be created just once; it can manage several DBAPI connections
		engine = get_connection()
		# create_engine() does not contact the server, so open (and immediately
		# release) one connection; otherwise the success message below would be
		# printed even when the database is unreachable
		with engine.connect():
			pass
		print(
			f"Connection to the {host} for user {user} created successfully.")
	except Exception as ex:
		# best-effort report only: the error is printed, not re-raised
		print("Connection could not be made due to the following error: \n", ex)

Connection to the 172.17.0.2 for user root created successfully.


In [43]:
#make copies of the two output tables from the .ipynb <- change table name to a generic name to be consumed by the stored procedures
def copy_rename(type_table: str, status_table: str) -> None:
    """Copy the two import tables into the generic names the stored procedures read.

    ``type_table`` is copied to ``mem_type_new_import`` and ``status_table``
    to ``mem_status_new_import``. Any existing copy is dropped first, the
    structure is cloned with ``CREATE TABLE ... LIKE`` and the rows are
    copied with ``INSERT ... SELECT``. The source tables are not modified.

    Args:
        type_table: name of the dated type table, e.g. ``'mem_type_0406'``.
        status_table: name of the dated status table, e.g. ``'mem_status_0406'``.

    NOTE: the table names are interpolated directly into the SQL text, so
    they must come from trusted code, never from user input.
    """
    #a CORE approach
    copies = (
        ("mem_type_new_import", type_table),
        ("mem_status_new_import", status_table),
    )
    # engine.begin() commits when the block exits without error. With a plain
    # engine.connect() and no explicit commit, the last INSERT would be rolled
    # back when the connection is released.
    with engine.begin() as conn: #interacting w/db through Connection class
        for target, source in copies:
            conn.execute(sqlalchemy.text(f"DROP TABLE IF EXISTS {target}"))
            conn.execute(sqlalchemy.text(f"CREATE TABLE {target} LIKE {source}"))
            conn.execute(sqlalchemy.text(f"INSERT INTO {target} SELECT * FROM {source}"))

In [44]:
# Copy the import tables with the "0406" (month/day) suffix into the generic names.
# NOTE: errors are not handled here; per the original author's note, the
# relevant error category would be a "programming-time error".
copy_rename(
    type_table='mem_type_0406',
    status_table='mem_status_0406',
)

In [45]:
#inspector option: https://docs.sqlalchemy.org/en/20/core/reflection.html#fine-grained-reflection-with-inspector
# The inspector is a low-level, backend-agnostic interface for listing the
# schemas, tables, columns and constraints of a database.
from sqlalchemy import inspect
insp = inspect(engine)
table_name_list = insp.get_table_names()
# Determine whether the tables expected to have been loaded into the db by
# civiActivityReport.ipynb are present, and name any that are not.
expected_import_tables = ('mem_type_0406', 'mem_status_0406')
missing_import_tables = [name for name in expected_import_tables if name not in table_name_list]
if not missing_import_tables:
    print('all clear to proceed')
else:
    # this check is about the notebook's import tables, not the copies made
    # by copy_rename(), so the message says so and lists what is missing
    print(f"import tables from civiActivityReport.ipynb aren't found in db: {missing_import_tables}")

all clear


Run each stored procedure; first check that the stored procedures exist on the db (query: *show procedure status where definer LIKE '%root%';*)

In [48]:
# List the stored procedures whose definer matches 'root'.
show_procedures = sqlalchemy.text("show procedure status where definer LIKE '%root%'")
with engine.connect() as conn:
    result = conn.execute(show_procedures)
    # position 1 of each returned row holds the procedure name
    lista = [row[1] for row in result]

print(lista)

['GetStudentData', 'status_table_create', 'table_creations', 'typetablecreate', 'type_table_create']


In [57]:
from sqlalchemy import text

# A Core approach: the engine is used directly and the SQL is written explicitly.
required_procedures = ('status_table_create', 'type_table_create')
if all(name in lista for name in required_procedures):
    # raw DBAPI connection pattern taken from
    # https://docs.sqlalchemy.org/en/20/core/connections.html
    connection = engine.raw_connection()
    try:
        cursor_obj = connection.cursor()
        # the type procedure is called before the status procedure
        for procedure in ("type_table_create", "status_table_create"):
            cursor_obj.callproc(procedure)
        cursor_obj.close()
        connection.commit()
    finally:
        # always hand the connection back, even if a procedure call failed
        connection.close()
else:
    # NOTE (original author): this branch doesn't work; importing a script
    # into mysql is tricky. It attempts to run the .sql files that define the
    # stored procedures as scripts from Python.
    print("stored procedures need to be compiled in server")
    script_paths = (
        "/home/candela/Documents/greeneHill/membershipReportsCIVI/github/greeneHill/stored_procedure_create_type_tables.sql",
        "/home/candela/Documents/greeneHill/membershipReportsCIVI/github/greeneHill/stored_procedure_create_status_table.sql",
    )
    with engine.connect() as conn:
        for script_path in script_paths:
            with open(script_path) as file:
                query = text(file.read())
                conn.execute(query)


In [62]:
from sqlalchemy import inspect

# Re-read the table list now that the stored procedures have run.
insp = inspect(engine)
table_name_list = insp.get_table_names()

# The stored procedures are expected to leave behind two key resultsets:
# consolidated_mem_type_temp2 and consolidated_mem_status_temp2.
procedure_outputs = {'consolidated_mem_type_temp2', 'consolidated_mem_status_temp2'}
if procedure_outputs.issubset(table_name_list):
    print('both key resultsets from the stored procedures verified in db')
else:
    print('stored procedures did not create the two key resultsets')

both key resultsets from the stored procedures verified in db


In [63]:
# Confirm that the two legacy (current prod) consolidated tables are present,
# using the table list fetched in the previous cell.
legacy_tables = {'consolidated_mem_type', 'consolidated_mem_status'}
if legacy_tables.issubset(table_name_list):
    print('both legacy tables verified in db')
else:
    print('legacy tables not in db')

both legacy tables verified in db


QA options post stored procedure calling:
- range of dates covered: new tables should extend **beyond** the legacy prod tables
- \# of total records, ie table size: new tables should have **more** records than legacy tables
- analyze a contingency table of status or types: shape or dimension of contingency of new tables should be > or = to legacy

The two stored procedures create persisted tables *consolidated_mem_type_temp2* and *consolidated_mem_status_temp2*. These serve as candidate tables to replace the prod tables *consolidated_mem_type* and *consolidated_mem_status*, respectively

In [87]:
from sqlalchemy import Table, text, MetaData # a CORE approach
from sqlalchemy.sql import select
from sqlalchemy.sql import func
from collections import namedtuple
metadata_obj = MetaData() # a container object that the reflected tables are registered with
# Table reflection: build Table objects from the tables persisted in the db.
# The two *_temp2 tables are the results of the stored procedures run above.
consolidated_mem_type_temp2 = Table("consolidated_mem_type_temp2", metadata_obj, autoload_with=engine) # metadata_obj associates the table with the metadata container
consolidated_mem_status_temp2 = Table("consolidated_mem_status_temp2", metadata_obj, autoload_with=engine)
# consolidated tables that existed before the stored procedures were called
consolidated_mem_type = Table("consolidated_mem_type", metadata_obj, autoload_with=engine)
consolidated_mem_status = Table("consolidated_mem_status", metadata_obj, autoload_with=engine)

table_stats = namedtuple("table_stats",['type_new','status_new','type_legacy','status_legacy'])

# Row counts of the candidate (temp2) and legacy tables. Each count is read
# with .scalar() while the connection is still open, instead of consuming the
# result objects after the connection has been closed.
with engine.connect() as conn:
    connect_stats = table_stats(
        type_new=conn.execute(text("SELECT COUNT(*) FROM consolidated_mem_type_temp2")).scalar(),
        status_new=conn.execute(text("SELECT COUNT(*) FROM consolidated_mem_status_temp2")).scalar(),
        type_legacy=conn.execute(text("SELECT COUNT(*) FROM consolidated_mem_type")).scalar(),
        status_legacy=conn.execute(text("SELECT COUNT(*) FROM consolidated_mem_status")).scalar(),
    )


# Ensure that the replacement tables have more records than the legacy tables.
# The messages describe record counts, which is what this check compares.
if connect_stats.type_new>connect_stats.type_legacy and connect_stats.status_new>connect_stats.status_legacy:
    print("record counts are as expected; you may proceed")
else:
    print("record counts appear off; stored procedure resultsets should be reviewed")


min and max values are as expected; you may proceed


In [65]:
# Fetch the min and max of the 'start_dt' field for each legacy and replacement table.
# Expectation (checked in the next cell): the min dates of the legacy and replacement
# tables are the same, while the max date is greater for the replacement table.
with engine.connect() as conn: # Connection instances are typical for CORE, Sessions for ORM
    # each execute() returns a CursorResult; first() returns its first Row
    # (a one-column row here), not a bare scalar
    # min() columns are labelled "minstart" and max() columns "maxstart"
    min_legacy_type = conn.execute(select(func.min(consolidated_mem_type.c.start_dt).label("minstart"))).first()
    max_legacy_type = conn.execute(select(func.max(consolidated_mem_type.c.start_dt).label("maxstart"))).first()
    min_replace_type = conn.execute(select(func.min(consolidated_mem_type_temp2.c.start_dt).label("minstart"))).first()
    max_replace_type = conn.execute(select(func.max(consolidated_mem_type_temp2.c.start_dt).label("maxstart"))).first()

    min_legacy_status = conn.execute(select(func.min(consolidated_mem_status.c.start_dt).label("minstart"))).first()
    max_legacy_status = conn.execute(select(func.max(consolidated_mem_status.c.start_dt).label("maxstart"))).first()
    min_replace_status = conn.execute(select(func.min(consolidated_mem_status_temp2.c.start_dt).label("minstart"))).first()
    max_replace_status = conn.execute(select(func.max(consolidated_mem_status_temp2.c.start_dt).label("maxstart"))).first()

In [88]:
# QA the start_dt ranges of the legacy and replacement tables.
# a / c: legacy and replacement must start on the same date
#        (NOTE: "replacement" is not the same thing as the new import table)
# b / d: the replacement must reach a later date than the legacy table
a, b, c, d = (
    min_legacy_type == min_replace_type,
    max_legacy_type < max_replace_type,
    min_legacy_status == min_replace_status,
    bool(max_legacy_status < max_replace_status),
)

dates_as_expected = a and b and c and d
if not dates_as_expected:
    print('table dates aren\'t as expected; review output from stored procedures')
else:
    print('date relationships between all tables are as expected; you can proceed')

date relationships between all tables are as expected; you can proceed


In [89]:
#TODO call the stack_job stored procedure <- the final input table to the active accounts study