This python file serves as an orchestration program to be run **after** *civiActivityReport.ipynb*.
Components:
1. make copies of the two output tables from civiActivityReport.ipynb in order to standardize their names for the subsequent stored procedures. 
**original name convention**: 
- mem_status_(2-digit mo, 2-digit day) ex. "mem_status_0406" 
- mem_type_(2-digit mo, 2-digit day); ex "mem_type_0406"

    This is done in order to preserve the original import table names, which should be deleted manually later

2. run each stored procedure (*stored_procedure_create_type_tables*, *stored_procedure_create_status_table*); each one inserts the new records (the output of the new CIVI import processed by *civiActivityReport.ipynb*) into a new version of the db's cumulative type and status tables. Consolidated table names in the db: consolidated_mem_type, consolidated_mem_status
3. conduct QA on the new version of the two consolidated output tables from the stored procedure: *consolidated_mem_type_temp2* and *consolidated_mem_status_temp2*
4. if QA from #3 passes, replace the two prod *consolidated* tables
5. call the stored procedure to create the stack_job table: *stored_procedure_create_stack_job.sql*

In [1]:
import os
import pandas as pd
import numpy as np
import re
import datetime
import itertools
import json
import sqlalchemy

In [2]:
# DEFINE THE DATABASE CREDENTIALS
# Each setting can be overridden through an environment variable so that real
# credentials do not have to be saved inside the notebook; the fallbacks are
# the original development values, so behavior is unchanged when nothing is set.
user = os.environ.get('MEMBERSHIP_DB_USER', 'root')
password = os.environ.get('MEMBERSHIP_DB_PASSWORD', 'baeldung')
host = os.environ.get('MEMBERSHIP_DB_HOST', '172.17.0.2')
port = int(os.environ.get('MEMBERSHIP_DB_PORT', 3306))  # env values are strings; keep port an int
database = os.environ.get('MEMBERSHIP_DB_NAME', 'membership')

def get_connection():
    """Build the SQLAlchemy engine for the configured MySQL database.

    Reads the module-level ``user``, ``password``, ``host``, ``port`` and
    ``database`` settings and formats them into a ``mysql+pymysql`` URL.
    The user name and password are percent-encoded first so that characters
    such as ``@``, ``/`` or ``:`` inside them cannot corrupt the URL.

    Returns:
        The object returned by ``sqlalchemy.create_engine`` for that URL.
    """
    from urllib.parse import quote_plus  # local import keeps this cell self-contained

    return sqlalchemy.create_engine(
        url="mysql+pymysql://{0}:{1}@{2}:{3}/{4}".format(
            quote_plus(user), quote_plus(password), host, port, database
        )
    )

if __name__ == '__main__':

    try:
        # GET THE CONNECTION OBJECT (ENGINE) FOR THE DATABASE
        # working w/engines: https://docs.sqlalchemy.org/en/20/core/engines_connections.html
        engine = get_connection()  # engine should be created just once, and can manage several DBAPI connections

        # create_engine() is lazy and does not contact the server, so check out
        # (and immediately release) one connection here. Without this, the
        # success message would print even when the database is unreachable.
        with engine.connect():
            pass

        print(
            f"Connection to the {host} for user {user} created successfully.")
    except Exception as ex:
        # Best-effort report only: the error is printed, not re-raised.
        print("Connection could not be made due to the following error: \n", ex)

Connection to the 172.17.0.2 for user root created successfully.


In [9]:
#make copies of the two output tables from the .ipynb <- change table name to a generic name to be consumed by the stored procedures
def copy_rename(type_table: str, status_table: str) -> None:
    """Copy the dated import tables into the generic ``*_new_import`` tables.

    For each pair, any existing target table is dropped, re-created with the
    structure of the source table (``CREATE TABLE ... LIKE``) and filled with
    all of the source's rows. The source tables are left untouched.

    Args:
        type_table: name of the source type table, copied into
            ``mem_type_new_import``.
        status_table: name of the source status table, copied into
            ``mem_status_new_import``.

    NOTE: the table names are interpolated directly into the SQL text
    (identifiers cannot be sent as bind parameters), so they must come from
    trusted code and never from user input.
    """
    # (target, source) pairs, processed in the same order as before: type, then status.
    copies = (
        ("mem_type_new_import", type_table),
        ("mem_status_new_import", status_table),
    )
    # engine.begin() commits when the block exits without an error and rolls
    # back otherwise. With engine.connect() and no explicit commit, SQLAlchemy
    # 2.0 rolls back on close, which would discard the final INSERT.
    with engine.begin() as conn:  # interacting w/db through Connection class (CORE approach)
        for target, source in copies:
            conn.execute(sqlalchemy.text(f"DROP TABLE IF EXISTS {target}"))
            conn.execute(sqlalchemy.text(f"CREATE TABLE {target} LIKE {source}"))
            conn.execute(sqlalchemy.text(f"INSERT INTO {target} SELECT * FROM {source}"))

In [10]:
# Copy the dated import tables (mem_type_MMDD / mem_status_MMDD naming) into the
# generic mem_type_new_import / mem_status_new_import tables.
# if I want to handle errors, the relevant error category is "programming-time error"
# (presumably sqlalchemy.exc.ProgrammingError -- TODO confirm)
copy_rename('mem_type_0406','mem_status_0406')

In [None]:
#attempt to show all the tables of the db: https://docs.sqlalchemy.org/en/20/core/metadata.html; look around for db metadata
#The MetaData object can also get a listing of tables and reflect the full set. This is achieved by using the reflect() method. 
from sqlalchemy import MetaData

#this is incomplete, as inspect provided a good solution
# NOTE: nothing in this cell reads or prints the reflected tables, so it produces no
# listing; the inspector-based cell is the one actually used to list table names.
metadata_obj = MetaData()
metadata_obj.reflect(bind=engine)
    

In [11]:
#inspector option: https://docs.sqlalchemy.org/en/20/core/reflection.html#fine-grained-reflection-with-inspector
# inspector is a low level interface which provides a backend-agnostic system of loading lists of schema, table, column, and constraint descriptions from a given database is also available.
from sqlalchemy import inspect
insp = inspect(engine)
table_name_list = insp.get_table_names()
# Verify the tables that copy_rename() creates (the generic *_new_import names),
# not the dated source tables: those must already exist for the copy to run at
# all, so checking them could never detect a failed copy.
if all(i in table_name_list for i in ('mem_type_new_import', 'mem_status_new_import')):
    print('all clear')
else:
    print('new tables from copy_rename() step aren\'t found in db')

all clear


Run each stored procedure; first check that the stored procedures exist on the db (query: *show procedure status where definer LIKE '%root%';*)

In [17]:
# List the stored procedures whose definer matches 'root'; the second column of
# each returned row is collected as the procedure name.
procedure_query = sqlalchemy.text("show procedure status where definer LIKE '%root%'")
with engine.connect() as conn:
    result = conn.execute(procedure_query)
    lista = [row[1] for row in result]

print(lista)

['GetStudentData', 'status_table_create', 'table_creations', 'typetablecreate', 'type_table_create']


In [18]:
from sqlalchemy import text
#a Core approach (because I'm interacting explicitly w/the engine as opposed to abstracted objects), where I write explicit SQL code
if all(i in lista for i in ('status_table_create', 'type_table_create')):
    # engine.begin() commits once both calls succeed and rolls back on error.
    # With engine.connect() and no explicit commit, SQLAlchemy 2.0 rolls back
    # on close, which can discard what the procedures wrote.
    with engine.begin() as conn:
        conn.execute(sqlalchemy.text("call type_table_create()"))
        conn.execute(sqlalchemy.text("call status_table_create()"))
else:
    print("stored procedures need to be compiled in server") # running the stored procedure codebase script (.sql) from Python is an option
    #attempting to run the .sql as scripts
    # NOTE(review): this branch only submits the .sql files; it does not call the
    # procedures afterwards, so the cell has to be re-run once they exist.
    # NOTE(review): text() treats ":name" sequences as bind parameters, and each
    # file is sent as a single statement -- confirm the scripts run this way.
    sql_scripts = (
        "/home/candela/Documents/greeneHill/membershipReportsCIVI/github/greeneHill/stored_procedure_create_type_tables.sql",
        "/home/candela/Documents/greeneHill/membershipReportsCIVI/github/greeneHill/stored_procedure_create_status_table.sql",
    )
    with engine.begin() as conn:
        for sql_path in sql_scripts:
            with open(sql_path) as file:
                conn.execute(sqlalchemy.text(file.read()))


In [25]:
# Take a fresh inspector so the table list reflects what the stored procedures just created.
insp = inspect(engine)
table_name_list = insp.get_table_names()

# two key resultsets from the stored procedures: consolidated_mem_type_temp2 & consolidated_mem_status_temp2
expected_tables = {'consolidated_mem_type_temp2', 'consolidated_mem_status_temp2'}
if expected_tables.issubset(table_name_list):
    print('both key resultsets from the stored procedures verified in db')
else:
    print('stored procedures did not create the two key resultsets')

both key resultsets from the stored procedures verified in db


QA options post stored procedure calling:
- range of dates covered: new tables should extend **beyond** the legacy prod tables
- \# of total records, ie table size: new tables should have **more** records than legacy tables
- analyze a contingency table of status or types: shape or dimension of contingency of new tables should be > or = to legacy

The two stored procedures create persisted tables *consolidated_mem_type_temp2* and *consolidated_mem_status_temp2*. These serve as candidate tables to replace the prod tables *consolidated_mem_type* and *consolidated_mem_status*, respectively

In [48]:
'''
SELECT min(start_dt) FROM consolidated_mem_type_temp2;
SELECT min(start_dt) FROM consolidated_mem_status_temp2;
SELECT max(start_dt) FROM consolidated_mem_type_temp2;
SELECT max(start_dt) FROM consolidated_mem_status_temp2;

SELECT min(start_dt) FROM consolidated_mem_type;
SELECT min(start_dt) FROM consolidated_mem_status;
SELECT max(start_dt) FROM consolidated_mem_type;
SELECT max(start_dt) FROM consolidated_mem_status;'''

from sqlalchemy import MetaData # a CORE approach
from sqlalchemy import Table
from sqlalchemy.sql import select
from sqlalchemy.sql import func
metadata_obj = MetaData()  # a container object that the reflected tables are associated with

# Table reflection: build Table objects from the definitions persisted in the db.
# Candidate tables produced by the stored procedures run in the step above:
consolidated_mem_type_temp2 = Table("consolidated_mem_type_temp2", metadata_obj, autoload_with=engine)
consolidated_mem_status_temp2 = Table("consolidated_mem_status_temp2", metadata_obj, autoload_with=engine)
# Pre-existing (to the calling of the stored procedures) consolidated tables:
consolidated_mem_type = Table("consolidated_mem_type", metadata_obj, autoload_with=engine)
consolidated_mem_status = Table("consolidated_mem_status", metadata_obj, autoload_with=engine)


def _start_dt_bounds(conn, table):
    """Return ``(min_row, max_row)`` for ``table.c.start_dt``.

    Each element is what ``.first()`` returns for a one-column aggregate
    query: a row holding a single value, not a bare scalar. The two queries
    are labelled "minstart" and "maxstart" respectively.
    """
    min_row = conn.execute(select(func.min(table.c.start_dt).label("minstart"))).first()
    max_row = conn.execute(select(func.max(table.c.start_dt).label("maxstart"))).first()
    return min_row, max_row


with engine.connect() as conn:  # Connection instances are typical for CORE, Sessions for ORM
    # Same query order as before: legacy then replacement, type then status.
    min_legacy_type, max_legacy_type = _start_dt_bounds(conn, consolidated_mem_type)
    min_replace_type, max_replace_type = _start_dt_bounds(conn, consolidated_mem_type_temp2)

    min_legacy_status, max_legacy_status = _start_dt_bounds(conn, consolidated_mem_status)
    min_replace_status, max_replace_status = _start_dt_bounds(conn, consolidated_mem_status_temp2)

In [51]:
# Date-range QA, in order:
#   a - type tables: legacy and replacement share the same earliest start_dt
#   b - type tables: replacement's latest start_dt is later than legacy's
#   c - status tables: legacy and replacement share the same earliest start_dt
#   d - status tables: replacement's latest start_dt is later than legacy's
date_checks = [
    min_legacy_type == min_replace_type,
    max_legacy_type < max_replace_type,
    min_legacy_status == min_replace_status,
    max_legacy_status < max_replace_status,
]
a, b, c, d = date_checks

print(date_checks)

[True, False, True, True]


In [None]:
# ensure that replacement table is longer than pre-existing

In [52]:
# Show the two max(start_dt) rows compared in check `b` (legacy vs. replacement type table).
print([max_legacy_type,max_replace_type])

[(datetime.datetime(2024, 2, 17, 0, 0),), (datetime.datetime(2024, 2, 17, 0, 0),)]
