Trial Targeting Workflow
1) Import and replace DB versions of the shopping log and member contacts table (CIVI reports **"Constituent Summary"** and **"Shopping Log - Activity Report"**; correspondignly **ingestMemberContactInfo.ipynb** and **ingestMemberShopping.ipynb**)
2) run the trial_shopping .sql/stored procedure that selects for targets
3) extract the result set that contains contact info and shopping stats

Place raw CIVI report extracts into **/membershipReportsCIVI/membershipReportingLogicSampleReports/**
NOTE: the stored procedure contains all the logic to calculate avg trips per cohort, and *currently* DOES NOT apply "leave out current trial member" when calculating avg trips 

In [1]:
# module contains some very basic pd. data frame processing
import sqlalchemy
import pandas as pd
import datetime
import sys
import importlib
from container_credentials import return_credentials

In [2]:
import sys
sys.path

['/home/mofongo/Documents/ghfc/membershipReportsCIVI/greeneHill',
 '/home/mofongo/anaconda3/envs/py12/lib/python312.zip',
 '/home/mofongo/anaconda3/envs/py12/lib/python3.12',
 '/home/mofongo/anaconda3/envs/py12/lib/python3.12/lib-dynload',
 '',
 '/home/mofongo/anaconda3/envs/py12/lib/python3.12/site-packages',
 '/home/mofongo/anaconda3/envs/py12/lib/python3.12/site-packages/setuptools/_vendor']

In [2]:
import fileProcessing2
importlib.reload(fileProcessing2)
from fileProcessing2 import process_shoplog, process_contacts

In [9]:
shop = process_shoplog('/home/mofongo/Documents/ghfc/membershipReportsCIVI/membershipReportingLogicSampleReports/shoppingLogReport_20241203.csv')
contacts = process_contacts('/home/mofongo/Documents/ghfc/membershipReportsCIVI/membershipReportingLogicSampleReports/constituentSummaryReport_20250105.csv')

In [16]:
#ensure the dataframes for shop and contacts are appropriate
contacts_qa = all([i in contacts.columns for i in ['contact_name', 'first_name', 'last_name', 'phone']])
shop_qa = all([i in shop.columns for i in ['Target_Email']])

if all([contacts_qa,shop_qa]):
    print("ok to proceed as the dataframe conlumns are as expected")
else:
    print("check the dataframe imports as the columns don't conform to the expected")

ok to proceed as the dataframe conlumns are as expected


In [17]:
# DEFINE THE DATABASE CREDENTIALS
cred_dict = return_credentials()

user = cred_dict['user'] 
password = cred_dict['pass'] 
host = cred_dict['host'] 
port = cred_dict['port'] 
database = cred_dict['database']

def get_connection():
	return sqlalchemy.create_engine(
		url="mysql+pymysql://{0}:{1}@{2}:{3}/{4}".format(
			user, password, host, port, database
		)
	)

if __name__ == '__main__':

	try:
	
		# GET THE CONNECTION OBJECT (ENGINE) FOR THE DATABASE
		engine = get_connection()
		print(
			f"Connection to the {host} for user {user} created successfully.")
	except Exception as ex:
		print("Connection could not be made due to the following error: \n", ex)


Connection to the 172.17.0.2 for user root created successfully.


In [18]:
#db injection: DO NOT change these table names as these are hard-coded in the trial_shopping stored procedure
with engine.connect() as conn:
    shop.loc[:,['Target_Name', 'Target_Email', 'Activity_Type', 'Subject', 'Activity_Date','ingest_date']].to_sql('shop_log', 
    con=engine, if_exists='replace', index=False)

    contacts.to_sql('member_directory2', con=engine, if_exists='replace', index=False)

Call the stored procedure. Ultimately creates a temporary table **trial_tgts** with the period's target individuals

In [19]:
with engine.connect() as conn:
    result = conn.execute(sqlalchemy.text("show procedure status where definer LIKE '%root%'"))
    lista = [i[1] for i in result.all()]

if 'trial_targets' in lista:
    print('ok to proceed, the stored procedure is located in the db server')
else: 
    raise ValueError('stored procedure trial_targets not found in DB')

ok to proceed, the stored procedure is located in the db server


In [20]:
from sqlalchemy import text
#a Core approach (because I'm interacting explicitly w/the engine as opposed to abstracted objects), where I write explicit SQL code
#copied code from https://docs.sqlalchemy.org/en/20/core/connections.html
connection = engine.raw_connection()
try:
    cursor_obj = connection.cursor()
    cursor_obj.callproc("trial_targets") #./stored_procedure_trial_shopping.sql
    cursor_obj.close()
    connection.commit()
finally:
        connection.close()


In [21]:
sql = "SELECT * FROM trial_tgts"
df = pd.read_sql(sql, con=engine)

In [22]:
df.shape

(23, 10)

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   FirstDayOfWeek    23 non-null     object 
 1   email             23 non-null     object 
 2   trips             23 non-null     int64  
 3   trial_expiration  23 non-null     object 
 4   orig_type_clean   23 non-null     object 
 5   cohort_avg        23 non-null     float64
 6   contact_name      23 non-null     object 
 7   first_name        23 non-null     object 
 8   last_name         23 non-null     object 
 9   phone             23 non-null     object 
dtypes: float64(1), int64(1), object(8)
memory usage: 1.9+ KB


In [24]:
df.head()

Unnamed: 0,FirstDayOfWeek,email,trips,trial_expiration,orig_type_clean,cohort_avg,contact_name,first_name,last_name,phone
0,2024-06-09,cecarey17@gmail.com,11,2024-12-12,6 mo trial,8.4667,"Carey, Catherine",Catherine,Carey,2068329092
1,2024-06-16,strangefuturesawait@gmail.com,22,2024-12-17,6 mo trial,3.2143,"Kies, Lili",Lili,Kies,3233630588
2,2024-06-30,tanya.marquardt@gmail.com,9,2025-01-01,6 mo trial,5.0,"Marquardt, Tamás",Tamás,Marquardt,9172804393
3,2024-09-29,sarahnoelmanney@gmail.com,7,2024-12-03,2 mo trial,4.2222,"Manney, Sarah",Sarah,Manney,6504336755
4,2024-10-06,zucchinali.sara1990@gmail.com,12,2024-12-09,2 mo trial,3.0,"Zucchinali, Sara",Sara,Zucchinali,9292173648


In [25]:
#clean up some columns headers
df.rename(columns = {'FirstDayOfWeek':'week_of_trial_start','orig_type_clean':'trial_type'},inplace = True)

In [26]:
filepath = '/home/mofongo/Documents/ghfc/membershipReportsCIVI/trial_target_roster/'
filename = 'trial_tgts_'+datetime.datetime.strftime(datetime.date.today(),format="%Y-%m-%d")


In [27]:

df.to_csv(filepath+filename+'.csv',index = False)