# Email Data Pipeline

This notebook extracts data from the Qardio Google Sheets workbook, cleans it, and loads it into the `emails` table of the Qardio SQL database.

In [40]:
import getpass
import os

import pandas as pd
import sqlalchemy

from google_apis import gsheets_functions as gs

# Connection settings are read from the environment so that credentials never
# live in the notebook. The non-secret settings fall back to the local
# development defaults; the password has no default and is prompted for when
# QARDIO_DB_PASSWORD is unset.
database = os.environ.get('QARDIO_DB_NAME', 'qardio')
host = os.environ.get('QARDIO_DB_HOST', '127.0.0.1')
user = os.environ.get('QARDIO_DB_USER', 'root')
password = (os.environ.get('QARDIO_DB_PASSWORD')
            or getpass.getpass(f'MySQL password for {user}@{host}: '))

# URL.create escapes each component, so a password containing '@', '/' or ':'
# cannot corrupt the connection URL the way string interpolation would.
url = sqlalchemy.engine.URL.create(
    drivername='mysql+mysqlconnector',
    username=user,
    password=password,
    host=host,
    database=database,
)

# echo=True logs every SQL statement the engine emits to the cell output.
engine = sqlalchemy.create_engine(url, echo=True)
conn = engine.connect()

# Show every column when displaying wide frames.
pd.set_option('display.max_columns', 500)

2023-06-22 15:07:02,645 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2023-06-22 15:07:02,646 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-06-22 15:07:02,659 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2023-06-22 15:07:02,661 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-06-22 15:07:02,664 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2023-06-22 15:07:02,677 INFO sqlalchemy.engine.Engine [raw sql] {}


In [41]:
# Source sheet for the pipeline. `wb` looks like a Google Sheets workbook key
# and `ws` a worksheet (tab) name -- TODO confirm against gspread_read.
sheet_source = {
    'wb': '1dKiIp2ETyzfKHzR9fMeHrgf2N_UKcSa5uPWathJzgIw',
    'ws': '22_+_23',
}

# The cells below treat the result as a pandas DataFrame.
qardio_data = gs.gspread_read(**sheet_source)

Clean column headers

In [42]:
# Normalise the sheet headers: trim surrounding whitespace, lower-case,
# turn spaces into underscores, and remove periods.
# `regex=False` forces a literal match on every pandas version; '.' is a regex
# wildcard, and the default value of `regex` has changed between releases.
qardio_data.columns = (
    qardio_data.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('.', '', regex=False)
)

# These columns are not loaded into the database.
# NOTE(review): presumably redundant with `date_sent` -- confirm.
qardio_data = qardio_data.drop(columns=['year', 'day', 'time_pst'])

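Fix dtypes: parse `date_sent` as a datetime and convert the text-formatted columns from `recipients` onward to numeric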

In [43]:
# Parse the send date into a datetime column.
qardio_data['date_sent'] = pd.to_datetime(qardio_data['date_sent'])

# Formatting artefacts that stop the sheet's metric columns from parsing as
# numbers: currency and percent signs, thousands separators, and the
# '#DIV/0!' error marker.
SHEET_NOISE = ('$', '%', ',', '#DIV/0!')

# Only columns from `recipients` onward that are still object-typed need cleaning.
metric_columns = qardio_data.loc[:, 'recipients':].select_dtypes('object').columns

for col in metric_columns:
    cleaned = qardio_data[col].astype('string')
    for token in SHEET_NOISE:
        # regex=False: '$' is a regex anchor, so always match it literally
        # instead of relying on the pandas-version-dependent default.
        cleaned = cleaned.str.replace(token, '', regex=False)
    # Assign only after conversion succeeds, so a failing column is left
    # untouched instead of half-cleaned. Unparseable values still raise.
    qardio_data[col] = pd.to_numeric(cleaned)
    print(col)

open_rate
clicks
click_rate
true_click_rate
click_per_unique_opens
%_unsubs
%_unsubs_openers
sessions
sess_per_unique_opens
ecr
total_trans
revenue
aov
qardioarm
qardiobase_2
qardiobase_x
qardiotemp
qardiospo2
qa_case
qa+qbx_bundle
qa+qb_bundle
qa+spo2
qtemp+spo2
ultimate_bundle
qardio_core
total_quantity


In [44]:
# Preview the first rows of the cleaned frame before loading it.
qardio_data.head()

Unnamed: 0,campaign_name,date_sent,subject_line,subscribers,country,recipients,opens,open_rate,clicks,click_rate,true_click_rate,click_per_unique_opens,unsubs,%_unsubs,%_unsubs_openers,bounced,sessions,sess_per_unique_opens,ecr,total_trans,revenue,aov,qardioarm,qardiobase_2,qardiobase_x,qardiotemp,qardiospo2,qa_case,qa+qbx_bundle,qa+qb_bundle,qa+spo2,qtemp+spo2,ultimate_bundle,qardio_core,total_quantity
0,NY_22_E1_AU,2022-01-04,Start the new year with your full-body health ...,NSL,AU,12507,4142,33.12,36.0,0.29,0.1,0.87,23,0.18,0.56,28,24.0,0.58,0.0,,,,,,,,,,,,,,,,
1,NY_22_E1_UK,2022-01-04,Start the new year with your full-body health ...,NSL,UK,36416,11734,32.22,85.0,0.23,0.09,0.72,51,0.14,0.43,83,70.0,0.6,1.43,1.0,121.63,121.63,1.0,,,,,,,,,,,,
2,NY_22_E1_ES,2022-01-04,Start the new year with your full-body health ...,NSL,ES,8021,2917,36.37,9.0,0.11,0.05,0.31,5,0.06,0.17,36,7.0,0.24,0.0,,,,,,,,,,,,,,,,
3,NY_22_E1_IT,2022-01-04,Start the new year with your full-body health ...,NSL,IT,15755,5186,32.92,23.0,0.15,0.03,0.44,18,0.11,0.35,153,11.0,0.21,0.0,,,,,,,,,,,,,,,,
4,NY_22_E1_NL,2022-01-04,Start the new year with your full-body health ...,NSL,NL,3544,1423,40.15,15.0,0.42,0.14,1.05,10,0.28,0.7,10,6.0,0.42,0.0,,,,,,,,,,,,,,,,


## Save data to SQL database

In [45]:
# Write the cleaned frame to the `emails` table, replacing any existing table.
# `engine.begin()` wraps the write in a transaction that is committed when the
# block exits and rolled back on error. pandas does not commit a transaction
# that is already open on a Connection passed in as `con`, so writing through
# a long-lived connection can leave the inserted rows uncommitted.
with engine.begin() as connection:
    rows_written = qardio_data.to_sql(name='emails',
                                      con=connection,
                                      if_exists='replace',
                                      index=False)

# Keep the affected-row count as the cell output.
rows_written

2023-06-22 15:07:05,386 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-06-22 15:07:05,418 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2023-06-22 15:07:05,419 INFO sqlalchemy.engine.Engine [generated in 0.00198s] {'table_schema': 'qardio', 'table_name': 'emails'}
2023-06-22 15:07:05,437 INFO sqlalchemy.engine.Engine 
CREATE TABLE emails (
	campaign_name TEXT, 
	date_sent DATETIME, 
	subject_line TEXT, 
	subscribers TEXT, 
	country TEXT, 
	recipients BIGINT, 
	opens BIGINT, 
	open_rate FLOAT(53), 
	clicks FLOAT(53), 
	click_rate FLOAT(53), 
	true_click_rate FLOAT(53), 
	click_per_unique_opens FLOAT(53), 
	unsubs BIGINT, 
	`%_unsubs` FLOAT(53), 
	`%_unsubs_openers` FLOAT(53), 
	bounced BIGINT, 
	sessions FLOAT(53), 
	sess_per_unique_opens FLOAT(53), 
	ecr FLOAT(53), 
	total_trans FLOAT(53), 
	revenue FLOAT(53), 
	aov FLOAT(53), 
	qardioarm FLOAT(53), 
	qardiobase_2 FLOAT(53), 
	qard

1548