In [125]:
import pandas as pd
import numpy as np

import sys, os, requests, json, datetime
sys.path.insert(1, '../../scripts/')
from s3_support import *
%matplotlib inline

In [126]:
def fetch_table(table):
    url = 'https://secure.qgiv.com/admin/qgivadmin/statistics/export_tables.php'
    payload = {'key': 'DSQR59VwyFhw21PKDF4K', 'table': table}

    rsp = requests.post(url, data=payload)
    json_data = json.loads(rsp.content)

    return json_data

# sms campaigns

In [127]:
# fresh table fetch
smscamps_data = fetch_table("smscampaign")
smscampaigns = pd.DataFrame(smscamps_data[0])
smscampaigns['created'] = pd.to_datetime(smscampaigns['created'])
smscampaigns['org'] = smscampaigns['creatingEntity'].astype(int)
smscampaigns.drop(['creatingEntity', 'creatingEntityType'], axis=1, inplace=True)

In [128]:
sms_orgs = smscampaigns['org'].unique().tolist()

print("{:,} orgs".format(len(sms_orgs)))
print("{:,.2f} campaigns per org".format(smscampaigns.groupby('org')['id'].count().mean()))
print("{:,} total campaigns".format(len(smscampaigns)))

311 orgs
4.95 campaigns per org
1,538 total campaigns


In [129]:
earliest_create = smscampaigns['created'].min()
latest_send = smscampaigns['created'].max()

print("campaigns from {} to {}".format(earliest_create, latest_send))

campaigns from 2019-03-18 13:50:14 to 2021-09-01 16:59:36


In [130]:
smscampaigns.groupby(smscampaigns['created'].dt.year)['id'].count().reset_index()

Unnamed: 0,created,id
0,2019,506
1,2020,676
2,2021,356


In [131]:
smscampaigns.groupby(smscampaigns['created'].dt.year)['org'].unique().apply(len)

created
2019    131
2020    156
2021    100
Name: org, dtype: int64

# transactions

## orgs w/ sms campaigns

In [132]:
# all trans for date range & orgs
q = "select id, date, org, amount, source from transactions where org in ({}) and status='A'"
q = q.format(", ".join([str(org) for org in sms_orgs]))

trans = redshift_query_read(q, schema='public')
trans['date'] = pd.to_datetime(trans['date'])
trans['org'] = trans['org'].astype(int)

trans = trans[trans['date'].dt.year>=2019]

In [133]:
print("{:,} transactions".format(len(trans)))
print("{:,.2f} transactions per org".format(trans.groupby('org')['id'].count().mean()))
print("{:,} sms transactions".format(len(trans[trans['source']=='sms'])))
print("{:,.2f} sms transactions per org".format(trans[trans['source']=='sms'].groupby('org')['id'].count().mean()))

677,493 transactions
2,656.84 transactions per org
20,159 sms transactions
104.45 sms transactions per org


In [134]:
trans['year'] = trans['date'].dt.year
trans_grpd = trans[trans['source']=='sms'].groupby(['year', 'org'])['id'].count().reset_index()
trans_grpd = trans_grpd.groupby(['year'])['id'].mean().reset_index()
trans_grpd.columns = ['year', 'count per org']
trans_grpd['YoY growth'] = trans_grpd['count per org'].pct_change()
trans_grpd[trans_grpd['year']>=2017]

Unnamed: 0,year,count per org,YoY growth
0,2019,94.583333,
1,2020,47.858268,-0.49401
2,2021,44.436782,-0.071492


## orgs w/out sms campaigns

In [135]:
# all trans for date range & orgs
q = "select id, date, org, amount, source from transactions where org not in ({}) and status='A'"
q = q.format(", ".join([str(org) for org in sms_orgs]))

trans_wout_camps = redshift_query_read(q, schema='public')
trans_wout_camps['date'] = pd.to_datetime(trans['date'])
trans_wout_camps['org'] = trans_wout_camps['org'].astype(int)

trans_wout_camps = trans_wout_camps[trans_wout_camps['date'].dt.year>=2019]

In [136]:
print("{:,} transactions".format(len(trans_wout_camps)))
print("{:,.2f} transactions per org".format(trans_wout_camps.groupby('org')['id'].count().mean()))
print("{:,} sms transactions".format(len(trans_wout_camps[trans_wout_camps['source']=='sms'])))
print("{:,.2f} sms transactions per org".format(trans_wout_camps[trans_wout_camps['source']=='sms'].groupby('org')['id'].count().mean()))

677,493 transactions
323.08 transactions per org
2,000 sms transactions
10.81 sms transactions per org


In [137]:
# source transactions by org
q = '''select 
            org, 
            source, 
            date_trunc('year', date) as year, 
            count(id) as trans_count, 
            sum(amount) as trans_vol 
        from transactions 
        where status='A' 
        group by org, source, date_trunc('year', date)'''
trans_source_orgs = redshift_query_read(q, schema='public')

In [138]:
grpd = trans_source_orgs.groupby(['source', 'year'])[['trans_count', 'trans_vol']].mean().reset_index()
grpd = grpd[grpd['year'].isin(['2018-01-01', '2019-01-01', '2020-01-01', '2021-01-01'])]
grpd.pivot(index='source', columns='year', values=['trans_count', 'trans_vol'])

Unnamed: 0_level_0,trans_count,trans_count,trans_count,trans_count,trans_vol,trans_vol,trans_vol,trans_vol
year,2018-01-01,2019-01-01,2020-01-01,2021-01-01,2018-01-01,2019-01-01,2020-01-01,2021-01-01
source,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
don_form,298.499188,329.425841,439.862348,279.623064,43662.025317,44279.912135,122461.446985,80325.961848
fb,10.21875,40.581395,47.888199,51.376238,1296.259687,2627.481977,2592.40528,2887.711287
givi,21.47561,60.533333,49.205128,34.82449,2770.348049,7050.442267,7212.449011,6147.551673
kiosk,465.734375,561.019417,352.746667,238.951613,55713.350312,62039.630291,29283.8688,33039.042903
mobile,93.575597,102.545166,123.058125,83.342388,11028.988683,12043.587464,14227.320244,9496.855009
mobilevt,49.864286,61.896667,31.715054,28.782946,7992.769036,8535.082567,3135.682957,2648.423023
p2p,620.496144,666.332031,522.662939,403.545455,42652.174807,46547.952949,48888.870256,37312.014391
sms,27.902174,37.350442,54.373016,37.490385,2905.118326,3400.734124,4389.301329,3868.079391
vt,147.091019,147.591684,133.750246,92.26652,18056.495267,17474.79194,14155.527483,12243.223689


In [139]:
grpd.pivot(index='source', columns='year', values=['trans_count']).reset_index()

Unnamed: 0_level_0,source,trans_count,trans_count,trans_count,trans_count
year,NaT,2018-01-01,2019-01-01,2020-01-01,2021-01-01
0,don_form,298.499188,329.425841,439.862348,279.623064
1,fb,10.21875,40.581395,47.888199,51.376238
2,givi,21.47561,60.533333,49.205128,34.82449
3,kiosk,465.734375,561.019417,352.746667,238.951613
4,mobile,93.575597,102.545166,123.058125,83.342388
5,mobilevt,49.864286,61.896667,31.715054,28.782946
6,p2p,620.496144,666.332031,522.662939,403.545455
7,sms,27.902174,37.350442,54.373016,37.490385
8,vt,147.091019,147.591684,133.750246,92.26652


In [140]:
trans_source_orgs = trans_source_orgs[(~trans_source_orgs['org'].isin(sms_orgs))&(trans_source_orgs['source']=='sms')]
grpd = trans_source_orgs.groupby(['year'])[['trans_count', 'trans_vol']].mean().reset_index()
grpd.columns = ['year', 'count per org', 'trans_vol']
grpd['YoY growth'] = grpd['count per org'].pct_change()
grpd[grpd['year']>='2017-01-01']

Unnamed: 0,year,count per org,trans_vol,YoY growth
4,2017-01-01,23.84104,2154.155636,-0.10163
5,2018-01-01,23.507212,2225.232981,-0.014002
6,2019-01-01,23.824945,2357.635842,0.013516
7,2020-01-01,56.567639,3726.39435,1.374303
8,2021-01-01,34.804444,2673.696044,-0.384729


# merging data

## sample exploration

In [141]:
sample_camp_orgs = trans['org'].unique().tolist()[50:55]
sample_camp_orgs

[441560, 442516, 441969, 19437, 399363]

## tagging bulk data

In [142]:
def get_transactions_data(r):
    trans_len_org = len(trans[trans['org']==r['org']])
    trans_len_date = len(trans[(trans['org']==r['org'])&(trans['date']>=r['created'])&(trans['date']<=(r['created'] + datetime.timedelta(days=7)))])
    trans_len_conv_week = len(trans[(trans['org']==r['org'])&(trans['date']>=r['created'])&(trans['date']<=(r['created'] + datetime.timedelta(days=7)))&(trans['source']=='sms')])
    trans_len_conv_two_week = len(trans[(trans['org']==r['org'])&(trans['date']>=r['created'])&(trans['date']<=(r['created'] + datetime.timedelta(days=14)))&(trans['source']=='sms')])
    trans_len_conv_three_week = len(trans[(trans['org']==r['org'])&(trans['date']>=r['created'])&(trans['date']<=(r['created'] + datetime.timedelta(days=21)))&(trans['source']=='sms')])
    trans_len_conv_four_week = len(trans[(trans['org']==r['org'])&(trans['date']>=r['created'])&(trans['date']<=(r['created'] + datetime.timedelta(days=30)))&(trans['source']=='sms')])

    
    return {
        'len_org': trans_len_org, 
        'len_dates': trans_len_date, 
        'len_conv_1_week': trans_len_conv_week, 
        'len_conv_2_week': trans_len_conv_two_week, 
        'len_conv_3_week': trans_len_conv_three_week, 
        'len_conv_4_week': trans_len_conv_four_week
    }

In [143]:
trans_data = smscampaigns.apply(get_transactions_data, axis=1)

In [144]:
smscampaigns = smscampaigns.merge(pd.DataFrame([e for e in trans_data]), left_index=True, right_index=True)

In [145]:
smscampaigns.tail(3)

Unnamed: 0,id,created,listtype,status,listlength,org,len_org,len_dates,len_conv_1_week,len_conv_2_week,len_conv_3_week,len_conv_4_week
1535,1536,2021-08-30 13:34:12,1,1,520,434193,4383,0,0,0,0,0
1536,1537,2021-09-01 14:07:54,1,1,43,442129,12369,0,0,0,0,0
1537,1538,2021-09-01 16:59:36,1,1,4,444093,0,0,0,0,0,0


In [146]:
smscampaigns['len_dates'].describe()

count     1538.000000
mean        82.163849
std        620.073614
min          0.000000
25%          0.000000
50%          3.000000
75%         46.750000
max      13252.000000
Name: len_dates, dtype: float64

In [147]:
smscampaigns[['len_conv_1_week', 'len_conv_2_week', 
            'len_conv_3_week', 'len_conv_4_week']].agg(['count', 'mean', 'median', 'max'])

Unnamed: 0,len_conv_1_week,len_conv_2_week,len_conv_3_week,len_conv_4_week
count,1538.0,1538.0,1538.0,1538.0
mean,1.644993,2.071521,2.581274,2.940832
median,0.0,0.0,0.0,0.0
max,68.0,93.0,156.0,216.0


In [148]:
smscampaigns[smscampaigns['len_conv_1_week']>0][['len_conv_1_week', 'len_conv_2_week', 
            'len_conv_3_week', 'len_conv_4_week']].agg(['count', 'mean', 'median', 'max'])

Unnamed: 0,len_conv_1_week,len_conv_2_week,len_conv_3_week,len_conv_4_week
count,220.0,220.0,220.0,220.0
mean,11.5,13.395455,14.2,15.172727
median,3.0,5.0,6.0,6.0
max,68.0,93.0,156.0,216.0


In [149]:
print(smscampaigns[smscampaigns['len_conv_2_week']>0]['len_conv_2_week'].agg(['count', 'mean', 'median', 'max']))
print(smscampaigns[smscampaigns['len_conv_3_week']>0]['len_conv_3_week'].agg(['count', 'mean', 'median', 'max']))
print(smscampaigns[smscampaigns['len_conv_4_week']>0]['len_conv_4_week'].agg(['count', 'mean', 'median', 'max']))

count     278.000000
mean       11.460432
median      3.000000
max        93.000000
Name: len_conv_2_week, dtype: float64
count     317.000000
mean       12.523659
median      4.000000
max       156.000000
Name: len_conv_3_week, dtype: float64
count     375.000000
mean       12.061333
median      3.000000
max       216.000000
Name: len_conv_4_week, dtype: float64


In [150]:
len_smscampaigns = len(smscampaigns)
len_zero_one_week = len(smscampaigns[smscampaigns['len_conv_1_week']==0])
len_zero_two_weeks = len(smscampaigns[smscampaigns['len_conv_2_week']==0])
len_zero_three_weeks = len(smscampaigns[smscampaigns['len_conv_3_week']==0])
len_zero_four_weeks = len(smscampaigns[smscampaigns['len_conv_4_week']==0])

print("0 conversions after 1 week: {:,} ({:.2f}%)".format(len_zero_one_week, (len_zero_one_week / len_smscampaigns) * 100.))
print("0 conversions after 2 weeks: {:,} ({:.2f}%)".format(len_zero_two_weeks, (len_zero_two_weeks / len_smscampaigns) * 100.))
print("0 conversions after 3 weeks: {:,} ({:.2f}%)".format(len_zero_three_weeks, (len_zero_three_weeks / len_smscampaigns) * 100.))
print("0 conversions after 4 weeks: {:,} ({:.2f}%)".format(len_zero_four_weeks, (len_zero_four_weeks / len_smscampaigns) * 100.))

0 conversions after 1 week: 1,318 (85.70%)
0 conversions after 2 weeks: 1,260 (81.92%)
0 conversions after 3 weeks: 1,221 (79.39%)
0 conversions after 4 weeks: 1,163 (75.62%)


In [151]:
len_dates_gt_zero = len(smscampaigns[smscampaigns['len_dates']>0])
perc_dates_gt_zero = (len_dates_gt_zero / len(smscampaigns)) * 100.

len_source_gt_zero = len(smscampaigns[smscampaigns['len_conv_1_week']>0])
perc_source_gt_zero = (len_source_gt_zero / len(smscampaigns)) * 100.

print("len_dates > 0: {:,} ({:.2f}%)".format(len_dates_gt_zero, perc_dates_gt_zero))
print("len_dates_source > 0: {:,} ({:.2f}%)".format(len_source_gt_zero, perc_source_gt_zero))

len_dates > 0: 912 (59.30%)
len_dates_source > 0: 220 (14.30%)


### 1 week campaign conversion

In [152]:
sms_conv = smscampaigns[smscampaigns['len_conv_1_week']>0]['len_conv_1_week'].value_counts()

sms_conv = sms_conv.reset_index()
sms_conv.columns = ['sms conversions', 'count']
sms_conv['perc'] = sms_conv['count'] / sms_conv['count'].sum(axis=0)

In [153]:
sms_conv.head()

Unnamed: 0,sms conversions,count,perc
0,1,68,0.309091
1,2,30,0.136364
2,32,17,0.077273
3,3,14,0.063636
4,15,12,0.054545


In [154]:
other_rows = sms_conv[sms_conv['sms conversions']>10]
other_row = {
    'sms conversions': '> 10',
    'count': other_rows['count'].sum(),
    'perc': other_rows['perc'].sum()
}
sms_conv.sort_values('sms conversions', ascending=True).head(10).append(pd.DataFrame([other_row]))

Unnamed: 0,sms conversions,count,perc
0,1,68,0.309091
1,2,30,0.136364
3,3,14,0.063636
6,4,7,0.031818
7,5,7,0.031818
8,6,6,0.027273
9,7,5,0.022727
16,8,3,0.013636
26,9,1,0.004545
12,10,4,0.018182


In [155]:
d = [
    {'1': sms_conv[sms_conv['sms conversions']==1]['count'].iloc[0]},
    {'2 to 10': sms_conv[(sms_conv['sms conversions']>=2)&(sms_conv['sms conversions']<=10)]['count'].sum()},
    {'11 to 20': sms_conv[(sms_conv['sms conversions']>=11)&(sms_conv['sms conversions']<=20)]['count'].sum()},
    {'21 to 30': sms_conv[(sms_conv['sms conversions']>=21)&(sms_conv['sms conversions']<=30)]['count'].sum()},
    {'31 to 40': sms_conv[(sms_conv['sms conversions']>=31)&(sms_conv['sms conversions']<=40)]['count'].sum()},
    {'41 to 50': sms_conv[(sms_conv['sms conversions']>=41)&(sms_conv['sms conversions']<=50)]['count'].sum()},
    {'51 to 68': sms_conv[(sms_conv['sms conversions']>=51)]['count'].sum()}
]
df_one_week = pd.DataFrame(d).sum().reset_index()
df_one_week.columns = ['1 week conversions range', '1 week conversions count']
df_one_week

Unnamed: 0,1 week conversions range,1 week conversions count
0,1,68.0
1,2 to 10,77.0
2,11 to 20,27.0
3,21 to 30,17.0
4,31 to 40,19.0
5,41 to 50,5.0
6,51 to 68,7.0


### 1 month campaign conversions

In [156]:
sms_conv = smscampaigns[smscampaigns['len_conv_1_week']>0]['len_conv_4_week'].value_counts()

sms_conv = sms_conv.reset_index()
sms_conv.columns = ['sms conversions', 'count']
sms_conv['perc'] = sms_conv['count'] / sms_conv['count'].sum(axis=0)

In [157]:
other_rows = sms_conv[sms_conv['sms conversions']>10]
other_row = {
    'sms conversions': '> 10',
    'count': other_rows['count'].sum(),
    'perc': other_rows['perc'].sum()
}
sms_conv.sort_values('sms conversions', ascending=True).head(10).append(pd.DataFrame([other_row]))

Unnamed: 0,sms conversions,count,perc
0,1,33,0.15
1,2,30,0.136364
2,3,30,0.136364
11,4,4,0.018182
7,5,6,0.027273
4,6,15,0.068182
12,7,4,0.018182
6,8,6,0.027273
23,9,2,0.009091
18,10,3,0.013636


In [158]:
d = [
    {'1': sms_conv[sms_conv['sms conversions']==1]['count'].iloc[0]},
    {'2 to 10': sms_conv[(sms_conv['sms conversions']>=2)&(sms_conv['sms conversions']<=10)]['count'].sum()},
    {'11 to 20': sms_conv[(sms_conv['sms conversions']>=11)&(sms_conv['sms conversions']<=20)]['count'].sum()},
    {'21 to 30': sms_conv[(sms_conv['sms conversions']>=21)&(sms_conv['sms conversions']<=30)]['count'].sum()},
    {'31 to 40': sms_conv[(sms_conv['sms conversions']>=31)&(sms_conv['sms conversions']<=40)]['count'].sum()},
    {'41 to 50': sms_conv[(sms_conv['sms conversions']>=41)&(sms_conv['sms conversions']<=50)]['count'].sum()},
    {'51 to 68': sms_conv[(sms_conv['sms conversions']>=51)]['count'].sum()}
]
df_four_weeks = pd.DataFrame(d).sum().reset_index()
df_four_weeks.columns = ['4 week conversions range', '4 week conversions count']
df_four_weeks

Unnamed: 0,4 week conversions range,4 week conversions count
0,1,33.0
1,2 to 10,100.0
2,11 to 20,31.0
3,21 to 30,9.0
4,31 to 40,30.0
5,41 to 50,3.0
6,51 to 68,14.0


In [159]:
df_one_week.merge(df_four_weeks, left_index=True, right_index=True)

Unnamed: 0,1 week conversions range,1 week conversions count,4 week conversions range,4 week conversions count
0,1,68.0,1,33.0
1,2 to 10,77.0,2 to 10,100.0
2,11 to 20,27.0,11 to 20,31.0
3,21 to 30,17.0,21 to 30,9.0
4,31 to 40,19.0,31 to 40,30.0
5,41 to 50,5.0,41 to 50,3.0
6,51 to 68,7.0,51 to 68,14.0


# pledge reminders

In [162]:
pledge_reminders = fetch_table("smspledgereminders")

In [163]:
pledge_reminders = pd.DataFrame(pledge_reminders[0])
pledge_reminders.tail(3)

Unnamed: 0,id,smspledge,delayVal,delayType,specificTime,isRelative,reminderMsg
9637,10092,3500,1,days,0000-00-00 00:00:00,1,Thank you so much for your pledge to %PledgeNa...
9638,10093,3500,1,days,0000-00-00 00:00:00,1,Thank you so much for your pledge to %PledgeNa...
9639,10094,3500,1,days,0000-00-00 00:00:00,1,Thank you so much for your pledge to %PledgeNa...


In [212]:
print("{:,} reminders".format(len(pledge_reminders)))
print("{:,} pledges w/ reminders".format(len(pledge_reminders['smspledge'].unique())))

9,640 reminders
3,378 pledges w/ reminders


In [215]:
pledge_reminders['specificTime'].max()

'2021-09-21 12:32:00'

In [164]:
transsmspledges = fetch_table("transsmspledge")

In [165]:
transsmspledges = pd.DataFrame(transsmspledges[0])
transsmspledges.tail(3)

Unnamed: 0,id,created,lastSentReminder,orgId,formId,transactionId,smsPledgeId,amountPledged,pledgeStatus,sendReminders,reminderCounter,fulfilledTransId
61533,61746,2021-09-01 21:03:11,0000-00-00 00:00:00,364,47162,13086850,1144,0.0,2,0,0,13086858
61534,61747,2021-09-01 21:08:39,0000-00-00 00:00:00,364,47162,13086878,1144,0.0,1,1,0,0
61535,61748,2021-09-01 22:09:40,0000-00-00 00:00:00,1927,2015,13087139,1180,0.0,2,0,0,13087146


### reminder counts

In [166]:
transsmspledges['reminders'] = transsmspledges['id'].apply(lambda x: len(pledge_reminders[pledge_reminders['smspledge']==x]))
transsmspledges['reminders'].describe()

count    61536.000000
mean         0.155957
std          0.666562
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max         21.000000
Name: reminders, dtype: float64

In [230]:
rems_count = transsmspledges['reminders'].value_counts()
rems_perc = transsmspledges['reminders'].value_counts(normalize=True)

In [231]:
rems = rems_count.reset_index().merge(rems_perc.reset_index(), on='index')
rems.columns = ['reminders', 'count', 'perc']
rems.sort_values('reminders', ascending=True)

Unnamed: 0,reminders,count,perc
0,0,36342,0.590581
1,1,21987,0.357303
3,2,274,0.004453
2,3,2892,0.046997
4,4,28,0.000455
5,5,5,8.1e-05
6,6,3,4.9e-05
7,8,2,3.3e-05
9,9,1,1.6e-05
8,15,1,1.6e-05


### reminder counts by fulfilled pledge

In [232]:
print("{:,} orgs have transsmsplegdes".format(len(transsmspledges['orgId'].unique())))
print("{:,} orgs have sent reminders".format(len(transsmspledges[transsmspledges['reminders']>0]['orgId'].unique())))

708 orgs have transsmsplegdes
653 orgs have sent reminders


In [169]:
transsmspledges['fulfilledTransId'] = transsmspledges['fulfilledTransId'].astype(int)
transsmspledges['fulfilled'] = transsmspledges['fulfilledTransId']!=0

In [170]:
rem_fulfilled = transsmspledges.groupby(['fulfilled', 'reminders'])['id'].count().reset_index()
rem_fulfilled = rem_fulfilled.pivot(index='reminders', columns='fulfilled', values='id')
rem_fulfilled.columns = ['not fulfilled', 'fulfilled']
rem_fulfilled.reset_index().fillna(0)

Unnamed: 0,reminders,not fulfilled,fulfilled
0,0,23840.0,34332.0
1,1,71.0,86.0
2,2,140.0,134.0
3,3,1443.0,1449.0
4,4,13.0,15.0
5,5,2.0,3.0
6,6,2.0,1.0
7,8,1.0,1.0
8,9,1.0,0.0
9,15,0.0,1.0


In [171]:
smspledges_reminders = rems.sort_values('reminders', ascending=True).merge(rem_fulfilled, on='reminders')
smspledges_reminders.columns = ['reminders', 'count', 'percentage', 'not fulfilled', 'fulfilled']
smspledges_reminders['count'] = smspledges_reminders['count'].apply(lambda x: "{:,}".format(x))
smspledges_reminders['not fulfilled'] = smspledges_reminders['not fulfilled'].fillna(0).astype(int).apply(lambda x: "{:,}".format(x))
smspledges_reminders['fulfilled'] = smspledges_reminders['fulfilled'].fillna(0).astype(int).apply(lambda x: "{:,}".format(x))
smspledges_reminders['percentage'] = smspledges_reminders['percentage'].apply(lambda x: "{:.2f}%".format(x*100.))
smspledges_reminders

Unnamed: 0,reminders,count,percentage,not fulfilled,fulfilled
0,0,58172,94.53%,23840,34332
1,1,157,0.26%,71,86
2,2,274,0.45%,140,134
3,3,2892,4.70%,1443,1449
4,4,28,0.05%,13,15
5,5,5,0.01%,2,3
6,6,3,0.00%,2,1
7,8,2,0.00%,1,1
8,9,1,0.00%,1,0
9,15,1,0.00%,0,1


In [204]:
smspledges_reminders['fulfilled percentage'] = ((smspledges_reminders['fulfilled'].str.replace(',', '').astype(int) / smspledges_reminders['fulfilled'].str.replace(',', '').astype(int).sum()) * 100.)
smspledges_reminders

Unnamed: 0,reminders,count,percentage,not fulfilled,fulfilled,not fulfilled percentage,fulfilled percentage
0,0,58172,94.53%,23840,34332,93.438896,95.308423
1,1,157,0.26%,71,86,0.278279,0.238743
2,2,274,0.45%,140,134,0.548718,0.371995
3,3,2892,4.70%,1443,1449,5.655718,4.022542
4,4,28,0.05%,13,15,0.050952,0.041641
5,5,5,0.01%,2,3,0.007839,0.008328
6,6,3,0.00%,2,1,0.007839,0.002776
7,8,2,0.00%,1,1,0.003919,0.002776
8,9,1,0.00%,1,0,0.003919,0.0
9,15,1,0.00%,0,1,0.0,0.002776


In [217]:
transsmspledges['created'] = pd.to_datetime(transsmspledges['created'])
transsmspledges['month'] = transsmspledges['created'].dt.to_period('M')
transsmspledges.groupby('month')[['fulfilled', 'reminders']].sum()

Unnamed: 0_level_0,fulfilled,reminders
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-03,31,232
2019-04,138,1114
2019-05,744,4032
2019-06,519,2444
2019-07,282,1578
2019-08,228,197
2019-09,737,0
2019-10,926,0
2019-11,1193,0
2019-12,964,0


In [220]:
tsms_pled = transsmspledges[transsmspledges['lastSentReminder']!='0000-00-00 00:00:00']
tsms_pled['lastSentReminderMonth'] = pd.to_datetime(tsms_pled['lastSentReminder']).dt.to_period('M')
tsms_pled.groupby('lastSentReminderMonth')['id'].count()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


lastSentReminderMonth
2019-03      16
2019-04     279
2019-05     555
2019-06     430
2019-07     294
2019-08     315
2019-09     524
2019-10    1166
2019-11     762
2019-12     664
2020-01     489
2020-02    3691
2020-03    1133
2020-04    1751
2020-05     633
2020-06     910
2020-07     378
2020-08     463
2020-09     683
2020-10     894
2020-11    1161
2020-12    1042
2021-01      82
2021-02       3
2021-03    1990
2021-04     758
2021-05    1236
2021-06     563
2021-07     277
2021-08     356
2021-09      36
Freq: M, Name: id, dtype: int64

In [223]:
transsmspledges['reminders'] = transsmspledges.apply(lambda x: 1 if x['reminders'] == 0 and x['lastSentReminder']!='0000-00-00 00:00:00' else x['reminders'], axis=1)

In [228]:
t_rems = transsmspledges.groupby(['reminders', 'fulfilled'])['id'].count().reset_index()
t_rems = t_rems.pivot(index='reminders', columns='fulfilled', values='id').reset_index()
t_rems.columns = ['reminders', 'not fulfilled', 'fulfilled']
t_rems['perc fulfilled'] = t_rems['fulfilled'] / t_rems['fulfilled'].sum()
t_rems

Unnamed: 0,reminders,not fulfilled,fulfilled,perc fulfilled
0,0,5154.0,31188.0,0.865804
1,1,18757.0,3230.0,0.089667
2,2,140.0,134.0,0.00372
3,3,1443.0,1449.0,0.040225
4,4,13.0,15.0,0.000416
5,5,2.0,3.0,8.3e-05
6,6,2.0,1.0,2.8e-05
7,8,1.0,1.0,2.8e-05
8,9,1.0,,
9,15,,1.0,2.8e-05


In [236]:
tsms_pled = transsmspledges[transsmspledges['lastSentReminder']!='0000-00-00 00:00:00']
tsms_pled['reminder_time'] = pd.to_datetime(tsms_pled['lastSentReminder']) - pd.to_datetime(tsms_pled['created'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [238]:
tsms_pled.groupby('fulfilled')['reminder_time'].agg({'mean', 'median'})

Unnamed: 0_level_0,mean,median
fulfilled,Unnamed: 1_level_1,Unnamed: 2_level_1
False,7 days 03:17:56.341990267,6 days 00:00:39
True,2 days 19:13:35.023557126,1 days 00:00:52


# 2020 processing drop

Organizations with SMS campaigns experienced a 50% drop in SMS processing in 2020. Going to look at annual processing for each group of org's (with & without campaigns) to see if they experienced an overall drop in processing that correlates to the drop in SMS processing.

In [172]:
q = "select org, date_trunc('year', date) as year, count(id) as trans_count from transactions group by org, date_trunc('year', date)"
df_annual = redshift_query_read(q, schema='public')

In [173]:
df_annual['sms'] = df_annual['org'].isin(sms_orgs)
df_annual = df_annual[df_annual['year']>='2017-01-01']
df_annual.sort_values('year', ascending=True, inplace=True)
df_annual['year'] = df_annual['year'].dt.year
df_annual['org'] = df_annual['org'].astype(int)

In [174]:
df_annual['year'].unique()

array([2017, 2018, 2019, 2020, 2021])

In [175]:
df_annual.tail()

Unnamed: 0,org,year,trans_count,sms
12614,447062,2021,1,False
9636,34803,2021,1,False
12619,444952,2021,1,False
9679,443123,2021,1,False
14923,447283,2021,2,False


In [176]:
df_annual['sms'].value_counts()

False    14321
True       782
Name: sms, dtype: int64

In [177]:
df_annual.groupby('org')['year'].count().mean(), df_annual.groupby(['org', 'year'])['trans_count'].mean().mean()

(2.9044230769230768, 595.4800370787261)

In [178]:
d = []
for org in df_annual['org'].unique().tolist():
    this_df = df_annual[df_annual['org']==org].copy()
    this_df['perc'] = this_df['trans_count'].pct_change()
    
    d.append({
        'org': org,
        'sms': this_df['sms'].iloc[0],
        '2018': this_df[this_df['year']==2018]['perc'].iloc[0] if len(this_df[this_df['year']==2018]) > 0 else np.nan,
        '2019': this_df[this_df['year']==2019]['perc'].iloc[0] if len(this_df[this_df['year']==2019]) > 0 else np.nan,
        '2020': this_df[this_df['year']==2020]['perc'].iloc[0] if len(this_df[this_df['year']==2020]) > 0 else np.nan,
        '2021': this_df[this_df['year']==2021]['perc'].iloc[0] if len(this_df[this_df['year']==2021]) > 0 else np.nan
    })

In [179]:
df_d = pd.DataFrame(d)
df_d[df_d['org']==436247]

Unnamed: 0,org,sms,2018,2019,2020,2021
0,436247,False,1.979793,0.256952,0.18254,-0.099852


In [180]:
df_d.groupby('sms')[['2018', '2019', '2020', '2021']].agg({'mean', 'median'})

Unnamed: 0_level_0,2018,2018,2019,2019,2020,2020,2021,2021
Unnamed: 0_level_1,mean,median,mean,median,mean,median,mean,median
sms,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
False,13.171262,0.075472,9.992325,0.125,6.103018,0.08779,3.974985,-0.452007
True,18.435443,0.240504,14.989003,0.348644,10.282299,0.195178,13.255828,-0.425757
