In [7]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, '../../../../scripts/')
from s3_support import *

In [16]:
# Load the merged Qgiv analytics dataset.
#   (the file is produced by "build - analytics qgiv.py")
df = get_dataframe_from_file(
    "qgivmodelsdata",
    "analytics_qgiv.2019.csv",
)

In [17]:
# Parse the 'date' column into datetimes and order the rows chronologically
# (oldest first) so later per-form calculations see them in time order.
df = (
    df
    .assign(date=pd.to_datetime(df['date']))
    .sort_values('date', ascending=True)
)

In [23]:
# Report the row count and the number of distinct forms, then preview the data.
print(df.shape[0], df['form'].unique().size)
df.head(n=3)

953131 23622


Unnamed: 0,date,form,org,pledges_count,events_priv_count,restrictions,amounts,ded_types,opt_ded_flds,req_ded_flds,...,kiosk_trans_vol,p2p_trans_vol,mobile_trans_vol,mobilevt_trans_vol,sms_trans_vol,fb_trans_vol,one_time_trans_vol,one_time_trans_count,rec_trans_vol,rec_trans_count
0,2018-01-01,813145,41981,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0
30104,2018-01-01,889949,1206,0,0,0,3,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0
30103,2018-01-01,857202,1122,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0


# calculate percent change of settings & transaction stats

In [34]:
# Compute, for each form, the row-over-row percent change of every
# non-identifier column, then stack the per-form results into `ffilled`.
id_cols = ['date', 'form', 'org']
metric_cols = df.drop(id_cols, axis=1).columns

# Collect the per-form frames in a list and concatenate once at the end.
# Calling DataFrame.append inside the loop re-copies the accumulated frame on
# every iteration, and the method no longer exists in pandas >= 2.0.
form_frames = []

# groupby(sort=False) yields the forms in order of first appearance (the same
# order as df['form'].unique()) and keeps each form's rows in their existing
# order, without re-scanning all of df once per form.
for form, form_df in df.groupby('form', sort=False):
    changes = form_df[metric_cols].pct_change()
    this_df = form_df[id_cols].merge(changes, left_index=True, right_index=True)

    # pct_change() results in NaN when comparing 0's, so
    # forward fill to account for past changes
    this_df = this_df.ffill()
    # columns of all 0's will still be NaN so fill with 0
    this_df = this_df.fillna(0)
    # NOTE(review): a move from 0 to a non-zero value presumably yields +/-inf,
    # which neither fill above touches -- confirm downstream steps expect that.

    form_frames.append(this_df)

# Stays None when df has no rows, matching the previous behaviour.
ffilled = pd.concat(form_frames) if form_frames else None

In [35]:
# Preview the first five rows of the percent-change frame.
ffilled.head(n=5)

Unnamed: 0,date,form,org,pledges_count,events_priv_count,restrictions,amounts,ded_types,opt_ded_flds,req_ded_flds,...,kiosk_trans_vol,p2p_trans_vol,mobile_trans_vol,mobilevt_trans_vol,sms_trans_vol,fb_trans_vol,one_time_trans_vol,one_time_trans_count,rec_trans_vol,rec_trans_count
0,2018-01-01,813145,41981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
30632,2018-01-08,813145,41981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4397,2018-01-15,813145,41981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36423,2018-01-22,813145,41981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78065,2018-01-29,813145,41981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
# Persist the percent-change dataset so the analysis below can reload it.
save_dataframe_to_file(
    "qgivmodelsdata",
    "analytics_qgiv.change.2019.csv",
    ffilled,
)

uploading to S3
Done


# examining correlations between setting changes and transaction growth

In [None]:
# Reload the percent-change dataset written by the save cell earlier in this
# notebook. The object key must match the one that was saved
# ("analytics_qgiv.change.2019.csv"); it was previously truncated to
# "analytics_qgiv.change.2019.c", which does not match the saved file.
ffilled = get_dataframe_from_file("qgivmodelsdata", "analytics_qgiv.change.2019.csv")

In [44]:
# Outcome columns: any name containing '_trans_' or '_rec_', plus 'reg_count'.
trans_cols = [
    col
    for col in ffilled.columns
    if any(marker in col for marker in ('_trans_', '_rec_')) or col == 'reg_count'
]

# Pairwise correlations between all remaining columns once the identifier
# columns have been removed.
corrs = ffilled.drop(columns=['date', 'form', 'org']).corr()

In [43]:
# Rows: every column that is not an outcome column; columns: the outcome columns.
corrs.loc[~corrs.index.isin(trans_cols), trans_cols].round(4)

Unnamed: 0,new_rec_volume,new_rec_count,reg_count,dl_trans_volume,dl_trans_count,dl_new_rec_count,dl_new_rec_volume,vt_trans_count,don_form_trans_count,kiosk_trans_count,...,kiosk_trans_vol,p2p_trans_vol,mobile_trans_vol,mobilevt_trans_vol,sms_trans_vol,fb_trans_vol,one_time_trans_vol,one_time_trans_count,rec_trans_vol,rec_trans_count
pledges_count,0.0053,0.0255,0.0037,0.0009,0.0251,0.028,0.0172,0.0104,0.0041,0.002,...,0.0002,,0.0005,0.0001,0.0,0.0087,-0.0018,0.0041,-0.0015,0.0106
events_priv_count,,,,,,,,,,,...,,,,,,,,,,
restrictions,0.0077,0.0387,0.002,0.0099,0.0375,0.0306,0.0191,0.0233,0.011,0.0057,...,0.0177,,0.0079,-0.0037,0.0001,0.0132,-0.001,0.011,-0.001,0.015
amounts,0.0009,0.0086,-0.0034,0.0071,0.0256,0.0147,0.0094,0.0109,0.0006,0.0039,...,0.0032,,-0.0011,-0.0071,0.0001,0.001,0.0038,0.0006,-0.0091,0.0012
ded_types,0.0113,0.0541,-0.0013,0.0014,0.0527,0.0409,0.0244,0.017,-0.0021,0.0024,...,0.0003,,-0.0014,-0.0006,0.0001,0.0325,0.0,-0.0021,-0.0137,0.0232
opt_ded_flds,,,,,,,,,,,...,,,,,,,,,,
req_ded_flds,0.0074,0.0333,0.0048,-0.004,0.0289,0.0205,0.0127,0.0155,-0.0029,-0.0002,...,0.0,,0.0004,0.0001,0.0,0.0077,-0.0005,-0.0029,-0.0142,0.0046
opt_fields,0.0097,0.0543,0.0204,0.0086,0.0632,0.0605,0.031,0.0216,0.0128,0.0048,...,0.0002,,0.0076,-0.0004,0.0001,0.0072,0.0033,0.0128,-0.0008,0.0206
req_fields,0.0073,0.0367,0.0276,0.0031,0.0469,0.03,0.0154,0.0176,0.0124,0.0114,...,0.0007,,0.0031,-0.0092,0.0001,0.006,0.0001,0.0124,-0.0018,0.0053
pledge_active,0.0075,0.0693,0.024,0.0039,0.0678,0.0578,0.0336,0.0268,0.0378,0.0051,...,-0.0005,,0.0062,0.0003,0.0002,0.0233,-0.003,0.0378,-0.0005,0.0318


In [51]:
# Narrow the table to outcome columns with at least one correlation above ~1%.
# The boolean check used to find them (note its cut-off is 0.009, not 0.01):
# corrs[trans_cols][~corrs.index.isin(trans_cols)].round(decimals=4)>0.009

# Outcome columns to hide; hard-coded, presumably from inspecting the check
# above for columns that came back False in every row -- verify if data changes.
cols_all_false = [
    'p2p_trans_vol', 'mobile_trans_vol', 'mobilevt_trans_vol',
    'sms_trans_vol', 'one_time_trans_vol', 'rec_trans_vol',
    'vt_trans_vol', 'don_form_trans_vol', 'p2p_trans_count',
]
(
    corrs.loc[~corrs.index.isin(trans_cols), trans_cols]
    .drop(columns=cols_all_false)
    .round(4)
)

Unnamed: 0,new_rec_volume,new_rec_count,reg_count,dl_trans_volume,dl_trans_count,dl_new_rec_count,dl_new_rec_volume,vt_trans_count,don_form_trans_count,kiosk_trans_count,mobile_trans_count,mobilevt_trans_count,sms_trans_count,fb_trans_count,kiosk_trans_vol,fb_trans_vol,one_time_trans_count,rec_trans_count
pledges_count,0.0053,0.0255,0.0037,0.0009,0.0251,0.028,0.0172,0.0104,0.0041,0.002,0.0101,0.0023,-0.0009,0.0097,0.0002,0.0087,0.0041,0.0106
events_priv_count,,,,,,,,,,,,,,,,,,
restrictions,0.0077,0.0387,0.002,0.0099,0.0375,0.0306,0.0191,0.0233,0.011,0.0057,0.0284,0.0124,0.0003,0.0148,0.0177,0.0132,0.011,0.015
amounts,0.0009,0.0086,-0.0034,0.0071,0.0256,0.0147,0.0094,0.0109,0.0006,0.0039,-0.0004,0.0089,0.0024,0.0019,0.0032,0.001,0.0006,0.0012
ded_types,0.0113,0.0541,-0.0013,0.0014,0.0527,0.0409,0.0244,0.017,-0.0021,0.0024,0.0156,0.0033,0.0071,0.0381,0.0003,0.0325,-0.0021,0.0232
opt_ded_flds,,,,,,,,,,,,,,,,,,
req_ded_flds,0.0074,0.0333,0.0048,-0.004,0.0289,0.0205,0.0127,0.0155,-0.0029,-0.0002,0.0188,-0.0007,0.0079,0.0086,0.0,0.0077,-0.0029,0.0046
opt_fields,0.0097,0.0543,0.0204,0.0086,0.0632,0.0605,0.031,0.0216,0.0128,0.0048,0.0464,0.0168,0.0057,0.008,0.0002,0.0072,0.0128,0.0206
req_fields,0.0073,0.0367,0.0276,0.0031,0.0469,0.03,0.0154,0.0176,0.0124,0.0114,0.0235,0.0156,0.0037,0.0078,0.0007,0.006,0.0124,0.0053
pledge_active,0.0075,0.0693,0.024,0.0039,0.0678,0.0578,0.0336,0.0268,0.0378,0.0051,0.0446,0.0154,0.0182,0.0272,-0.0005,0.0233,0.0378,0.0318
