In [2]:
import pandas as pd
import numpy as np

In [3]:
PATH_TRANS = "../transactions/transactions.csv"
PATH_LOGS = "../logs/logs.csv"
PATH_ANALYTICS = "../churn/analytic_base.csv"
PATH_ANALYTICSQGIV = "../churn/analytic_qgiv_stats.csv"

In [4]:
df_analytics_qgiv = pd.read_csv(PATH_ANALYTICSQGIV, engine='python', encoding='utf-8', error_bad_lines=False)
df_trans = pd.read_csv(PATH_TRANS)

Skipping line 14519449: unexpected end of data
  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
df_trans = df_trans[df_trans['status']=='A']
df_trans['date'] = pd.to_datetime(df_trans['date'])
df_trans['year'] = df_trans['date'].dt.year
df_trans['month'] = df_trans['date'].dt.month
df_trans['year_month'] = df_trans[['year', 'month']].apply(lambda x: str(x['year']) + "/" + str(x['month']), axis=1)
df_trans['is_recurring'] = df_trans['is_recurring'].astype('bool')

In [6]:
# identify donors grouped by org
df_donor_agg = df_trans.groupby(['org', 'email', 'source']).agg({'amount': 'sum', 'date': 'count'}).reset_index()
df_donor_agg['count'] = df_donor_agg['date']
df_donor_agg['sum'] = df_donor_agg['amount']
df_donor_agg.drop(['date', 'amount'], axis=1, inplace=True)

In [7]:
df_org_retention = df_donor_agg.groupby(['org', 'source'])[['count', 'sum']].mean().reset_index()
org_recurring = df_trans.groupby(['org'])['is_recurring'].mean().reset_index()

In [8]:
def get_mean_donations(org):
    if len(df_org_retention[df_org_retention['org']==org]['count']):
        return df_org_retention[df_org_retention['org']==org]['count'].iloc[0]
    else:
        return np.NaN
    
def get_recurring_count(org):
    if len(org_recurring[org_recurring['org']==org]['is_recurring']):
        return org_recurring[org_recurring['org']==org]['is_recurring'].iloc[0]
    else:
        return np.NaN

In [9]:
org_dl_count = df_analytics_qgiv.groupby('org')['dl_trans_count'].sum().reset_index()
org_dl_count['mean_donations_per_donor'] = org_dl_count['org'].apply(get_mean_donations)
org_dl_count['recurring_rate'] = org_dl_count['org'].apply(get_recurring_count)

org_dl_count.head()

Unnamed: 0,org,dl_trans_count,mean_donations_per_donor,recurring_rate
0,6,188,2.768089,0.98745
1,9,0,17.037037,0.491857
2,13,4,1.996245,0.942773
3,31,0,1.736217,0.849462
4,33,5,9.899293,0.244729


In [10]:
org_dl_count[['dl_trans_count', 'mean_donations_per_donor', 'recurring_rate']].mean()

dl_trans_count              31.243629
mean_donations_per_donor     2.673698
recurring_rate               0.885093
dtype: float64

In [11]:
org_donor_count = df_donor_agg.groupby(['org', 'source']).agg({'email': 'count', 'count': 'mean', 'sum': 'mean'}).reset_index()
org_donor_count.head()

Unnamed: 0,org,source,count,sum,email
0,0,don_form,7.0,265.0,1
1,6,don_form,2.768089,385.444601,539
2,6,givi,1.0,25.0,1
3,6,kiosk,4.201835,1179.018349,109
4,6,mobile,2.196721,260.148033,244


In [12]:
org_donor_count.groupby('source')[['count', 'sum', 'email']].mean().reset_index()

Unnamed: 0,source,count,sum,email
0,don_form,2.354226,369.510703,324.47569
1,fb,4.136724,353.12342,6.353211
2,givi,5.068699,686.416783,4.064815
3,kiosk,6.190796,658.715415,151.080906
4,mobile,2.277514,290.241431,96.902685
5,mobilevt,3.614512,417.349325,35.2
6,p2p,1.213717,91.172493,869.600289
7,sms,8.798909,324.884747,21.57665
8,vt,5.330813,723.87279,26.431859


We want to start with the basic value statement. How much is each worth? In order to determine this, we calculate the average amount donated per general donor and donor login account, but we need to consider how much has been donated by and to each P2P registrant.

_Possible consideration: probability of starting or joining a team and accounting for those funds as well_

In [13]:
df_analytics_qgiv.head()

Unnamed: 0,dl_trans_volume,new_rec_count,dl_new_rec_count,id,reg_count,new_rec_volume,event_stats,base,org,dl_new_rec_volume,reg_volume,dl_trans_count
0,0.0,0,0,31185117,0,0.0,0,38291755,438392,0.0,0.0,0
1,0.0,0,0,31185118,0,0.0,0,38291756,438392,0.0,0.0,0
2,0.0,0,0,31185119,0,0.0,0,38291757,438392,0.0,0.0,0
3,0.0,0,0,31185120,0,0.0,0,38291758,438392,0.0,0.0,0
4,0.0,0,0,31185121,0,0.0,0,38291759,438397,0.0,0.0,0


In [14]:
df_trans[['id', 'amount', 'transDonationEntity', 'transDonationEntityType']].tail()

Unnamed: 0,id,amount,transDonationEntity,transDonationEntityType
7157548,7901903,51.5,,
7157549,7901904,102.75,941821.0,4.0
7157550,7901905,20.0,831094.0,10.0
7157551,7901906,400.0,,
7157552,7901907,416.0,119874.0,4.0


In [15]:
# "EVENT": 0, "TEAM": 2, "FORM": 4, "REGISTRATION": 10

df_trans['transDonationEntityType'].unique()

array([ 0.,  4., 10.,  2., nan])

# Average value of Qgiv donor vs Donor Login donor vs P2P Registrant

In [18]:
# average raised per registrant
per_registrant = df_trans[df_trans['transDonationEntityType']==10.0].groupby('transDonationEntity')[['id', 'amount']].agg({'amount':'mean', 'id':'count'})
per_registrant_means = per_registrant.reset_index()[['amount', 'id']].mean()

p2p_mean_registration_amount = df_trans[(df_trans['source']=='p2p')&(df_trans['registrations_count']>0)]['registrations_amt'].mean()
p2p_mean_funds_raised = per_registrant_means['amount']
p2p_mean_total_funds = p2p_mean_registration_amount + per_registrant_means['amount']
p2p_mean_donations_count = per_registrant_means['id']
p2p_mean_donations_amount = per_registrant_means['amount'] / per_registrant_means['id']

In [19]:
per_donor_means = df_trans[df_trans['transDonationEntityType'].isnull()].groupby('email').agg({'amount': 'sum', 'id':'count'})[['amount', 'id']].mean()

qgiv_mean_funds_raised = per_donor_means['amount']
qgiv_mean_donations_count = per_donor_means['id']
qgiv_mean_donation_amount = per_donor_means['amount'] / per_donor_means['id']

In [22]:
dl_mean_funds_raised = qgiv_mean_donation_amount * qgiv_mean_donations_count
dl_mean_donations_count = org_dl_count[['dl_trans_count', 'mean_donations_per_donor', 'recurring_rate']].mean()['mean_donations_per_donor']

In [53]:
print("P2P Registrants")
print("Mean registration amount: ${:.2f}".format(p2p_mean_registration_amount))
print("Mean funds raised: ${:.2f}".format(p2p_mean_funds_raised))
print("Registration + funds raised: ${:.2f}".format(p2p_mean_total_funds))
print("Mean donations per registrant: {:.2f}".format(p2p_mean_donations_count))
print("Mean donation amount: ${:.2f}".format(p2p_mean_donations_amount))

print("-"*40)

print("Qgiv donors")
print("Mean funds raised: ${:.2f}".format(qgiv_mean_funds_raised))
print("Mean donations per donor: {:.2f}".format(qgiv_mean_donations_count))
print("Mean donation amount: ${:.2f}".format(qgiv_mean_donation_amount))

print("-"*40)

print("Donor Logins")
print("Mean funds raised: ${:.2f}".format(dl_mean_donations_count * qgiv_mean_donation_amount))
print("Mean donations per donor: {:.2f}".format(dl_mean_donations_count))
print("Mean donation amount: ${:.2f}".format(qgiv_mean_donation_amount))

P2P Registrants
Mean registration amount: $17.79
Mean funds raised: $78.15
Registration + funds raised: $95.94
Mean donations per registrant: 3.28
Mean donation amount: $23.85
----------------------------------------
Qgiv donors
Mean funds raised: $132.93
Mean donations per donor: 1.55
Mean donation amount: $85.54
----------------------------------------
Donor Logins
Mean funds raised: $228.71
Mean donations per donor: 2.67
Mean donation amount: $85.54


# Looking at value by source

In [47]:
df_donor_avgs = df_donor_agg.groupby('source')[['count', 'sum']].mean().reset_index()

In [48]:
src_counts = df_trans[df_trans['date']>='2019-01-01'].groupby('source')['id'].count()
src_counts_total = np.sum(src_counts)

In [49]:
df_donor_avgs['processing_perc'] = df_donor_avgs['source'].apply(lambda x: (float(src_counts[x]) / float(src_counts_total)) * 100.)

In [50]:
df_donor_avgs

Unnamed: 0,source,count,sum,processing_perc
0,don_form,2.686032,396.29167,54.112225
1,fb,3.537906,299.039184,0.114344
2,givi,10.485194,1203.063964,0.254078
3,kiosk,7.139791,796.600888,3.909776
4,mobile,2.592502,315.498289,7.858889
5,mobilevt,2.696023,406.852312,1.067627
6,p2p,1.354609,89.198883,22.87335
7,sms,2.354868,209.843701,1.311782
8,vt,7.401081,1017.885676,8.497928
