In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.append("../../../../scripts/")
from s3_support import *

# build dataset

In [2]:
# load orgs for churn data
orgs = get_dataframe_from_file("qgiv-stats-data", "organizations.names.csv")

In [3]:
cols = ['id', 'org_name', 'live_date', 'date_closed', 'signup_step_one', 'first_transaction_date',
       'pricing_package', 'segment', 'tags', 'reason_closed', 'additional_churn_info']
orgs = orgs[cols]

In [4]:
# cleanup 
date_cols = ['live_date', 'date_closed', 'signup_step_one', 'first_transaction_date']
for c in date_cols:
    orgs[c] = orgs[c].apply(lambda x: np.nan if x == '12/31/1969' else x)
    orgs[c] = pd.to_datetime(orgs[c])
    
str_cols = ['org_name', 'tags', 'reason_closed', 'additional_churn_info']
for c in str_cols:
    orgs[c] = orgs[c].fillna('')

In [5]:
orgs.head(3)

Unnamed: 0,id,org_name,live_date,date_closed,signup_step_one,first_transaction_date,pricing_package,segment,tags,reason_closed,additional_churn_info
0,441789,,NaT,NaT,2017-08-16,NaT,Start,Z - Unknown,,,
1,443044,,NaT,NaT,2018-07-24,NaT,Start,,,,
2,1045,FROM Food Fight,2011-03-14,2011-06-30,NaT,2011-05-03,Legacy Pricing,,,Voluntary - Automatically Updated,


# exploration

In [6]:
orgs[['live_date', 'date_closed', 'signup_step_one', 'first_transaction_date']].isna().sum()

live_date                 2780
date_closed               6417
signup_step_one           5308
first_transaction_date    3348
dtype: int64

In [7]:
orgs['pricing_package'].value_counts()

Package Pricing                                                                          2960
Legacy Pricing                                                                           1845
Start                                                                                     877
Package Pricing - Data Package                                                            677
Grow / Mobile Suite                                                                       448
Peer-to-Peer                                                                              429
Package Pricing - Data Package, Peer-to-Peer Package                                      216
Package Pricing - Peer-to-Peer Package                                                    165
Package Pricing - Mobile Package                                                          149
Package Pricing - Data Package, Mobile Package                                            102
Package Pricing - Auction Package, Data Package             

In [8]:
orgs['reason_closed'].value_counts()

                                                                             6606
Voluntary - Automatically Updated                                             386
Voluntary - No reason                                                         265
Involuntary - Political/Campaign Over                                         152
Involuntary - Mission Research                                                134
Voluntary - Didn't use it | Weak adoption by donor base/Org didn't market     105
Voluntary - Feature Missing/System no longer aligns strategically              92
Involuntary - Org Closed                                                       75
Voluntary - Used us for a specific event or period of time                     65
Voluntary - Fees                                                               56
Voluntary - Qgiv Fired You                                                     39
Voluntary - Integration                                                        24
Voluntary - Empl

In [9]:
len_nonnull = len(orgs[orgs['additional_churn_info']!=''])
perc_nonnull = (float(len_nonnull) / float(len(orgs))) * 100.
len_unique = len(orgs['additional_churn_info'].unique())
print("{} ({:.2f}%) orgs with additional churn info; {} unique".format(len_nonnull, perc_nonnull, len_unique))

157 (1.93%) orgs with additional churn info; 150 unique


# mean lifetimes

In [10]:
orgs['lifetime'] = orgs['date_closed'] - orgs['live_date']

print("{:.2f}% did not churn".format((orgs['lifetime'].isnull().sum() / len(orgs)) * 100.))
print("{} days average lifetime of churned orgs".format(orgs['lifetime'].mean().days))

82.72% did not churn
978 days average lifetime of churned orgs


In [11]:
for package in orgs['pricing_package'].unique():
    these_orgs = orgs[orgs['pricing_package']==package]
    mean_lifetime = these_orgs['lifetime'].mean()
    print("{:4} days: {} ({} samples)".format(mean_lifetime.days, package, len(these_orgs['lifetime'].dropna())))

1136 days: Start (229 samples)
 962 days: Legacy Pricing (943 samples)
 nan days: Package Pricing - Data Package, Mobile Package (0 samples)
1487 days: Package Pricing (103 samples)
 nan days: Package Pricing - Data Package, Mobile Package, Peer-to-Peer Package (0 samples)
 nan days: Package Pricing - Data Package (0 samples)
 362 days: Grow / Mobile Suite (101 samples)
 510 days: Peer-to-Peer (25 samples)
 nan days: Package Pricing - Peer-to-Peer Package (0 samples)
 nan days: Impact (0 samples)
 nan days: Package Pricing - Auction Package, Data Package, Mobile Package, Peer-to-Peer Package (0 samples)
 nan days: Package Pricing - Data Package, Peer-to-Peer Package (0 samples)
 nan days: Package Pricing - Auction Package, Data Package (0 samples)
 nan days: Package Pricing - Auction Package, Data Package, Mobile Package (0 samples)
 nan days: Package Pricing - Everything (0 samples)
1065 days: Package Pricing - Mobile Package (1 samples)
 nan days: Package Pricing - Impact Package (0 

In [12]:
zero_segments = []

for segment in orgs['segment'].unique():
    these_orgs = orgs[orgs['segment']==segment]
    if len(these_orgs['lifetime'].dropna()) == 0:
        zero_segments.append((str(segment), len(these_orgs)))
        continue
        
    mean_lifetime = these_orgs['lifetime'].mean()
    print("{:4} days: {} ({} samples)".format(mean_lifetime.days, segment, len(these_orgs['lifetime'].dropna())))
    
print("\nZero churn segments: {}".format(", ".join(["{} ({} samples)".format(s[0], s[1]) for s in zero_segments])))

 826 days: Z - Unknown  (182 samples)
1116 days: T - Philanthropy, Voluntarism, and Grantmaking  (27 samples)
 961 days: N - Recreation, Sports, Leisure, Athletics  (20 samples)
1015 days: P - Human Services  (85 samples)
1669 days: Y - Mutual/Membership Benefit Organizations (2 samples)
1444 days: S - Community Improvement, Capacity Building  (16 samples)
 880 days: A - Arts, Culture, and Humanities  (48 samples)
1379 days: X - Religion, Spiritual Development  (126 samples)
1306 days: B - Educational Institutions  (87 samples)
1379 days: C - Environmental Advocacy and Protection (18 samples)
1819 days: G - Disease, Disorders, Medical Disciplines  (30 samples)
 886 days: E - Health, General and Rehabilitative  (35 samples)
1071 days: O - Youth Development  (24 samples)
 853 days: J - Employment, Job Related  (4 samples)
1493 days: F - Mental Health and Crisis Intervention  (20 samples)
1219 days: Q - International, Foreign Affairs, and National Security  (25 samples)
1294 days: I - Cri

In [13]:
for reason in orgs['reason_closed'].unique():
    if reason != '':
        these_orgs = orgs[orgs['reason_closed']==reason]
        mean_lifetime = these_orgs['lifetime'].mean()
        reason_formatted = reason.replace('Voluntary - ', '').replace('Involuntary - ', '')
        print("{:4} days: {} ({} samples)".format(mean_lifetime.days, reason_formatted, len(these_orgs['lifetime'].dropna())))

 640 days: Automatically Updated (332 samples)
1177 days: Mission Research (134 samples)
1122 days: No reason (241 samples)
1212 days: Qgiv Fired You (33 samples)
1364 days: Org Closed (73 samples)
1435 days: Feature Missing/System no longer aligns strategically (90 samples)
1311 days: Integration (24 samples)
1245 days: Mandated by Umbrella Org/Board (18 samples)
1359 days: Fees (55 samples)
1740 days: Third party vendor requested the change (21 samples)
1218 days: Employee churn/New leadership or loss of main contact (23 samples)
 505 days: Political/Campaign Over (151 samples)
1153 days: Internal Issues (5 samples)
 897 days: Didn't use it | Weak adoption by donor base/Org didn't market (103 samples)
 727 days: Used us for a specific event or period of time (65 samples)
1703 days: Merged with another Org (17 samples)
1339 days: Communication/Weren't aware of features (10 samples)


# Lifetime growth trends in churned orgs

In [17]:
churned_orgs_ids = orgs[~orgs['reason_closed'].isna()&(orgs['reason_closed']!='')]['id'].tolist()
len(orgs), len(churned_orgs_ids)

(8115, 1509)

In [40]:
q = '''select
            org,
            count(distinct form) as forms,
            date_trunc('month', date) as month,
            count(id) as count,
            sum(amount) as volume
        from transactions
            where status='A'
            group by org, date_trunc('month', date)
            order by date_trunc('month', date) desc;'''
trans = redshift_query_read(q)

In [41]:
trans['month'] = pd.to_datetime(trans['month'])
trans.sort_values('month', ascending=True, inplace=True)

In [42]:
org_growth_data = []

for org in trans['org'].unique():
    this_df = trans[trans['org']==org].copy()
    if len(this_df) <= 1:
        continue
    this_df['growth'] = this_df['volume'].diff() / this_df['volume'].shift(1)
    
    org_growth_data.append({
        'org': org,
        'growth': this_df['growth'].replace([np.inf, -np.inf], np.nan).dropna().mean()
    })

In [43]:
growth_df = pd.DataFrame(org_growth_data)
growth_df['churned'] = growth_df['org'].isin(churned_orgs_ids)
growth_df.groupby('churned')['growth'].mean()

churned
False    201.953122
True      84.989985
Name: growth, dtype: float64

In [45]:
growth_df.groupby('churned')['growth'].std()

churned
False    1629.996559
True      505.322113
Name: growth, dtype: float64

In [46]:
growth_df.groupby('churned')['growth'].count()

churned
False    3531
True     1120
Name: growth, dtype: int64

In [48]:
len_churned = len(growth_df[growth_df['churned']])
len_nonchurned = len(growth_df[~growth_df['churned']])

churned_mean = growth_df[growth_df['churned']]['growth'].mean()
len_nonchurned_gt_churned_mean = len(growth_df[~growth_df['churned']&(growth_df['growth']>churned_mean)])
len_churned_gt_churned_mean = len(growth_df[growth_df['churned']&(growth_df['growth']>churned_mean)])

print("Non churned orgs > churned mean: {} ({:.2f}%)".format(len_nonchurned_gt_churned_mean, (float(len_nonchurned_gt_churned_mean) / float(len_nonchurned)) * 100.))
print("Chuned orgs > churned mean: {} ({:.2f}%)".format(len_churned_gt_churned_mean, (float(len_churned_gt_churned_mean) / float(len_churned)) * 100.))

Non churned orgs > churned mean: 568 (16.04%)
Chuned orgs > churned mean: 101 (9.01%)


This seems __very promising__. __91% of churned and 84% of nonchurned orgs__ have mean volume growth rates beneath the mean churned org growth rate.