In [1]:
import pandas as pd

import sys, os, requests, json, datetime
sys.path.insert(1, '../../scripts/')
from s3_support import *

# data load

### load integration data

In [2]:
df = pd.read_csv("export_stats_report_1663089231.csv")

In [4]:
df.tail(3)

Unnamed: 0,Org,Service,Status,In Progress?,Frequency,Next Run Time,Last Export Date,Last Export Result
1608,Zeno,Salesforce,Pending,No,Hourly,2022-08-19 18:15:00,2022-08-18 07:36:50,Failed
1609,Zero Breast Cancer,Constant Contact,Pending,No,Manual,0000-00-00 00:00:00,2018-09-23 01:15:08,Success
1610,Zero Breast Cancer,Bloomerang,Active,No,Hourly,2022-09-13 13:15:00,2022-09-12 22:22:35,Success


### integration type

In [45]:
integration_types = {
    'crm': ['DonorPerfect', 'Bloomerang', 'Salesforce', 
            "Raiser's Edge - ImportOmatic", 'NeonCRM',
            'Kindful', "Raiser's Edge NXT (Beta)", 
            'eTapestry', 'Fellowship One', 'ClearView CRM',
            'Blackbaud CRM (JA)', 'Siebel', 'Church Community Builder',
            'Salesforce (SFCI)'],
    'email': ['Mailchimp', 'Constant Contact', 'Campaign Monitor',
             'Emma'],
    'financial': ['QuickBooks Online']
}

def tag_integration_type(service):
    for t in integration_types:
        if service in integration_types[t]:
            return t
        
df['integration_type'] = df['Service'].apply(tag_integration_type)

In [46]:
df['integration_type'].value_counts()

crm          1022
email         470
financial     119
Name: integration_type, dtype: int64

### load org data

In [10]:
orgs = pd.read_csv("org_download.csv")

In [12]:
orgs[['Id', 'Org Name']].tail(3)

Unnamed: 0,Id,Org Name
11834,448610,Zonta Club of Charles County
11835,40959,Zootown Church
11836,445136,​Green Acres Interactive Therapy


## tagging ID's to integrations

In [33]:
match_orgs = len(orgs[orgs['Org Name'].str.strip().isin(df['Org'].str.strip().tolist())])

print("{:,} entries; {:,} integration orgs".format(len(df), len(df['Org'].unique())))
print("{:,} integration orgs match in all org's".format(match_orgs))

1,611 entries; 1,342 integration orgs
1,382 integration orgs match in all org's


In [29]:
def get_org_id(org_name):
    if org_name in orgs['Org Name'].str.strip().tolist():
        return orgs[orgs['Org Name'].str.strip()==org_name]['Id'].iloc[0]
    return 0
    
df['id'] = df['Org'].str.strip().apply(get_org_id)

In [39]:
print("{:,} unmatched orgs".format(len(df[df['id']==0][['Org', 'id']])))

1 unmatched orgs


In [40]:
orgs['used_integrations'] = orgs['Id'].isin(df['id'].tolist())

In [41]:
orgs['used_integrations'].value_counts()

False    10496
True      1341
Name: used_integrations, dtype: int64

In [43]:
orgs.groupby('Status')['used_integrations'].value_counts()

Status          used_integrations
active          False                4228
                True                 1309
close and bill  False                   6
                True                    1
closed          False                1027
                True                    4
deleted         False                1819
                True                   27
demo            False                1603
inactive        False                   8
pending         False                1761
suspended       False                  44
Name: used_integrations, dtype: int64

In [50]:
orgs['integration_type_crm'] = orgs['Id'].isin(df[df['integration_type']=='crm']['id'].tolist())
orgs['integration_type_email'] = orgs['Id'].isin(df[df['integration_type']=='email']['id'].tolist())
orgs['integration_type_financial'] = orgs['Id'].isin(df[df['integration_type']=='financial']['id'].tolist())

In [54]:
print("CRM:")
print(orgs['integration_type_crm'].value_counts())
print()
print("Email:")
print(orgs['integration_type_email'].value_counts())
print()
print("Financial:")
print(orgs['integration_type_financial'].value_counts())

CRM:
False    10842
True       995
Name: integration_type_crm, dtype: int64

Email:
False    11371
True       466
Name: integration_type_email, dtype: int64

Financial:
False    11718
True       119
Name: integration_type_financial, dtype: int64


# analysis

In [None]:
orgs['churned'] = orgs['Status'].isin(['deleted', 'closed', 'close and bill', 'suspended'])

## general status distributions

In [60]:
orgs['Status'].value_counts(normalize=True)

active            0.467771
deleted           0.155952
pending           0.148771
demo              0.135423
closed            0.087100
suspended         0.003717
inactive          0.000676
close and bill    0.000591
Name: Status, dtype: float64

In [62]:
omit_statuses = ['pending', 'demo']

orgs[~orgs['Status'].isin(omit_statuses)]['churned'].value_counts(normalize=True)

False    0.654432
True     0.345568
Name: churned, dtype: float64

In [65]:
orgs[~orgs['Status'].isin(omit_statuses)].groupby('used_integrations')['churned'].value_counts()

used_integrations  churned
False              False      4236
                   True       2896
True               False      1309
                   True         32
Name: churned, dtype: int64

In [66]:
orgs[~orgs['Status'].isin(omit_statuses)].groupby('used_integrations')['churned'].value_counts(normalize=True)

used_integrations  churned
False              False      0.593943
                   True       0.406057
True               False      0.976137
                   True       0.023863
Name: churned, dtype: float64

## integration aware status distributions

In [57]:
print("CRM:")
print(orgs.groupby('integration_type_crm')['Status'].value_counts(normalize=True))
print()

print("Email:")
print(orgs.groupby('integration_type_email')['Status'].value_counts(normalize=True))
print()

print("Financial:")
print(orgs.groupby('integration_type_financial')['Status'].value_counts(normalize=True))

CRM:
integration_type_crm  Status        
False                 active            0.421417
                      deleted           0.168235
                      pending           0.162424
                      demo              0.147851
                      closed            0.094724
                      suspended         0.004058
                      inactive          0.000738
                      close and bill    0.000553
True                  active            0.972864
                      deleted           0.022111
                      closed            0.004020
                      close and bill    0.001005
Name: Status, dtype: float64

Email:
integration_type_email  Status        
False                   active            0.446575
                        deleted           0.161727
                        pending           0.154868
                        demo              0.140973
                        closed            0.090669
                        suspended      

In [59]:
omit_statuses = ['pending', 'demo']

print("CRM:")
print(orgs[~orgs['Status'].isin(omit_statuses)].groupby('integration_type_crm')['churned'].value_counts(normalize=True))
print()

print("Email:")
print(orgs[~orgs['Status'].isin(omit_statuses)].groupby('integration_type_email')['churned'].value_counts(normalize=True))
print()

print("Financial:")
print(orgs[~orgs['Status'].isin(omit_statuses)].groupby('integration_type_financial')['churned'].value_counts(normalize=True))

CRM:
integration_type_crm  churned
False                 False      0.612062
                      True       0.387938
True                  False      0.972864
                      True       0.027136
Name: churned, dtype: float64

Email:
integration_type_email  churned
False                   False      0.635194
                        True       0.364806
True                    False      0.984979
                        True       0.015021
Name: churned, dtype: float64

Financial:
integration_type_financial  churned
False                       False      0.649749
                            True       0.350251
True                        False      0.983193
                            True       0.016807
Name: churned, dtype: float64
