In [2]:
import pandas as pd
import numpy as np

import sys
sys.path.append("../../../../scripts/")
from s3_support import *

# load churn orgs

In [6]:
# load orgs for churn data
orgs = get_dataframe_from_file("qgiv-stats-data", "organizations.names.csv")

cols = ['id', 'org_name', 'live_date', 'date_closed', 'signup_step_one', 'first_transaction_date',
       'pricing_package', 'segment', 'tags', 'reason_closed', 'additional_churn_info']
orgs = orgs[cols]

# cleanup 
date_cols = ['live_date', 'date_closed', 'signup_step_one', 'first_transaction_date']
for c in date_cols:
    orgs[c] = orgs[c].apply(lambda x: np.nan if x == '12/31/1969' else x)
    orgs[c] = pd.to_datetime(orgs[c])
    
str_cols = ['org_name', 'tags', 'reason_closed', 'additional_churn_info']
for c in str_cols:
    orgs[c] = orgs[c].fillna('')

In [7]:
churned_orgs = orgs[~orgs['date_closed'].isnull()]
churned_2020 = churned_orgs[churned_orgs['date_closed']>'01-01-2020']

len(orgs), len(churned_orgs), len(churned_2020), churned_orgs['date_closed'].max()

(8562, 1756, 130, Timestamp('2020-08-14 00:00:00'))

In [8]:
active_org_count = redshift_query_read("select count(id) as active_orgs from organization where status=1", schema="production")
active_org_count

Unnamed: 0,active_orgs
0,4068


# logins

In [5]:
q = '''select
            users.org as org,
            date_trunc('day', original_timestamp) as date,
            count(login.user_id) as users
        from login
            left join users on login.user_id=users._id
        group by date, org
        order by date desc;'''
logins = redshift_query_read(q, schema="secure")

In [6]:
logins.head(3)

Unnamed: 0,org,date,users
0,445551,2020-05-17,9
1,442020,2020-05-17,4
2,443904,2020-05-17,1


In [10]:
# intersection of logins and churned orgs
perc_active_logged_in = (float(len(logins['org'].unique())) / float(active_org_count['active_orgs'].iloc[0])) * 100.
len(logins['org'].unique()), len(logins[logins['org'].isin(churned_2020['id'].tolist())]), "{:.2f}% active orgs have logged in".format(perc_active_logged_in)

(2641, 0, '67.77% active orgs have logged in')

None of the 2020 churned organizations have logged into the system this year

In [11]:
logins['date'].value_counts().reset_index().sort_values('index').head(7)

Unnamed: 0,index,date
114,2019-06-18,1
116,2019-08-30,1
119,2019-10-26,1
118,2019-10-27,1
115,2020-01-12,1
117,2020-01-23,1
112,2020-01-25,107


There are only 2500 organizations represented in the login data. There is consistent login data from the end of January. There are 3850 active organizations, so __33% of active organizations have not logged into the system since 1/25/2020 (almost 4 months ago as of the time of this writing)__.

The inverse might be a helpful sign here. If none of the churned organizations appear in the login data and 65% of the organizations login with some kind of regularity, it would stand to reason that logins are a (possibly weak) indicator of retention.

__<span style="color:blue">@todo it would be a good idea to explore login frequencies of the orgs that are logging in regularly</span>__

# users created

In [5]:
q = "select org, timestamp 'epoch' + created_at * interval '1 second' as created_at, status from users order by created_at desc"
df_users = redshift_query_read(q, schema='secure')
df_users['created_at'] = pd.to_datetime(df_users['created_at']).dt.date
df_users.head(3)

Unnamed: 0,org,created_at,status
0,442870.0,2020-08-25,
1,,2020-08-25,
2,443247.0,2020-08-24,


In [9]:
len(df_users[df_users['org'].isin(churned_2020['id'].tolist())])

0

In [13]:
df_users.groupby('org')['created_at'].count().reset_index().head()

Unnamed: 0,org,created_at
0,0,2
1,100355,2
2,100356,3
3,1005,1
4,10160,1


# integrations

Despite not seeing any login activity for churned organizations, integrations can be automated. I am still unsure of the actual trigger for "activated_integration" but it may be integration execution rather than actual "activation" since the numbers seem rather high given the number of organizations actively using integrations. Let's check to see if any of the churn organizations appear in the integrations data to verify.

In [9]:
q = '''select
            users.org as org,
            date_trunc('month', original_timestamp) as month,
            count(activated_integration.id) as activations 
        from activated_integration
            left join users on activated_integration.uuid=users.uuid
        group by month, org
        order by month desc;'''
integrations = redshift_query_read(q, schema="secure")

In [10]:
integrations.head(3)

Unnamed: 0,org,month,activations
0,34800,2020-04-01,1
1,444660,2020-04-01,1
2,29705,2020-04-01,1


In [11]:
# intersection of integrations and churned orgs
len(integrations['org'].unique()), len(integrations[integrations['org'].isin(churned_2020['id'].tolist())])

(38, 0)

No churned organizations here. We know that the churn rates of organizations that use integrations is exceedingly low so this is not particularly surprising. It does support the idea that active integrations increases retention.