In [1]:
import pandas as pd
import numpy as np
import sys, datetime
sys.path.insert(1, '../../../../scripts/')
from s3_support import *

In [5]:
def get_orgs_logged_in_last_3_months():
    q = '''select
                distinct(users.org) as org
            from login
                left join users on login.user_id=users._id
            where
                login.original_timestamp >= add_months(current_date, -3);'''
    
    return redshift_query_read(q, schema="secure")


def get_diff_mean_growth_churned(churned_orgs_ids):
    q = '''select
                org,
                count(distinct form) as forms,
                date_trunc('month', date) as month,
                count(id) as count,
                sum(amount) as volume
            from transactions
                where status='A'
                group by org, date_trunc('month', date)
                order by date_trunc('month', date) desc;'''
    df_trans_agg = redshift_query_read(q)
    df_trans_agg['month'] = pd.to_datetime(df_trans_agg['month'])
    df_trans_agg.sort_values('month', ascending=True, inplace=True)

    org_growth_data = []

    for org in df_trans_agg['org'].unique():
        this_df = df_trans_agg[df_trans_agg['org']==org].copy()
        if len(this_df) <= 1:
            continue
        this_df['growth'] = this_df['volume'].diff() / this_df['volume'].shift(1)

        org_growth_data.append({
            'org': org,
            'growth': this_df['growth'].replace([np.inf, -np.inf], np.nan).dropna().mean()
        })

    growth_df = pd.DataFrame(org_growth_data)
    growth_df['churned'] = growth_df['org'].isin(churned_orgs_ids)
    
    mean_churned_growth_rate = growth_df[growth_df['churned']]['growth'].mean()
    
    growth_df['mean_diff_growth_churned'] = growth_df['growth'] - mean_churned_growth_rate
    
    return growth_df[['org', 'mean_diff_growth_churned']]


def get_orgs_created_users():
    q = "select org, timestamp 'epoch' + created_at * interval '1 second' as created_at, status from users order by created_at desc"
    df_users = redshift_query_read(q, schema='secure')
    df_users['created_at'] = pd.to_datetime(df_users['created_at']).dt.date
    return df_users.groupby('org')['created_at'].count().reset_index()

In [9]:
print("load orgs, integrations, transactions, recent logins, growth trend")
df_orgs = get_dataframe_from_file("qgiv-stats-data", "organizations.names.csv")
df_integrations = get_dataframe_from_file("qgiv-stats-data", 'integrations.csv')
df_trans = redshift_query_read("select * from transactions where status='A' and date>=DATEADD('month', -6, CURRENT_DATE)")
logged_in_3_months = get_orgs_logged_in_last_3_months()
created_users = get_orgs_created_users()

load orgs, integrations, transactions, recent logins, growth trend


In [10]:
print("prep data")
orgs_trans_counts = df_trans.groupby('org')['id'].count().reset_index()
orgs_never_viable = orgs_trans_counts[orgs_trans_counts['id']<100]['org'].tolist()

df_orgs['churned'] = ~df_orgs['date_closed'].isnull()

orgs_growth = get_diff_mean_growth_churned(df_orgs[df_orgs['churned']]['id'].tolist())
df_orgs = df_orgs.merge(orgs_growth, right_on="org", left_on="id")

df_orgs['integrations'] = df_orgs['id'].isin(df_integrations['org'].tolist())
df_orgs['recent_logins'] = df_orgs['id'].isin(logged_in_3_months['org'].tolist())
df_orgs['recent_created_users'] = df_orgs['id'].isin(created_users['org'].tolist())
df_orgs['never_viable'] = df_orgs['id'].isin(orgs_never_viable)

cols = ['id', 'org_name', 'status', 'churned', 'mean_diff_growth_churned', 
        'integrations', 'recent_logins', 'recent_created_users', 'never_viable']
df_orgs = df_orgs[cols]

prep data


In [11]:
print("filtering prior preds w/in 6 months")
six_months_ago = datetime.date.today() - datetime.timedelta(6*365/12)
tdy = datetime.datetime.today()
try:
    df_priorpreds = get_dataframe_from_file("qgiv-stats-data", "preds.churn.csv")
    df_priorpreds['date_predicted'] = pd.to_datetime(df_priorpreds['date_predicted'])
    df_priorpreds = df_priorpreds[df_priorpreds['date_predicted']>six_months_ago]
    print("\t{} prior preds found".format(len(df_priorpreds)))
except:
    df_priorpreds = pd.DataFrame(columns=['org', 'date_predicted'])
    print("\tno prior preds found")
    
df_orgs = df_orgs[df_orgs['status']=='active']

filtering prior preds w/in 6 months
	207 prior preds found


'datetime.date' is coerced to a datetime. In the future pandas will
not coerce, and a TypeError will be raised. To retain the current
behavior, convert the 'datetime.date' to a datetime with
'pd.Timestamp'.
  import sys


In [12]:
y_preds = df_priorpreds[df_priorpreds['date_predicted']==df_priorpreds['date_predicted'].max()]

In [15]:
df_orgs[df_orgs['id'].isin(y_preds['org'].tolist())].head()

Unnamed: 0,id,org_name,status,churned,mean_diff_growth_churned,integrations,recent_logins,recent_created_users,never_viable
12,427126,"United for USA, Inc",active,False,-88.316522,False,False,False,False
33,202756,5th Street Turkey Fund,active,False,-91.254033,False,False,False,False
124,442484,Alex Cruz For Mayor,active,False,-90.775966,False,False,False,False
155,247,AltaMed Health Services Corporation,active,False,-80.945939,False,False,False,False
163,153,America Abroad Media,active,False,-65.148925,False,False,False,False


In [19]:
labels = ['integrations', 'recent_logins', 'recent_created_users', 'mean_diff_growth_churned']
for _, o in df_orgs[df_orgs['id'].isin(y_preds['org'].tolist())].iterrows():
    print("{} ({})".format(o['org_name'], int(o['id'])))
    print("; ".join(["{}: {}".format(l, o[l]) for l in labels]))

 United for USA, Inc (427126)
integrations: False; recent_logins: False; recent_created_users: False; mean_diff_growth_churned: -88.31652164005529
5th Street Turkey Fund (202756)
integrations: False; recent_logins: False; recent_created_users: False; mean_diff_growth_churned: -91.25403303989712
Alex Cruz For Mayor (442484)
integrations: False; recent_logins: False; recent_created_users: False; mean_diff_growth_churned: -90.77596615493934
AltaMed Health Services Corporation (247)
integrations: False; recent_logins: False; recent_created_users: False; mean_diff_growth_churned: -80.94593906328834
America Abroad Media (153)
integrations: False; recent_logins: False; recent_created_users: False; mean_diff_growth_churned: -65.14892506729845
American Culture Council, Inc. (35819)
integrations: False; recent_logins: False; recent_created_users: False; mean_diff_growth_churned: -86.76551709699203
Amphibian Productions Inc (59395)
integrations: False; recent_logins: False; recent_created_users: 

# prior preds

In [2]:
df_priorpreds = get_dataframe_from_file("qgiv-stats-data", "preds.churn.csv")
df_priorpreds.tail(25)

Unnamed: 0,date_predicted,org
148,2020-09-14 10:31:29.058859,202756
149,2020-09-14 10:31:29.058859,442484
150,2020-09-14 10:31:29.058859,247
151,2020-09-14 10:31:29.058859,153
152,2020-09-14 10:31:29.058859,35819
153,2020-09-14 10:31:29.058859,59395
154,2020-09-14 10:31:29.058859,443261
155,2020-09-14 10:31:29.058859,443458
156,2020-09-14 10:31:29.058859,19421
157,2020-09-14 10:31:29.058859,36851
