In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(1, '../../../../scripts/')
from s3_support import *
from datetime import date
from dateutil.relativedelta import relativedelta

In [2]:
# Zendesk ticket export; each ticket carries its org ID in the 'external_id' column
zd = get_dataframe_from_file(
    "qgiv-stats-data",
    "zendesk.mrgd.csv",
)

In [3]:
# Reduce the ticket export to the columns used below and attach an integer org ID.
zd = zd[['external_id', 'created_at', 'updated_at']]

# Keep only tickets whose external_id is a plain decimal integer. isdecimal()
# (unlike isnumeric()) accepts exactly the digit characters int() can parse, so
# the cast below cannot fail on values such as fractions or superscripts.
# .copy() gives an independent frame so the column assignments that follow do
# not write into a slice of the original (SettingWithCopyWarning).
zd = zd[zd['external_id'].apply(lambda x: str(x).isdecimal())].copy()

# Strict cast: every remaining value is a decimal string, so a failure here would
# signal unexpected data rather than being silently left as strings.
zd['org'] = zd['external_id'].astype(int)
zd = zd.drop(columns='external_id')

# Parse ticket creation times so they can be compared against date cutoffs.
zd['created_at'] = pd.to_datetime(zd['created_at'])
zd.head()

Unnamed: 0,created_at,updated_at,org
0,2018-03-17 02:34:39,2020-07-31 12:04:58,436247
1,2018-10-11 17:51:04,2020-04-20 15:05:23,533
2,2019-02-26 18:44:05,2020-07-24 12:13:39,369
3,2019-03-06 23:00:54,2020-08-03 14:04:44,442207
4,2019-05-22 19:39:32,2020-07-14 18:04:42,443030


In [4]:
# Overall ticket/org counts, plus the same counts restricted to the last 6 months.
six_months_ago = date.today() + relativedelta(months=-6)

# Compare against a pd.Timestamp (midnight of the cutoff date) instead of a bare
# datetime.date: pandas deprecated coercing datetime.date in datetime64
# comparisons and will raise TypeError in the future.
six_months_cutoff = pd.Timestamp(six_months_ago)

# Filter once and reuse for both the ticket and org counts.
zd_recent = zd[zd['created_at'] >= six_months_cutoff]

print("{} tickets".format(len(zd)))
print("{} orgs".format(len(zd['org'].unique())))
print("from {} to {}".format(zd['created_at'].min(), zd['created_at'].max()))
print("-"*20)
print("last 6 months:")
print("{} tickets".format(len(zd_recent)))
print("{} orgs".format(len(zd_recent['org'].unique())))

4590 tickets
1679 orgs
from 2018-03-17 02:34:39 to 2020-08-03 16:08:26
--------------------
last 6 months:
4420 tickets
1637 orgs


'datetime.date' is coerced to a datetime. In the future pandas will
not coerce, and a TypeError will be raised. To retain the current
behavior, convert the 'datetime.date' to a datetime with
'pd.Timestamp'.
  import sys
'datetime.date' is coerced to a datetime. In the future pandas will
not coerce, and a TypeError will be raised. To retain the current
behavior, convert the 'datetime.date' to a datetime with
'pd.Timestamp'.
  


## churn correlation

In [5]:
# Load organizations and flag churn (a non-null 'date_closed') and Zendesk activity.
df_orgs = get_dataframe_from_file("qgiv-stats-data", "organizations.names.csv")
df_orgs['churned'] = ~df_orgs['date_closed'].isnull()
df_orgs['id'] = df_orgs['id'].astype(int)

# Count tickets per org once, then look each org up. This replaces filtering the
# whole ticket frame for every organization row (O(orgs x tickets)).
org_ticket_counts = zd['org'].value_counts()

# An org is "zendesk active" when it has at least one ticket.
df_orgs['zendesk_active'] = df_orgs['id'].isin(org_ticket_counts.index)
# Orgs absent from the ticket data map to NaN; treat them as zero tickets.
df_orgs['zendesk_tickets'] = df_orgs['id'].map(org_ticket_counts).fillna(0).astype(int)

In [6]:
# Summarize churn vs. Zendesk activity using named masks instead of repeated filters.
is_churned = df_orgs['churned']
is_zd_active = df_orgs['zendesk_active']
ticket_counts_by_org = df_orgs['zendesk_tickets']

print("{} orgs".format(len(df_orgs)))
print("{} churned".format(len(df_orgs[is_churned])))
print("{} zendesk active".format(len(df_orgs[is_zd_active])))
print("{} churned & zendesk active".format(len(df_orgs[is_churned & is_zd_active])))
print("-"*20)
print("ticket counts:")
print("average (all): {:.2f}".format(ticket_counts_by_org.mean()))
print("average (churned): {:.2f}".format(ticket_counts_by_org[is_churned].mean()))
print("average (non-churned): {:.2f}".format(ticket_counts_by_org[~is_churned].mean()))

8755 orgs
1765 churned
1678 zendesk active
65 churned & zendesk active
--------------------
ticket counts:
average (all): 0.52
average (churned): 0.06
average (non-churned): 0.64


## fundraising volume correlation

In [10]:
# Total transaction amount per org, restricted to rows with status 'A'
q = (
    "select sum(amount) as vol, org "
    "from transactions "
    "where status='A' "
    "group by org"
)
trans = redshift_query_read(q)

In [11]:
# Attach Zendesk activity to the per-org transaction volumes.
trans['org'] = trans['org'].astype(int)

# Count tickets per org once, then look each org up. This replaces filtering the
# whole ticket frame for every transaction row (O(orgs x tickets)).
tickets_per_org = zd['org'].value_counts()

# An org is "zendesk active" when it has at least one ticket.
trans['zendesk_active'] = trans['org'].isin(tickets_per_org.index)
# Orgs absent from the ticket data map to NaN; treat them as zero tickets.
trans['zendesk_tickets'] = trans['org'].map(tickets_per_org).fillna(0).astype(int)

In [14]:
# Compare average volume for orgs with and without Zendesk tickets, then show
# the correlation between ticket count and volume.
zd_active_mask = trans['zendesk_active']
vol_active = trans[zd_active_mask]['vol']
vol_inactive = trans[~zd_active_mask]['vol']

print("average funds raise:")
print("zendesk active: ${:,.2f} ({})".format(vol_active.mean(), len(vol_active)))
print("zendesk inactive: ${:,.2f} ({})".format(vol_inactive.mean(), len(vol_inactive)))
print("-"*20)
print("ticket count correlation:")
ticket_vol_corr = trans[['zendesk_tickets', 'vol']].corr()
print(ticket_vol_corr)

average funds raise:
zendesk active: $494,237.87 (1637)
zendesk inactive: $112,294.35 (3894)
--------------------
ticket count correlation:
                 zendesk_tickets       vol
zendesk_tickets         1.000000  0.072594
vol                     0.072594  1.000000
