In [2]:
import sys, datetime
sys.path.append("../../scripts/")
from s3_support import *

# calculate trailing 24 months per donor

In [3]:
print("drop old values")
q = '''drop table if exists donors_orgs_trailing'''
redshift_query_write(q, schema='public')

cleanup potential old tables


In [5]:
print("calculate 12 to 24 months")
q = '''select
            org,
            email as donor,
            (case
                when date>=dateadd(month, -12, current_date) then 1 else 0
            end) as past_twelve,
            count(id) as transactions,
            sum(amount) as volume,
            count(distinct(t.recurring)) as recurring_origin,
            count(distinct(case when recurring!=0 then recurring else null end)) as recurring,
            sum(purchases_quantity) as purchases,
            sum(donations_count) as donations,
            sum(events_count) as events,
            sum(registrations_count) as registrations,
            sum(auctiondonation_count) as auctiondonations,
            sum(auctionpurchase_count) as auctionpurchases,
            sum(gift_assist_count) as giftassist,
            sum(matchinggifts_count) as matchinggifts
        into public.donors_orgs_trailing
        from production.transactions
        where
            status='A' and
            date>=dateadd(month, -24, current_date)
        group by org, email, past_twelve'''
redshift_query_write(q, schema='production')

calculate last 12 to 24 months


TypeError: 'NoneType' object is not iterable

# calculate trailing 12 months per org

In [None]:
print("drop old values")
q = '''truncate table orgs_reetention_trailing'''
redshift_query_write(q, schema='public')

In [11]:
q = '''select * from donor_orgs_trailing'''
df = redshift_query_read(q, schema='public')

In [13]:
"{:,}".format(len(df))

'5,376,315'

In [41]:
org_data = []
for org in df['org'].unique():
    _df = df[df['org']==org]
    rolling_12 = _df[_df['past_twelve']==1]
    rolling_12_24 = _df[_df['past_twelve']==2]
    
    if len(rolling_12_24) == 0 or len(rolling_12) == 0:
        # skip orgs with insufficient activity
        continue
    
    retained_donors = len(rolling_12[rolling_12['donor'].isin(rolling_12_24['donor'])])
    churned_donors = len(rolling_12_24[~rolling_12_24['donor'].isin(rolling_12['donor'])])
    new_donors = len(rolling_12[~rolling_12['donor'].isin(rolling_12_24['donor'])])
    if rolling_12_24['volume'].mean() == 0:
        mean_value = 0
    else:
        mean_value = (rolling_12['volume'].mean() - rolling_12_24['volume'].mean()) / rolling_12_24['volume'].mean()
    if rolling_12_24['volume'].median() == 0:
        median_value = 0
    else:
        median_value = (rolling_12['volume'].median() - rolling_12_24['volume'].median()) / rolling_12_24['volume'].median()
    
    org_data.append({
        'org': org,
        'retention': retained_donors / len(rolling_12_24),
        'churn': churned_donors / len(rolling_12_24),
        'new_donors': new_donors / len(rolling_12),
        'mean_value_change': mean_value,
        'median_value_change': median_value
    })

In [42]:
len(org_data), len(df['org'].unique())

(3465, 5175)

In [43]:
orgs_retention = pd.DataFrame(org_data)

In [44]:
orgs_retention.tail()

Unnamed: 0,org,retention,churn,new_donors,mean_value_change,median_value_change
3460,180227,0.076923,0.846154,0.0,-0.319372,0.0
3461,444468,0.0,1.0,1.0,0.0,0.0
3462,447666,0.0,1.0,1.0,3.277567,249.0
3463,448530,0.0,1.0,1.0,49.0,49.0
3464,448666,0.0,1.0,1.0,0.0,0.0


In [45]:
orgs_retention.mean()

org                    372031.959885
retention                   0.455288
churn                       0.585045
new_donors                  0.560827
mean_value_change          10.775611
median_value_change         3.403313
dtype: float64

In [46]:
save_dataframe_to_file('qgiv-stats-data', 'org_retention.csv', orgs_retention)

uploading to S3
Done


In [None]:
print("copying data from S3 to redshift")
q = '''copy org_retention
        from 's3://qgiv-stats-data/{}'
        iam_role 'arn:aws:iam::637885584661:role/AWSRoleForRedshift'
        emptyasnull
        blanksasnull
        fillrecord
        delimiter ','
        ignoreheader 1
        region 'us-east-1';'''.format('org_retention.csv')

redshift_query_write(q, schema='public')