# Brief

## Distinguishing characteristics

- active integrations: organizations that have used integrations show a significant tendency toward retention
- labeled logs: for Qgiv-only organizations, the distribution of log entries differs clearly between retained and churned organizations

## Engineering

- For the labeled log entries, examining the log distributions leading up to the churn event
- Looking specifically for account creation/delete events
- Looking specifically for form/event creation events

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Feature tables for the two models below (Qgiv and P2P), read from CSV files
# located in the current working directory.
ftr_agged_df_qgiv = pd.read_csv("churn_qgiv_ftrs.csv")
ftr_agged_df_p2p = pd.read_csv("churn_p2p_ftrs.csv")

# Model inputs: every Qgiv column whose name contains 'label_', followed by
# the 'integrations' column. The same list is reused for the P2P table.
ftr_cols = [col_name for col_name in ftr_agged_df_qgiv.columns if 'label_' in col_name]
ftr_cols.append('integrations')

# Column holding the value the classifiers are trained to predict.
target_col = 'churned'

In [None]:
# modeling qgiv
# Hold out 33% of the Qgiv rows for evaluation. The seed is fixed on both the
# split and the forest so that the model and every number printed below are
# reproducible between runs; without it each execution reshuffles the data
# and regrows a different forest.
train_X, test_X, train_y, test_y = train_test_split(
    ftr_agged_df_qgiv[ftr_cols],
    ftr_agged_df_qgiv[target_col],
    test_size=0.33,
    random_state=42,
)

rf = RandomForestClassifier(random_state=42)
rf.fit(train_X, train_y)

# Sum of the target over both splits, as a percentage of the number of
# distinct 'org' values.
# NOTE(review): this is the churn rate only if the target is 0/1 and the table
# has one row per org - confirm.
perc_churned = (float(np.sum(train_y) + np.sum(test_y)) / float(len(ftr_agged_df_qgiv['org'].unique()))) * 100.
print("Churned cases: {} training, {} testing, {:.2f}% of total".format(np.sum(train_y), np.sum(test_y), perc_churned))
# Mean accuracy on the held-out rows.
print("Accuracy: {}".format(rf.score(test_X, test_y)))

# Per-class precision / recall / F1 on the held-out rows.
y_pred = rf.predict(test_X)
print(classification_report(test_y, y_pred))

In [None]:
# modeling p2p
# Hold out 33% of the P2P rows for evaluation. The seed is fixed on both the
# split and the forest so that the model and every number printed below are
# reproducible between runs; without it each execution reshuffles the data
# and regrows a different forest.
# NOTE(review): ftr_cols was derived from the Qgiv table's columns - confirm
# the P2P table carries the same feature columns.
train_X, test_X, train_y, test_y = train_test_split(
    ftr_agged_df_p2p[ftr_cols],
    ftr_agged_df_p2p[target_col],
    test_size=0.33,
    random_state=42,
)

rf = RandomForestClassifier(random_state=42)
rf.fit(train_X, train_y)

# Sum of the target over both splits, as a percentage of the number of
# distinct 'org' values.
# NOTE(review): this is the churn rate only if the target is 0/1 and the table
# has one row per org - confirm.
perc_churned = (float(np.sum(train_y) + np.sum(test_y)) / float(len(ftr_agged_df_p2p['org'].unique()))) * 100.
print("Churned cases: {} training, {} testing, {:.2f}% of total".format(np.sum(train_y), np.sum(test_y), perc_churned))
# Mean accuracy on the held-out rows.
print("Accuracy: {}".format(rf.score(test_X, test_y)))

# Per-class precision / recall / F1 on the held-out rows.
y_pred = rf.predict(test_X)
print(classification_report(test_y, y_pred))

# Support functions

In [None]:
# Substrings used to classify log messages. A label's position in this list is
# the integer id returned by label_log_entry, so entries must not be reordered
# or removed.
# NOTE(review): several entries contain literal '%' placeholders (e.g.
# "has earned the %badge"); matching is a plain substring test, so they only
# match messages that contain that exact text - confirm against the log format.
log_entry_labels = ["has reached their fundraising","has reached its fundraising","has earned the %badge","had the %badge","donated %amount","made a donation to","was donated to","A donation was made to","% registered","joined %team",
                "has been registered","reset account password","switched donation from","activated recurring","updated donor information","deleted team","Raise Your First Donation","Share on Facebook / Twitter",
                "Upload Your Avatar","Recruit a Team Member","Update Your Personal Page","Send a Fundraising Email","updated organization","added participant","added payment method","edited payment method",
                "deleted payment method","cancelled recurring","deleted participant","changed settings for recipient","resent email receipt to email","updated personal information for transaction","updated donation information for transaction",
                "updated recurring","updated personal information for recurring","updated frequency information for recurring","updated payment method for recurring",
                "updated amount for recurring","updated payment expiration date for recurring","paused recurring","updated billing information for recurring","changed end date","changed start date","updated registration information for transaction","changed code from",
                "changed fee from","voided transaction","linked transaction","unlinked transaction","link transaction","linked recurring","unlinked recurring","added a return for transaction","added a chargeback for transaction",
                "refund","custom report","set form","changed form","changed organization","resent notification","resent admin notification","sent password reset email","edited team","changed maximum quantity",
                "disabled promo","switched participant","switched team","updated account password","added Form widget","updated Form widget","cloned a new form","edited donor","created Fixed Fee","updated Fixed Fee",
                "updated One Time Fee","created One Time Fee","promoted participant","added recipient","sms code","verified donation","changed username","merchant account","signup","edited participant","removed participant","API Access"]

def label_log_entry(msg):
    """Return the index in ``log_entry_labels`` of the label matching *msg*.

    Labels are tried in list order and the first one found as a substring of
    *msg* wins. If a longer label that contains that first match also occurs
    in *msg*, the longer one is returned instead. Without this, "unlinked
    transaction" and "unlinked recurring" could never be returned, because
    "linked transaction" and "linked recurring" come earlier in the list and
    are substrings of them.

    Args:
        msg: The log message text. ``None`` and float values (such as the NaN
            pandas uses for missing cells) are treated as "no message".

    Returns:
        The integer index of the matching label, or ``None`` when no label
        matches or *msg* is missing.
    """
    # A missing message cannot match anything; the substring test below would
    # raise TypeError on these values.
    if msg is None or isinstance(msg, float):
        return None

    for idx, label in enumerate(log_entry_labels):
        if label not in msg:
            continue

        # Prefer the longest label that contains this match and is itself
        # present in the message.
        best_idx, best_label = idx, label
        for other_idx, other in enumerate(log_entry_labels):
            if len(other) > len(best_label) and label in other and other in msg:
                best_idx, best_label = other_idx, other
        return best_idx

    return None