In [1]:
import pandas as pd
import sys

sys.path.insert(1, '../../../scripts/')
from s3_support import *

In [2]:
# Cleaned activity exports to merge into a single frame.
files = ['tasks.clean.csv', 'events.clean.csv']

# Load every export and stack them row-wise. pd.concat replaces
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
# As with append, each frame keeps its own index labels and the result
# carries the union of the columns.
df = pd.concat(
    [get_dataframe_from_file("sfc-export", name) for name in files],
    sort=False,
)

In [3]:
# Parse creation timestamps so rows can be filtered by calendar year.
df['CreatedDate'] = pd.to_datetime(df['CreatedDate'])

# Keep only rows created in 2016 or later. The explicit .copy() makes the
# filtered frame independent of the original, so the column assignments
# below do not raise pandas' SettingWithCopyWarning.
df = df[df['CreatedDate'].dt.year >= 2016].copy()

# Normalize the text columns to lower case.
for text_column in ('Type', 'Subject', 'Description'):
    df[text_column] = df[text_column].str.lower()

In [4]:
# Report the frame's dimensions, then list its column labels.
print("{} rows; {} columns".format(*df.shape))
print("\n")

print("Columns:", df.columns, sep="\n")

459989 rows; 11 columns


Columns:
Index(['Id', 'WhoId', 'WhatId', 'Subject', 'OwnerId', 'Description', 'Type',
       'AccountId', 'CreatedDate', 'CreatedById', 'SystemModstamp'],
      dtype='object')


In [5]:
# Relative frequency of each activity type (fractions of the non-null rows, most common first).
df['Type'].value_counts(normalize=True)

email                    0.755201
call                     0.119953
pre-demo follow up       0.051603
initial contact          0.027351
post-demo follow up      0.024622
demo completed           0.004700
lead qualification       0.003826
training                 0.003309
meeting                  0.002270
demo scheduled           0.001763
60_day_follow_up_call    0.000872
downgrade                0.000854
cancellation             0.000648
upgrade                  0.000630
one pager campaign       0.000437
kiosk activity           0.000398
none                     0.000383
lead submitted form      0.000317
mobile vt activity       0.000274
prepared materials       0.000220
t-shirt campaign         0.000133
interested               0.000074
not interested           0.000054
recovery                 0.000050
sale closed              0.000024
noise                    0.000013
not ready                0.000009
webinar                  0.000009
60 day follow up         0.000002
cos - 45 day c

In [6]:
# Distinct labels currently present in the Type column, in order of first appearance.
type_list = pd.unique(df['Type']).tolist()

# another round of type correction
def infer_type(r):
    """Return a corrected activity type for a single row.

    ``r`` is indexed by the keys 'Subject', 'Description' and 'Type'
    (for example a DataFrame row supplied by ``df.apply(..., axis=1)``).
    Rules are tried in order and the first one that applies wins:

    1. Subject or description is exactly one of the labels in the
       module-level ``type_list`` -> that label (earliest-listed label wins).
    2. Subject starts with "email:" on a row typed 'call' -> 'email'.
    3. Description starts with 'onboarding', or the subject contains
       '/onboarding/' -> 'onboarding'.
    4. Description starts with 'training' -> 'training'.
    5. On a row typed 'email': subject is exactly 'phone call', or the
       description contains 'call with' -> 'call'.

    If no rule applies, the row's existing 'Type' value is returned as is.
    """
    subject = r['Subject']
    description = r['Description']

    # Rule 1: exact match against a known label, scanning in type_list order.
    for label in type_list:
        if subject == label or description == label:
            return label

    current = r['Type']
    # str() lets the prefix/substring checks run on non-string cells (e.g. NaN).
    subject_text = str(subject)
    description_text = str(description)

    if subject_text.startswith("email:") and current == 'call':
        return 'email'
    if description_text.startswith('onboarding') or '/onboarding/' in subject_text:
        return 'onboarding'
    if description_text.startswith('training'):
        return 'training'
    # Both remaining rules only re-label rows currently typed as 'email'.
    if current == 'email' and (subject == 'phone call' or 'call with' in description_text):
        return 'call'

    return current
    
# Re-label every row by passing it (row-wise) through infer_type.
df['Type'] = df.apply(func=infer_type, axis='columns')

In [10]:
# Type distribution expressed as percentages rather than fractions.
df['Type'].value_counts(normalize=True).mul(100.)

email                    75.039403
call                     13.159445
pre-demo follow up        5.150558
post-demo follow up       2.458537
initial contact           2.071789
demo completed            0.438924
lead qualification        0.377400
training                  0.307833
meeting                   0.155221
demo scheduled            0.150656
downgrade                 0.087828
60_day_follow_up_call     0.083480
upgrade                   0.070219
cancellation              0.068480
not interested            0.062610
onboarding                0.060436
one pager campaign        0.043697
kiosk activity            0.040218
cos - 45 day call         0.038914
lead submitted form       0.031088
mobile vt activity        0.028044
none                      0.025435
prepared materials        0.021957
t-shirt campaign          0.013261
interested                0.005218
recovery                  0.005000
noise                     0.001304
sale closed               0.001304
60 day follow up    

In [9]:
# Let text cells show up to 500 characters before pandas truncates them.
pd.set_option('display.max_colwidth', 500)

# Inspect a window of up to 20 rows near the end of the 'email' rows
# that have both a subject and a description.
df.loc[df['Type'] == 'email', ['Subject', 'Description']].dropna().iloc[-500:-480]

Unnamed: 0,Subject,Description
464054,email: re: qgiv - auctions,"additional to: kris.hoxie@bigsupnorth.com cc: bcc: attachment: subject: re: qgiv - auctions body: hi kris, thank you for providing that information, we do see onecause as one of our closest competitors so it's good to know what they were able to offer that qgiv did not. we will add this information to what need to improve upon. can i give you a quick call to better understand their system in regards to their live auction features and the check-out process? i would not anticipate this c..."
464055,email: re: qgiv - auctions,"additional to: kris.hoxie@bigsupnorth.com cc: bcc: attachment: subject: re: qgiv - auctions body: hi kris, thank you for providing that information, we do see onecause as one of our closest competitors so it's good to know what they were able to offer that qgiv did not. we will add this information to what need to improve upon. can i give you a quick call to better understand their system in regards to their live auction features and the check-out process? i would not anticipate this c..."
464056,unresolved email: re: qgiv sign in,"additional to: bethcolbyclark@gmail.com cc: bcc: attachment: image001.jpg subject: re: qgiv sign in body: hi beth, i?ve asked our product team about this and their currently taking a closer look to see what the issue may be. one suggestion was to try logging at https://secure.qgiv.com/control/login with bethclark@stepfunder.com<mailto:bethclark@stepfunder.com> as the username and whatever password you attempted to set last night. hopefully that will work and you can get in to your demo ..."
464058,email: appointment request,"additional to: jwithrow@familypromisebutlercounty.com cc: bcc: attachment: subject: appointment request body: jennifer, i hope you are doing well. are you interested in scheduling a live customer-specific gotomeeting webinar to learn more about how qgiv can help you specifically? my calendly calendar link to schedule a qgiv demonstration.<https://calendly.com/donald-dial/60min> respectfully; [http://new.qgiv.com/_resources/images/donald.jpg] [https://www.g2crowd.com/products/qgiv..."
464073,email: qgiv for advocates for basic legal equality,"additional to: kharshaw@ablelaw.org cc: bcc: attachment: subject: qgiv for advocates for basic legal equality body: good morning karla, thanks for signing up for our giving tuesday series . i trust you found it to be a valuable resource. if you've got a few minutes, i'd love to talk to you and learn more about your organization and your online fundraising programs. it'd be great to hear how you're doing, learn about your fundraising goals, and talk about how qgiv can help you and the ..."
464074,email: qgiv for st. vincent de paul of baltimore,"additional to: dmonaemccloud@gmail.com cc: bcc: attachment: subject: qgiv for st. vincent de paul of baltimore body: good morning darnita, thanks for downloading our fundraising templates. i trust you found it to be a valuable resource. if you've got a few minutes, i'd love to talk to you and learn more about your organization and your online fundraising programs. it'd be great to hear how you're doing, learn about your fundraising goals, and talk about how qgiv can help you and the w..."
464080,email: re: women?s resource center,"additional to: execdir@tampabay.rr.com cc: bcc: attachment: auctions_package.pdf subject: re: women?s resource center body: hi cherie, thank you for going through the demonstration this morning. attached to this email you will find the handout information that i mentioned. hopefully this helps. let me know if any questions come up as you continue your research. thanks, [http://new.qgiv.com/_resources/images/aaron.jpg] [https://www.g2crowd.com/products/qgiv/widgets/stars?color=b..."
464081,email: re: women?s resource center,"additional to: execdir@tampabay.rr.com cc: bcc: attachment: auctions_package.pdf subject: re: women?s resource center body: hi cherie, thank you for going through the demonstration this morning. attached to this email you will find the handout information that i mentioned. hopefully this helps. let me know if any questions come up as you continue your research. thanks, [http://new.qgiv.com/_resources/images/aaron.jpg] [https://www.g2crowd.com/products/qgiv/widgets/stars?color=b..."
464092,email: re: qgiv setup,"additional to: john.grainger@ocsarts.net cc: afton.lorenz@qgiv.com bcc: attachment: image001.png, image002.png subject: re: qgiv setup body: hi john, i spoke with donor perfect yesterday. they let me know that they are working on the api credentials for your safe save merchant account. once they provide that information then we can update the application. thanks, thank you, aaron liford account executive aaron.liford@qgiv.com<mailto:aaron.liford@qgiv.com> 863.496.6121 from: john gra..."
464093,email: re: qgiv setup,"additional to: john.grainger@ocsarts.net cc: afton.lorenz@qgiv.com bcc: attachment: image001.png, image002.png subject: re: qgiv setup body: hi john, i spoke with donor perfect yesterday. they let me know that they are working on the api credentials for your safe save merchant account. once they provide that information then we can update the application. thanks, thank you, aaron liford account executive aaron.liford@qgiv.com<mailto:aaron.liford@qgiv.com> 863.496.6121 from: john gra..."


# Store merged data set

In [11]:
# Write the combined frame out as "activity_history.csv" under "sfc-export".
# The helper is not defined in this notebook; the recorded cell output
# reports an S3 upload.
save_dataframe_to_file("sfc-export", "activity_history.csv", df)

uploading to S3
Done
