In [1]:
import pandas as pd
import numpy as np
# Input CSV file names (no directory component, so they resolve against the
# current working directory).
data1, data2, data3 = (
    "1_paid_marketing.csv",
    "2_hcp_data.csv",
    "3_shifts_data.csv",
)

# Read each file into its own DataFrame, in the same order as the names above.
campaigns, hcp, shifts = (pd.read_csv(csv_name) for csv_name in (data1, data2, data3))

In [2]:
# Preview only the first rows; a bare `hcp` asks the notebook to render the
# whole table.
hcp.head(10)

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY
0,633be91bfb9a096b9e4d3a82,"Wichita, KS",CNA,10/5/22,10/13/22,10/29/22,11/1/22,11/21/22,2,,17631861264
1,632d018f9603d7808339a6bb,"Tampa-St. Petersburg-Clearwater, FL",CNA,9/23/22,,,,,,,17631861264
2,6361931a7ccb0c3b06407c33,"Scranton--Wilkes-Barre, PA",RN,11/1/22,11/1/22,,,,,,18530777721
3,62feb63c88ad3001baf3108f,"St. Louis, MO-IL",CNA,8/18/22,8/18/22,,,,,,17631861264
4,63841ac43540dfe4c2ec0996,"Cincinnati, OH-KY-IN",CNA,11/28/22,11/28/22,,,,,,18530777721
5,63713dba7ccb0c3b06b860e4,"Milwaukee-Waukesha, WI",CNA,11/13/22,11/13/22,,,,,,18530777721
6,628d687b3822a201bff6cf91,"Columbus, OH",CNA,5/24/22,,,,,,,16920183767
7,63309d8c9603d780838a78aa,"Atlanta-Sandy Springs-Alpharetta, GA",LVN,9/25/22,9/26/22,9/26/22,10/10/22,10/16/22,23,,17631861264
8,6374ae287ccb0c3b062e2919,"Miami-Fort Lauderdale-Pompano Beach, FL",CNA,11/16/22,11/16/22,,,,,,18530777721
9,638678383c1385547063e170,"Providence-Warwick, RI-MA",RN,11/29/22,11/29/22,12/1/22,,,,,18661880305


In [3]:
# Total number of accounts: rows of hcp whose HCP_ID is not missing
tot_accounts = hcp['HCP_ID'].notna().sum()
tot_accounts

67317

In [4]:
# Count the distinct referrer IDs.
# Rows without a referrer are excluded whether they are still NaN or already
# carry the 'None' placeholder that a later cell writes into REFERRER, so the
# result does not depend on the order in which the cells are executed.
has_referrer = hcp['REFERRER'].notna() & hcp['REFERRER'].ne('None')
unique_referrers = hcp.loc[has_referrer, 'REFERRER'].nunique()
unique_referrers

6151

In [5]:
# Replace empty referrers with 'None' for easier filtering later.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the hcp['REFERRER'] selection: that chained in-place
# form emits a FutureWarning on pandas 2.2 and no longer updates hcp once
# Copy-on-Write is active (pandas 3), which would leave the later
# REFERRER == 'None' filters with nothing to match.
hcp['REFERRER'] = hcp['REFERRER'].fillna('None')
hcp.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY
0,633be91bfb9a096b9e4d3a82,"Wichita, KS",CNA,10/5/22,10/13/22,10/29/22,11/1/22,11/21/22,2.0,,17631861264
1,632d018f9603d7808339a6bb,"Tampa-St. Petersburg-Clearwater, FL",CNA,9/23/22,,,,,,,17631861264
2,6361931a7ccb0c3b06407c33,"Scranton--Wilkes-Barre, PA",RN,11/1/22,11/1/22,,,,,,18530777721
3,62feb63c88ad3001baf3108f,"St. Louis, MO-IL",CNA,8/18/22,8/18/22,,,,,,17631861264
4,63841ac43540dfe4c2ec0996,"Cincinnati, OH-KY-IN",CNA,11/28/22,11/28/22,,,,,,18530777721


In [6]:
# Make LIFETIME_SHIFTS numeric so it can be used in calculations later;
# errors='coerce' turns anything that cannot be parsed into NaN
hcp['LIFETIME_SHIFTS'] = pd.to_numeric(hcp['LIFETIME_SHIFTS'], errors='coerce')
hcp.dtypes

HCP_ID               object
MSA                  object
QUALIFICATION        object
CREATED_AT           object
LICENSE_REVIEWED     object
ONBOARD_AT           object
FIRST_CLAIM_TIME     object
FIRST_SHIFT_TIME     object
LIFETIME_SHIFTS     float64
REFERRER             object
CAMPAIGN_KEY         object
dtype: object

In [7]:
# Re-read the HCP file and keep only accounts that have a first shift
# (rows with a missing FIRST_SHIFT_TIME are dropped)
working_hcp = pd.read_csv(data2).dropna(subset=['FIRST_SHIFT_TIME'])

In [8]:
# Number of accounts that have worked: non-missing HCP_IDs left in working_hcp
tot_working_accounts = working_hcp['HCP_ID'].notna().sum()
tot_working_accounts

16350

In [9]:
# Re-read the HCP file and keep only accounts that name a referrer
# (rows with a missing REFERRER are dropped)
referral_accounts = pd.read_csv(data2).dropna(subset=['REFERRER'])

In [10]:
# Number of referrals: non-missing REFERRER values in referral_accounts
tot_refers = referral_accounts['REFERRER'].notna().sum()
tot_refers

9167

In [11]:
# Keep only the referred accounts that activated, i.e. have a FIRST_SHIFT_TIME
referral_accounts = referral_accounts.dropna(subset=['FIRST_SHIFT_TIME'])

In [12]:
# Number of converted referrals: REFERRER values remaining after the
# accounts without a first shift were removed
tot_conv_refers = referral_accounts['REFERRER'].notna().sum()
tot_conv_refers

6541

In [13]:
# Conversion rate of referrals: referred accounts that have a first shift
# divided by all referred accounts
conv_rate = tot_conv_refers / tot_refers
conv_rate

0.7135376895385622

In [14]:
# Share of active accounts that claimed a referral: converted referrals divided
# by all accounts with a first shift (a fraction, not multiplied by 100)
ref_workers = tot_conv_refers / tot_working_accounts
ref_workers

0.4000611620795107

In [15]:
# Share of workers who referred others, as a fraction (not multiplied by 100):
# distinct referrer IDs divided by the number of accounts with a first shift.
# NOTE(review): the numerator counts every distinct REFERRER value in hcp;
# nothing here checks that those referrers are themselves among the working
# accounts counted in the denominator - confirm this is the intended metric.
referrer_workers = unique_referrers / tot_working_accounts
referrer_workers

0.3762079510703364

# Lifetime Return of Referral vs. Non-referral Acquisitions

In [16]:
# Check that there are only 3 possible qualifications.
# dropna=False makes missing qualifications show up as their own row;
# by default value_counts() leaves NaN out, which would hide them from this check.
hcp['QUALIFICATION'].value_counts(dropna=False)

CNA    39499
LVN    13939
RN     13879
Name: QUALIFICATION, dtype: int64

In [17]:
# Active accounts created without a referral: REFERRER holds the 'None'
# placeholder and FIRST_SHIFT_TIME is present. The index is renumbered from 0.
nonref_active = (
    hcp[hcp['REFERRER'].eq('None')]
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
nonref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY
0,633be91bfb9a096b9e4d3a82,"Wichita, KS",CNA,10/5/22,10/13/22,10/29/22,11/1/22,11/21/22,2.0,,17631861264
1,63309d8c9603d780838a78aa,"Atlanta-Sandy Springs-Alpharetta, GA",LVN,9/25/22,9/26/22,9/26/22,10/10/22,10/16/22,23.0,,17631861264
2,61e75e34dfb86b018a0d7cf7,"Minneapolis-St. Paul-Bloomington, MN-WI",CNA,1/19/22,5/24/22,5/24/22,6/1/22,6/3/22,49.0,,15595984274
3,6335c00b9603d78083e59772,"Chicago-Naperville-Elgin, IL-IN-WI",CNA,9/29/22,10/8/22,10/8/22,10/8/22,10/24/22,13.0,,17631861264
4,628e47fca9b31401c35619d2,"Dallas-Fort Worth-Arlington, TX",RN,5/25/22,5/25/22,5/27/22,8/13/22,8/13/22,8.0,,18530777721


In [18]:
# Calculate avg lifetime value of non-referral active accounts.
# An account's profit is its LIFETIME_SHIFTS multiplied by the per-shift rate
# of its QUALIFICATION; the average divides the total by the number of accounts.
nraa_hcp = nonref_active['HCP_ID'].count()

# Per-shift rate for each qualification. Kept as separate names because the
# referral calculation in a later cell reads them.
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are addressed by label and the whole column is processed at once,
# instead of looping over rows with positional iloc[x, 2] / iloc[x, 8], which
# would silently read the wrong columns if the column order ever changed.
# A qualification that is not in the mapping gets NaN and therefore adds
# nothing to the total (the row-by-row version skipped such rows as well).
# Rows with a missing LIFETIME_SHIFTS are likewise left out of the total.
nraa_rate_per_shift = nonref_active['QUALIFICATION'].map(
    {'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}
)
nraa_profit = (nonref_active['LIFETIME_SHIFTS'] * nraa_rate_per_shift).sum()

avg_nr_profit = nraa_profit / nraa_hcp
avg_nr_shifts = nonref_active['LIFETIME_SHIFTS'].mean()
print('Average profits from nonreferred workers: ' + str(avg_nr_profit))
print('Average shifts for nonreferred workers: ' + str(avg_nr_shifts))

Average profits from nonreferred workers: 129.01620960342544
Average shifts for nonreferred workers: 17.093383627281067


In [19]:
# Active accounts created WITH a referral: REFERRER is anything other than the
# 'None' placeholder and FIRST_SHIFT_TIME is present. The index is renumbered from 0.
ref_active = (
    hcp[hcp['REFERRER'].ne('None')]
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
ref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY
0,62b2499e52b98c01b901cd9d,"Chicago-Naperville-Elgin, IL-IN-WI",RN,6/21/22,6/22/22,6/22/22,6/29/22,6/29/22,2.0,62a48ae5691b0409a3c93073,16802693037
1,625122a4a2d1ff01b003e0ec,"Youngstown-Warren-Boardman, OH-PA",CNA,4/9/22,8/20/22,8/21/22,8/22/22,8/24/22,33.0,626d668f26a30901c5af9741,17631861264
2,632376bb9603d78083c59bf1,"Portland-Vancouver-Hillsboro, OR-WA",CNA,9/15/22,9/15/22,9/30/22,10/4/22,10/4/22,22.0,62edf0aa44b0f101b46c89ab,17944426072
3,631b401f9603d78083193e28,"Los Angeles-Long Beach-Anaheim, CA",CNA,9/13/22,9/13/22,9/23/22,9/27/22,10/1/22,6.0,61c60d19b8bfc80180d64eac,17631861264
4,63773e1d7ccb0c3b06646f20,"Providence-Warwick, RI-MA",CNA,11/18/22,11/18/22,11/18/22,11/22/22,11/24/22,20.0,63322f689603d780835ab925,18576997082


In [20]:
# Calculate avg lifetime value of referral active accounts.
# An account's profit is its LIFETIME_SHIFTS multiplied by the per-shift rate
# of its QUALIFICATION (cna_rate / lvn_rate / rn_rate, defined in an earlier
# cell); the average divides the total by the number of accounts.
raa_hcp = ref_active['HCP_ID'].count()

# Columns are addressed by label and the whole column is processed at once,
# instead of looping over rows with positional iloc[x, 2] / iloc[x, 8], which
# would silently read the wrong columns if the column order ever changed.
# A qualification that is not in the mapping gets NaN and therefore adds
# nothing to the total (the row-by-row version skipped such rows as well).
# Rows with a missing LIFETIME_SHIFTS are likewise left out of the total.
raa_rate_per_shift = ref_active['QUALIFICATION'].map(
    {'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}
)
raa_profit = (ref_active['LIFETIME_SHIFTS'] * raa_rate_per_shift).sum()

avg_ref_profit = raa_profit / raa_hcp
avg_ref_shifts = ref_active['LIFETIME_SHIFTS'].mean()
print('Average profits from referred workers: ' + str(avg_ref_profit))
print('Average shifts for referred workers: ' + str(avg_ref_shifts))

Average profits from referred workers: 111.60525913468888
Average shifts for referred workers: 16.250267543189114
