# Further Exploration into Lifetime Return Differences

With only one year of data, the earlier lifetime-return comparison of referred vs. non-referred accounts may be inaccurate, because recently created accounts have had little time to accumulate shifts. This section looks at the lifetime return of accounts that were activated earlier in the provided data.

In [1]:
import pandas as pd
import numpy as np
# File names of the three input CSVs (relative to the working directory)
data1, data2, data3 = "1_paid_marketing.csv", "2_hcp_data.csv", "3_shifts_data.csv"

# Read each file into its own DataFrame
campaigns = pd.read_csv(filepath_or_buffer=data1)
hcp = pd.read_csv(filepath_or_buffer=data2)
shifts = pd.read_csv(filepath_or_buffer=data3)

In [2]:
# Make LIFETIME_SHIFTS numeric and FIRST_SHIFT_TIME a datetime.
# errors='coerce' turns values that cannot be parsed into NaN / NaT instead of raising.
hcp['LIFETIME_SHIFTS'] = pd.to_numeric(hcp['LIFETIME_SHIFTS'], errors='coerce')
hcp['FIRST_SHIFT_TIME'] = pd.to_datetime(hcp['FIRST_SHIFT_TIME'], errors='coerce')

In [3]:
# Split accounts by the date of their first shift:
#   hcp1 -> January 2022 through June 2022
#   hcp2 -> July 2022 through January 2023
# The indexed frame gets its own name instead of rebinding `hcp`, so `hcp`
# keeps FIRST_SHIFT_TIME as a regular column and this cell can be re-run.
hcp_by_first_shift = hcp.set_index('FIRST_SHIFT_TIME')
first_shift = hcp_by_first_shift.index

# Boolean masks are used instead of partial-string label slicing: the index is
# not sorted, and label slicing on a non-monotonic DatetimeIndex can raise
# KeyError in pandas >= 2.0. Rows keep their original order, and NaT entries
# compare False, so accounts without a first shift land in neither cohort.
hcp1 = hcp_by_first_shift[(first_shift >= '2022-01-01') & (first_shift < '2022-07-01')].copy()
hcp2 = hcp_by_first_shift[(first_shift >= '2022-07-01') & (first_shift < '2023-02-01')].copy()

In [4]:
# Compare the mean LIFETIME_SHIFTS of the two first-shift cohorts
print(f"Avg shifts of accounts active in 1st half: {hcp1['LIFETIME_SHIFTS'].mean()!s}")
print(f"Avg shifts of accounts active in 2nd half: {hcp2['LIFETIME_SHIFTS'].mean()!s}")

Avg shifts of accounts active in 1st half: 20.295779274100838
Avg shifts of accounts active in 2nd half: 14.65558912386707


In [5]:
# Number of non-null HCP_ID values in the first-half cohort
tot_accounts = hcp1['HCP_ID'].count()
# Number of distinct REFERRER values (missing values are not counted)
unique_referrers = hcp1['REFERRER'].nunique()


print('In the first half of 2022:')
print(f'Total accounts created: {tot_accounts!s}')
print(f'Total unique referrers: {unique_referrers!s}')


In the first half of 2022:
Total accounts created: 6089
Total unique referrers: 1536


In [6]:
# Replace missing referrers with the string 'None' so referred and
# non-referred accounts can be separated with a plain equality test.
# Assign the result back to the column: calling fillna(..., inplace=True) on
# hcp1['REFERRER'] acts on a temporary Series, which triggers a chained-assignment
# warning in recent pandas and leaves hcp1 unchanged under Copy-on-Write.
hcp1['REFERRER'] = hcp1['REFERRER'].fillna('None')
hcp1.head()

Unnamed: 0_level_0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY
FIRST_SHIFT_TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-06-03,61e75e34dfb86b018a0d7cf7,"Minneapolis-St. Paul-Bloomington, MN-WI",CNA,1/19/22,5/24/22,5/24/22,6/1/22,49.0,,15595984274
2022-06-29,62b2499e52b98c01b901cd9d,"Chicago-Naperville-Elgin, IL-IN-WI",RN,6/21/22,6/22/22,6/22/22,6/29/22,2.0,62a48ae5691b0409a3c93073,16802693037
2022-02-28,61f962a1d6a2b2018a8e7159,"Riverside-San Bernardino-Ontario, CA",CNA,2/1/22,2/1/22,2/16/22,2/17/22,70.0,,18530777721
2022-05-20,626fd8342ccf7701c2d9d11d,"Cleveland-Elyria, OH",CNA,5/2/22,5/2/22,5/13/22,5/15/22,23.0,,17631861264
2022-06-20,62499dd3db204601acbd8311,"Deltona-Daytona Beach-Ormond Beach, FL",CNA,4/3/22,4/3/22,4/3/22,4/3/22,2.0,623a97fbca145401acdf587c,18581898213


In [9]:
# Accounts from the first-half cohort whose REFERRER is the string 'None'
# (i.e. no referral), renumbered from 0
nonref_active = (
    hcp1[hcp1['REFERRER'].eq('None')]
    .copy()
    .reset_index(drop=True)
)
nonref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY
0,61e75e34dfb86b018a0d7cf7,"Minneapolis-St. Paul-Bloomington, MN-WI",CNA,1/19/22,5/24/22,5/24/22,6/1/22,49.0,,15595984274
1,61f962a1d6a2b2018a8e7159,"Riverside-San Bernardino-Ontario, CA",CNA,2/1/22,2/1/22,2/16/22,2/17/22,70.0,,18530777721
2,626fd8342ccf7701c2d9d11d,"Cleveland-Elyria, OH",CNA,5/2/22,5/2/22,5/13/22,5/15/22,23.0,,17631861264
3,61fa31443e9933018b2ac502,"Bucyrus-Galion, OH",CNA,2/2/22,5/11/22,5/12/22,5/12/22,37.0,,15595984274
4,627f2dbe3ce6ce01bde9bd20,"Phoenix-Mesa-Chandler, AZ",RN,5/14/22,5/14/22,5/18/22,5/18/22,15.0,,18530777721


In [11]:
# Average lifetime value and average shift count of non-referred active accounts
nraa_hcp = nonref_active['HCP_ID'].count()

# Rate applied per shift for each qualification
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are looked up by name rather than by position, so the result does not
# depend on column order. Qualifications without a rate get a rate of 0, and
# rows with a missing LIFETIME_SHIFTS add nothing to the total (sum skips NaN).
nraa_profit = (
    nonref_active['LIFETIME_SHIFTS']
    * nonref_active['QUALIFICATION'].map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}).fillna(0)
).sum()

# The profit average divides by every account with an HCP_ID, including
# accounts whose qualification has no rate.
avg_nr_profit = nraa_profit / nraa_hcp
avg_nr_shifts = nonref_active['LIFETIME_SHIFTS'].mean()
print('Average profits from nonreferred workers: ' + str(avg_nr_profit))
print('Average shifts for nonreferred workers: ' + str(avg_nr_shifts))

Average profits from nonreferred workers: 152.38260661236225
Average shifts for nonreferred workers: 20.17105893627216


In [12]:
# Accounts from the first-half cohort whose REFERRER is anything other than
# the string 'None' (i.e. referred accounts), renumbered from 0
ref_active = (
    hcp1[hcp1['REFERRER'].ne('None')]
    .copy()
    .reset_index(drop=True)
)
ref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY
0,62b2499e52b98c01b901cd9d,"Chicago-Naperville-Elgin, IL-IN-WI",RN,6/21/22,6/22/22,6/22/22,6/29/22,2.0,62a48ae5691b0409a3c93073,16802693037
1,62499dd3db204601acbd8311,"Deltona-Daytona Beach-Ormond Beach, FL",CNA,4/3/22,4/3/22,4/3/22,4/3/22,2.0,623a97fbca145401acdf587c,18581898213
2,62695ce9a0811501b63d432a,"Youngstown-Warren-Boardman, OH-PA",CNA,4/27/22,4/27/22,4/27/22,4/29/22,22.0,618348c536650a0186fc6c50,18672087778
3,627bf7aeefb13c01c188a565,"New York-Newark-Jersey City, NY-NJ-PA",CNA,5/11/22,5/11/22,5/11/22,5/14/22,2.0,61fd74fae8b6ff01af8d85d7,na
4,62476ac9ca832e01aa8d1246,"Napa, CA",CNA,4/1/22,4/4/22,4/22/22,4/29/22,58.0,616453b0ac801c0169f80702,na


In [14]:
# Average lifetime value and average shift count of referred active accounts.
# Uses the cna_rate / lvn_rate / rn_rate values already defined in the notebook.
raa_hcp = ref_active['HCP_ID'].count()

# Columns are looked up by name rather than by position, so the result does not
# depend on column order. Qualifications without a rate get a rate of 0, and
# rows with a missing LIFETIME_SHIFTS add nothing to the total (sum skips NaN).
raa_profit = (
    ref_active['LIFETIME_SHIFTS']
    * ref_active['QUALIFICATION'].map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}).fillna(0)
).sum()

# The profit average divides by every account with an HCP_ID, including
# accounts whose qualification has no rate.
avg_ref_profit = raa_profit / raa_hcp
avg_ref_shifts = ref_active['LIFETIME_SHIFTS'].mean()
print('Average profits from referred workers: ' + str(avg_ref_profit))
print('Average shifts for referred workers: ' + str(avg_ref_shifts))

Average profits from referred workers: 142.63446475195823
Average shifts for referred workers: 20.567624020887727


# Set up data for LA and Nashville analysis

In [None]:
# List every distinct MSA value together with the number of accounts in it
hcp.loc[:, 'MSA'].value_counts()

In [None]:
# Split "<area>, <state>" into MSA (area only) and a new `state` column for
# easier filtering below.
# `n` must be passed by keyword: recent pandas rejects it as a positional argument.
# The guard keeps the cell re-runnable: once MSA has been overwritten with the
# area part it no longer contains ', ' and a second split would fail.
if 'state' not in hcp.columns:
    hcp[['MSA', 'state']] = hcp['MSA'].str.split(', ', n=1, expand=True)
# Preview a few rows rather than displaying the whole frame
hcp.head()

In [None]:
# Replace empty referrers with 'None' for easier filtering later.
# Assign the result back to the column: calling fillna(..., inplace=True) on
# hcp['REFERRER'] acts on a temporary Series, which triggers a chained-assignment
# warning in recent pandas and leaves hcp unchanged under Copy-on-Write.
hcp['REFERRER'] = hcp['REFERRER'].fillna('None')
hcp.head()

In [None]:
# Subsets of hcp whose `state` is exactly 'CA' and exactly 'TN'
cal_hcp = hcp[hcp['state'].eq('CA')]
ten_hcp = hcp[hcp['state'].eq('TN')]

In [None]:
# Distinct MSA values within the California subset, with account counts
cal_hcp.loc[:, 'MSA'].value_counts()

In [None]:
# Distinct MSA values within the Tennessee subset, with account counts
ten_hcp.loc[:, 'MSA'].value_counts()

In [None]:
# Independent copies of the Los Angeles and Nashville rows, selected by exact MSA name
la_hcp = cal_hcp[cal_hcp['MSA'].eq('Los Angeles-Long Beach-Anaheim')].copy()
nash_hcp = ten_hcp[ten_hcp['MSA'].eq('Nashville-Davidson--Murfreesboro--Franklin')].copy()

In [None]:
# Keep only LA accounts that have a FIRST_SHIFT_TIME, then count them
la_hcp = la_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
la_working_accounts = la_hcp['HCP_ID'].count()
la_working_accounts

In [None]:
# Keep only Nashville accounts that have a FIRST_SHIFT_TIME, then count them
nash_hcp = nash_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
nash_working_accounts = nash_hcp['HCP_ID'].count()
nash_working_accounts

# Calculating return for referred vs. non-referred accounts in LA

In [None]:
# LA accounts whose REFERRER is the string 'None' and that have a
# FIRST_SHIFT_TIME, renumbered from 0
la_nonref_active = (
    la_hcp[la_hcp['REFERRER'].eq('None')]
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
la_nonref_active.head()

In [None]:
# Average lifetime value and average shift count of non-referred active accounts in LA
la_nraa_hcp = la_nonref_active['HCP_ID'].count()

# Rate applied per shift for each qualification
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are looked up by name rather than by position, so the result does not
# depend on column order. Qualifications without a rate get a rate of 0, and
# rows with a missing LIFETIME_SHIFTS add nothing to the total (sum skips NaN).
la_nraa_profit = (
    la_nonref_active['LIFETIME_SHIFTS']
    * la_nonref_active['QUALIFICATION'].map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}).fillna(0)
).sum()

la_avg_nr_profit = la_nraa_profit / la_nraa_hcp
la_avg_nr_shifts = la_nonref_active['LIFETIME_SHIFTS'].mean()
# Show both averages as the cell output
la_avg_nr_profit, la_avg_nr_shifts

In [None]:
# LA accounts whose REFERRER is anything other than the string 'None' and
# that have a FIRST_SHIFT_TIME, renumbered from 0
la_ref_active = (
    la_hcp[la_hcp['REFERRER'].ne('None')]
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
la_ref_active.head()

In [None]:
# Average lifetime value of referred active accounts in LA
la_raa_hcp = la_ref_active['HCP_ID'].count()

# Rate applied per shift for each qualification
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are looked up by name rather than by position, so the result does not
# depend on column order. Qualifications without a rate get a rate of 0, and
# rows with a missing LIFETIME_SHIFTS add nothing to the total (sum skips NaN).
la_raa_profit = (
    la_ref_active['LIFETIME_SHIFTS']
    * la_ref_active['QUALIFICATION'].map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}).fillna(0)
).sum()

la_raa_profit / la_raa_hcp

# Calculating return for referred vs. non-referred accounts in Nashville

In [None]:
# Nashville accounts whose REFERRER is the string 'None' and that have a
# FIRST_SHIFT_TIME, renumbered from 0
nash_nonref_active = (
    nash_hcp[nash_hcp['REFERRER'].eq('None')]
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
nash_nonref_active.head()

In [None]:
# Average lifetime value of non-referred active accounts in Nashville
nash_nraa_hcp = nash_nonref_active['HCP_ID'].count()

# Rate applied per shift for each qualification
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are looked up by name rather than by position, so the result does not
# depend on column order. Qualifications without a rate get a rate of 0, and
# rows with a missing LIFETIME_SHIFTS add nothing to the total (sum skips NaN).
nash_nraa_profit = (
    nash_nonref_active['LIFETIME_SHIFTS']
    * nash_nonref_active['QUALIFICATION'].map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}).fillna(0)
).sum()

nash_nraa_profit / nash_nraa_hcp

In [None]:
# Nashville accounts whose REFERRER is anything other than the string 'None'
# and that have a FIRST_SHIFT_TIME, renumbered from 0
nash_ref_active = (
    nash_hcp[nash_hcp['REFERRER'].ne('None')]
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
nash_ref_active.head()

In [None]:
# Average lifetime value of referred active accounts in Nashville
nash_raa_hcp = nash_ref_active['HCP_ID'].count()

# Rate applied per shift for each qualification
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are looked up by name rather than by position, so the result does not
# depend on column order. Qualifications without a rate get a rate of 0, and
# rows with a missing LIFETIME_SHIFTS add nothing to the total (sum skips NaN).
nash_raa_profit = (
    nash_ref_active['LIFETIME_SHIFTS']
    * nash_ref_active['QUALIFICATION'].map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}).fillna(0)
).sum()

nash_raa_profit / nash_raa_hcp

In [None]:
# Rebuild the LA and Nashville frames from the state subsets so the steps
# below start again from all accounts in each MSA
la_hcp = cal_hcp[cal_hcp['MSA'].eq('Los Angeles-Long Beach-Anaheim')].copy()
nash_hcp = ten_hcp[ten_hcp['MSA'].eq('Nashville-Davidson--Murfreesboro--Franklin')].copy()

In [None]:
# Remove accounts without referrals. Store count of remaining referral accounts.
# A missing referrer may be present either as NaN or as the 'None' placeholder
# string, so both are excluded; dropping NaN alone would keep the placeholder
# rows and count every account as referred.
la_hcp = la_hcp[la_hcp['REFERRER'].notna() & la_hcp['REFERRER'].ne('None')].copy()
la_refer_accounts = la_hcp['REFERRER'].count()
la_refer_accounts

In [None]:
# Remove Nashville accounts without referrals and store the remaining count.
# A missing referrer may be present either as NaN or as the 'None' placeholder
# string, so both are excluded; dropping NaN alone would keep the placeholder
# rows and count every account as referred.
nash_hcp = nash_hcp[nash_hcp['REFERRER'].notna() & nash_hcp['REFERRER'].ne('None')].copy()
nash_refer_accounts = nash_hcp['REFERRER'].count()
nash_refer_accounts

In [None]:
# Converted referrals: of the accounts left in each frame, keep those with a
# FIRST_SHIFT_TIME and count their non-null REFERRER values
la_hcp = la_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
la_conv_refers = la_hcp['REFERRER'].count()

nash_hcp = nash_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
nash_conv_refers = nash_hcp['REFERRER'].count()

la_conv_refers

In [None]:
# Share of referral accounts that went on to work a shift, per city
la_ref_conv_rate = la_conv_refers / la_refer_accounts
nash_ref_conv_rate = nash_conv_refers / nash_refer_accounts
print(la_ref_conv_rate, nash_ref_conv_rate, sep='\n')

In [None]:
# Converted referrals as a fraction of all working accounts, per city
la_ref_workers = la_conv_refers / la_working_accounts
nash_ref_workers = nash_conv_refers / nash_working_accounts
print(la_ref_workers, nash_ref_workers, sep='\n')

In [None]:
# Preview the first five rows of the remaining LA frame
la_hcp.head(n=5)