# Exploring the data after removing accounts whose first shift occurred in the 2nd half of 2022, under the theory that previous calculations penalized the referral program because late cohorts did not have enough time to take on additional shifts.

In [1]:
import pandas as pd
import numpy as np
# Input CSV file names
data1, data2, data3 = "1_paid_marketing.csv", "2_hcp_data.csv", "3_shifts_data.csv"
# Load each CSV into its own DataFrame, in the same order as the file names
campaigns, hcp, shifts = (pd.read_csv(csv_path) for csv_path in (data1, data2, data3))

In [2]:
# Coerce 'LIFETIME_SHIFTS' to numeric and 'FIRST_SHIFT_TIME' to datetime.
# errors='coerce' turns values that cannot be parsed into NaN / NaT instead of raising.
hcp['LIFETIME_SHIFTS'] = pd.to_numeric(hcp.LIFETIME_SHIFTS, errors='coerce')
hcp['FIRST_SHIFT_TIME'] = pd.to_datetime(hcp.FIRST_SHIFT_TIME, errors='coerce')

In [3]:
# Split up accounts based on time of first shift: either 1st or 2nd half of 2022.
# A boolean mask is used instead of set_index('FIRST_SHIFT_TIME') so that
# FIRST_SHIFT_TIME stays a regular column of `hcp`: later cells call
# dropna(subset=['FIRST_SHIFT_TIME']) and read columns by position, which fails
# (KeyError) or may shift once that column is moved into the index. It also avoids
# string-slicing an unsorted DatetimeIndex that contains NaT.
# Accounts without a first shift (NaT) fall into neither half.
# NOTE(review): the cells below keep working from the full `hcp`, not `hcp1` --
# confirm whether the LA/Nashville analysis was meant to use only the 1st-half cohort.
first_shift = hcp['FIRST_SHIFT_TIME']
first_shift_in_2022 = first_shift.dt.year == 2022
hcp1 = hcp.loc[first_shift_in_2022 & (first_shift.dt.month <= 6)]
hcp2 = hcp.loc[first_shift_in_2022 & (first_shift.dt.month >= 7)]

In [4]:
# Compare the mean lifetime shifts of the 1st-half and 2nd-half cohorts
print(f"Avg shifts of accounts active in 1st half: {hcp1['LIFETIME_SHIFTS'].mean()}")
print(f"Avg shifts of accounts active in 2nd half: {hcp2['LIFETIME_SHIFTS'].mean()}")

Avg shifts of accounts active in 1st half: 20.295779274100838
Avg shifts of accounts active in 2nd half: 15.232385661310259


# Set up data for LA and Nashville analysis

In [None]:
# Find unique values for MSA: lists each distinct 'MSA' value in hcp together
# with the number of rows carrying it (displayed as the cell output)
hcp['MSA'].value_counts()

In [None]:
# Split 'MSA' at the first ', ' only: the part before it stays in 'MSA' and the
# remainder goes into a new 'state' column, for easier filtering below.
# The split limit is passed as n=1 by keyword because pandas 2.x accepts `n`
# only as a keyword argument; the positional form raises a TypeError there.
hcp[['MSA', 'state']] = hcp['MSA'].str.split(', ', n=1, expand=True)
hcp

In [None]:
# Replace empty referrers with the string 'None' for easier filtering later.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on hcp['REFERRER']: the in-place form acts on an
# intermediate Series and is not guaranteed to update `hcp` itself (it does not
# under pandas Copy-on-Write).
hcp['REFERRER'] = hcp['REFERRER'].fillna('None')
hcp.head()

In [None]:
# One DataFrame per state of interest: rows whose 'state' is 'CA' and rows whose 'state' is 'TN'
cal_hcp, ten_hcp = (hcp[hcp['state'].eq(abbr)] for abbr in ('CA', 'TN'))

In [None]:
# Easier to see unique values for Cal and Ten below: row count per distinct
# 'MSA' value among the California rows
cal_hcp['MSA'].value_counts()

In [None]:
# Row count per distinct 'MSA' value among the Tennessee rows
ten_hcp['MSA'].value_counts()

In [None]:
# Independent copies of the Los Angeles and Nashville rows, so the filtering
# done in later cells does not touch cal_hcp / ten_hcp
la_msa_mask = cal_hcp['MSA'].eq('Los Angeles-Long Beach-Anaheim')
nash_msa_mask = ten_hcp['MSA'].eq('Nashville-Davidson--Murfreesboro--Franklin')
la_hcp = cal_hcp[la_msa_mask].copy()
nash_hcp = ten_hcp[nash_msa_mask].copy()

In [None]:
# Drop LA accounts with no FIRST_SHIFT_TIME (inactive accounts), then store the
# number of remaining active accounts (non-null HCP_ID values)
la_hcp = la_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
la_working_accounts = la_hcp['HCP_ID'].count()
la_working_accounts

In [None]:
# Same as for LA: drop Nashville accounts with no FIRST_SHIFT_TIME and count the
# remaining active accounts (non-null HCP_ID values)
nash_hcp = nash_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
nash_working_accounts = nash_hcp['HCP_ID'].count()
nash_working_accounts

# Calculating return for referral vs. non-referral accounts in LA

In [None]:
# Active LA accounts created without a referral: REFERRER equals the 'None'
# placeholder and FIRST_SHIFT_TIME is present. The index is renumbered from 0.
la_nonref_active = (
    la_hcp[la_hcp['REFERRER'] == 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
la_nonref_active.head()

In [None]:
# Calculate avg lifetime value and avg lifetime shifts of non-referral active
# accounts in LA
la_nraa_profit = 0
la_nraa_hcp = la_nonref_active['HCP_ID'].count()

# Multiplier applied to column 8 for each worker type
# (presumably profit per shift -- TODO confirm)
cna_rate = 5
lvn_rate = 10
rn_rate = 15
rate_by_type = {'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}

for x in range(la_nraa_hcp):
    # Column 2 is matched against the worker-type codes; rows with any other
    # value contribute nothing to the total.
    rate = rate_by_type.get(la_nonref_active.iloc[x, 2])
    if rate is not None:
        # Column 8 is presumably the lifetime shift count -- TODO confirm
        la_nraa_profit += la_nonref_active.iloc[x, 8] * rate

la_avg_nr_profit = la_nraa_profit / la_nraa_hcp
# This assignment was left without a right-hand side (a SyntaxError); it is
# completed with the mean of 'LIFETIME_SHIFTS', the same measure used for the
# cohort comparison at the top of the notebook.
la_avg_nr_shifts = la_nonref_active['LIFETIME_SHIFTS'].mean()

In [None]:
# Active LA accounts created WITH a referral: REFERRER differs from the 'None'
# placeholder and FIRST_SHIFT_TIME is present. The index is renumbered from 0.
la_ref_active = (
    la_hcp[la_hcp['REFERRER'] != 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
la_ref_active.head()

In [None]:
# Avg lifetime value of referral active accounts in LA
# Multiplier applied to column 8 for each worker type
cna_rate, lvn_rate, rn_rate = 5, 10, 15
rate_by_type = {'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}

la_raa_hcp = la_ref_active['HCP_ID'].count()
la_raa_profit = 0
for row_pos in range(la_raa_hcp):
    # Column 2 selects the rate; rows with any other value are skipped
    rate = rate_by_type.get(la_ref_active.iloc[row_pos, 2])
    if rate is not None:
        la_raa_profit += la_ref_active.iloc[row_pos, 8] * rate

# Average over the counted accounts (displayed as the cell output)
la_raa_profit / la_raa_hcp

# Calculating return for referral vs. non-referral accounts in Nashville

In [None]:
# Active Nashville accounts created without a referral: REFERRER equals the
# 'None' placeholder and FIRST_SHIFT_TIME is present. The index is renumbered from 0.
nash_nonref_active = (
    nash_hcp[nash_hcp['REFERRER'] == 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
nash_nonref_active.head()

In [None]:
# Avg lifetime value of non-referral active accounts in Nashville
# Multiplier applied to column 8 for each worker type
cna_rate, lvn_rate, rn_rate = 5, 10, 15
rate_by_type = {'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}

nash_nraa_hcp = nash_nonref_active['HCP_ID'].count()
nash_nraa_profit = 0
for row_pos in range(nash_nraa_hcp):
    # Column 2 selects the rate; rows with any other value are skipped
    rate = rate_by_type.get(nash_nonref_active.iloc[row_pos, 2])
    if rate is not None:
        nash_nraa_profit += nash_nonref_active.iloc[row_pos, 8] * rate

# Average over the counted accounts (displayed as the cell output)
nash_nraa_profit / nash_nraa_hcp

In [None]:
# Active Nashville accounts created WITH a referral: REFERRER differs from the
# 'None' placeholder and FIRST_SHIFT_TIME is present. The index is renumbered from 0.
nash_ref_active = (
    nash_hcp[nash_hcp['REFERRER'] != 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
nash_ref_active.head()

In [None]:
# Avg lifetime value of referral active accounts in Nashville
# Multiplier applied to column 8 for each worker type
cna_rate, lvn_rate, rn_rate = 5, 10, 15
rate_by_type = {'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate}

nash_raa_hcp = nash_ref_active['HCP_ID'].count()
nash_raa_profit = 0
for row_pos in range(nash_raa_hcp):
    # Column 2 selects the rate; rows with any other value are skipped
    rate = rate_by_type.get(nash_ref_active.iloc[row_pos, 2])
    if rate is not None:
        nash_raa_profit += nash_ref_active.iloc[row_pos, 8] * rate

# Average over the counted accounts (displayed as the cell output)
nash_raa_profit / nash_raa_hcp

In [None]:
# Rebuild the LA and Nashville frames from cal_hcp / ten_hcp, discarding the
# row filtering applied to la_hcp / nash_hcp in the cells above
la_msa_mask = cal_hcp['MSA'].eq('Los Angeles-Long Beach-Anaheim')
nash_msa_mask = ten_hcp['MSA'].eq('Nashville-Davidson--Murfreesboro--Franklin')
la_hcp = cal_hcp[la_msa_mask].copy()
nash_hcp = ten_hcp[nash_msa_mask].copy()

In [None]:
# Remove accounts without referrals. Store count of remaining referral accounts.
# An earlier cell replaces missing REFERRER values with the string 'None', so
# dropna(subset=['REFERRER']) alone would keep every account and inflate this
# count; both NaN and the 'None' placeholder are excluded here.
la_has_referrer = la_hcp['REFERRER'].notna() & (la_hcp['REFERRER'] != 'None')
la_hcp = la_hcp.loc[la_has_referrer].copy()
la_refer_accounts = la_hcp['REFERRER'].count()
la_refer_accounts

In [None]:
# Remove Nashville accounts without referrals and count the remaining referral
# accounts. An earlier cell replaces missing REFERRER values with the string
# 'None', so dropna(subset=['REFERRER']) alone would keep every account; both
# NaN and the 'None' placeholder are excluded here.
nash_has_referrer = nash_hcp['REFERRER'].notna() & (nash_hcp['REFERRER'] != 'None')
nash_hcp = nash_hcp.loc[nash_has_referrer].copy()
nash_refer_accounts = nash_hcp['REFERRER'].count()
nash_refer_accounts

In [None]:
# Converted referrals for LA and Nashville: of the rows kept so far, those that
# also have a FIRST_SHIFT_TIME. Only the LA count is displayed.
la_hcp = la_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
la_conv_refers = la_hcp['REFERRER'].count()

nash_hcp = nash_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
nash_conv_refers = nash_hcp['REFERRER'].count()

la_conv_refers

In [None]:
# Referral conversion rate per city: converted referrals divided by referral accounts
la_ref_conv_rate, nash_ref_conv_rate = (
    la_conv_refers / la_refer_accounts,
    nash_conv_refers / nash_refer_accounts,
)
print(la_ref_conv_rate, nash_ref_conv_rate, sep='\n')

In [None]:
# Share of each city's active (working) accounts that are converted referrals
la_ref_workers, nash_ref_workers = (
    la_conv_refers / la_working_accounts,
    nash_conv_refers / nash_working_accounts,
)
print(la_ref_workers, nash_ref_workers, sep='\n')

In [None]:
# Preview the first rows of la_hcp as left by the preceding cells
la_hcp.head()