In [1]:
import pandas as pd
import numpy as np
# Paths of the three input CSV files.
data1 = "1_paid_marketing.csv"
data2 = "2_hcp_data.csv"
data3 = "3_shifts_data.csv"

# Read each file into its own DataFrame, in the same order as the paths above.
campaigns, hcp, shifts = (pd.read_csv(csv_path) for csv_path in (data1, data2, data3))

# Set up data for LA and Nashville analysis

In [2]:
# Coerce LIFETIME_SHIFTS to a numeric dtype; entries that cannot be parsed become NaN
hcp['LIFETIME_SHIFTS'] = pd.to_numeric(hcp.LIFETIME_SHIFTS, errors='coerce')

In [3]:
# Count rows per distinct MSA value (most frequent first) to see which
# values exist before filtering on them
hcp['MSA'].value_counts()

Chicago-Naperville-Elgin, IL-IN-WI             5803
Los Angeles-Long Beach-Anaheim, CA             3746
New York-Newark-Jersey City, NY-NJ-PA          3162
Dallas-Fort Worth-Arlington, TX                2041
Riverside-San Bernardino-Ontario, CA           1900
Cleveland-Elyria, OH                           1838
Atlanta-Sandy Springs-Alpharetta, GA           1772
Philadelphia-Camden-Wilmington, PA-NJ-DE-MD    1769
Houston-The Woodlands-Sugar Land, TX           1522
Detroit-Warren-Dearborn, MI                    1246
St. Louis, MO-IL                               1163
San Diego-Chula Vista-Carlsbad, CA             1128
Pittsburgh, PA                                 1097
Miami-Fort Lauderdale-Pompano Beach, FL        1048
San Francisco-Oakland-Berkeley, CA             1025
Cincinnati, OH-KY-IN                           1004
Phoenix-Mesa-Chandler, AZ                       946
Milwaukee-Waukesha, WI                          878
Sacramento-Roseville-Folsom, CA                 814
Tampa-St. Pe

In [4]:
# Split MSA at the first ', ' into the leading part (kept in MSA) and the
# trailing state code(s) (new 'state' column) for easier filtering below.
# `n` is passed by keyword: pandas 2.0 made every argument of str.split
# except `pat` keyword-only, so the positional form raises TypeError there.
# The guard keeps the cell re-runnable: once MSA has been split it no longer
# contains ', ', so splitting again would produce a single column and the
# two-column assignment would fail.
if 'state' not in hcp.columns:
    hcp[['MSA', 'state']] = hcp['MSA'].str.split(', ', n=1, expand=True)
# Show a small preview instead of rendering the whole frame.
hcp.head(10)

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY,state
0,633be91bfb9a096b9e4d3a82,Wichita,CNA,10/5/22,10/13/22,10/29/22,11/1/22,11/21/22,2.0,,17631861264,KS
1,632d018f9603d7808339a6bb,Tampa-St. Petersburg-Clearwater,CNA,9/23/22,,,,,,,17631861264,FL
2,6361931a7ccb0c3b06407c33,Scranton--Wilkes-Barre,RN,11/1/22,11/1/22,,,,,,18530777721,PA
3,62feb63c88ad3001baf3108f,St. Louis,CNA,8/18/22,8/18/22,,,,,,17631861264,MO-IL
4,63841ac43540dfe4c2ec0996,Cincinnati,CNA,11/28/22,11/28/22,,,,,,18530777721,OH-KY-IN
5,63713dba7ccb0c3b06b860e4,Milwaukee-Waukesha,CNA,11/13/22,11/13/22,,,,,,18530777721,WI
6,628d687b3822a201bff6cf91,Columbus,CNA,5/24/22,,,,,,,16920183767,OH
7,63309d8c9603d780838a78aa,Atlanta-Sandy Springs-Alpharetta,LVN,9/25/22,9/26/22,9/26/22,10/10/22,10/16/22,23.0,,17631861264,GA
8,6374ae287ccb0c3b062e2919,Miami-Fort Lauderdale-Pompano Beach,CNA,11/16/22,11/16/22,,,,,,18530777721,FL
9,638678383c1385547063e170,Providence-Warwick,RN,11/29/22,11/29/22,12/1/22,,,,,18661880305,RI-MA


In [5]:
# Replace empty referrers with the string 'None' for easier filtering later.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on hcp['REFERRER']: that chained in-place call acts on
# an intermediate Series, emits a FutureWarning in pandas 2.x, and leaves hcp
# unchanged once Copy-on-Write is enabled.
hcp['REFERRER'] = hcp['REFERRER'].fillna('None')
hcp.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY,state
0,633be91bfb9a096b9e4d3a82,Wichita,CNA,10/5/22,10/13/22,10/29/22,11/1/22,11/21/22,2.0,,17631861264,KS
1,632d018f9603d7808339a6bb,Tampa-St. Petersburg-Clearwater,CNA,9/23/22,,,,,,,17631861264,FL
2,6361931a7ccb0c3b06407c33,Scranton--Wilkes-Barre,RN,11/1/22,11/1/22,,,,,,18530777721,PA
3,62feb63c88ad3001baf3108f,St. Louis,CNA,8/18/22,8/18/22,,,,,,17631861264,MO-IL
4,63841ac43540dfe4c2ec0996,Cincinnati,CNA,11/28/22,11/28/22,,,,,,18530777721,OH-KY-IN


In [6]:
# Per-state subsets: rows whose 'state' value is exactly 'CA' / exactly 'TN'
cal_hcp = hcp[hcp['state'].eq('CA')]
ten_hcp = hcp[hcp['state'].eq('TN')]

In [7]:
# Row counts per MSA within the California subset; used to find the exact
# MSA label to filter on for Los Angeles
cal_hcp['MSA'].value_counts()

Los Angeles-Long Beach-Anaheim      3746
Riverside-San Bernardino-Ontario    1900
San Diego-Chula Vista-Carlsbad      1128
San Francisco-Oakland-Berkeley      1025
Sacramento-Roseville-Folsom          814
Stockton                             466
Fresno                               428
San Jose-Sunnyvale-Santa Clara       392
Modesto                              335
Chico                                190
Vallejo                              175
Visalia                              111
Merced                                97
Oxnard-Thousand Oaks-Ventura          90
Santa Rosa-Petaluma                   69
Salinas                               64
Yuba City                             55
Madera                                52
Hanford-Corcoran                      37
Napa                                  36
Bakersfield                           32
Redding                               28
Santa Cruz-Watsonville                23
Santa Maria-Santa Barbara             18
Truckee-Grass Va

In [8]:
# Row counts per MSA within the Tennessee subset; used to find the exact
# MSA label to filter on for Nashville
ten_hcp['MSA'].value_counts()

Nashville-Davidson--Murfreesboro--Franklin    445
Knoxville                                     136
Jackson                                        49
Tullahoma-Manchester                           25
Johnson City                                   20
Athens                                         15
Sevierville                                    12
Cookeville                                     12
Morristown                                     12
Cleveland                                      12
Crossville                                     11
McMinnville                                    10
Shelbyville                                     9
Union City                                      9
Lewisburg                                       9
Brownsville                                     9
Dyersburg                                       6
Newport                                         6
Paris                                           5
Martin                                          4


In [9]:
# Narrow the state-level frames to the two MSAs under study.
# .copy() yields independent frames, so later changes do not touch cal_hcp / ten_hcp.
la_hcp = cal_hcp[cal_hcp['MSA'].eq('Los Angeles-Long Beach-Anaheim')].copy()
nash_hcp = ten_hcp[ten_hcp['MSA'].eq('Nashville-Davidson--Murfreesboro--Franklin')].copy()

In [10]:
# Keep only accounts with a FIRST_SHIFT_TIME (treated as "active" here),
# then count the remaining accounts in each metro.
la_hcp = la_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
nash_hcp = nash_hcp.dropna(subset=['FIRST_SHIFT_TIME'])

la_working_accounts = la_hcp['HCP_ID'].count()
nash_working_accounts = nash_hcp['HCP_ID'].count()

print(f'Active accounts in LA: {la_working_accounts}')
print(f'Active accounts in Nashville: {nash_working_accounts}')

Active accounts in LA: 1297
Active accounts in Nashville: 117


# Calculating return for referred vs. non-referred workers in LA

In [11]:
# Active LA accounts created without a referral (REFERRER holds the 'None' placeholder).
# The chain filters, drops rows lacking a first shift, and renumbers the index from 0.
la_nonref_active = (
    la_hcp[la_hcp['REFERRER'] == 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
la_nonref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY,state
0,62a255d0c50003846a02e647,Los Angeles-Long Beach-Anaheim,CNA,6/9/22,6/9/22,9/26/22,9/26/22,9/30/22,20.0,,18530777721,CA
1,621f2a1678c27801ac191b9b,Los Angeles-Long Beach-Anaheim,CNA,3/2/22,3/2/22,3/11/22,3/24/22,3/30/22,21.0,,na,CA
2,633086869603d78083e0e5e1,Los Angeles-Long Beach-Anaheim,CNA,9/25/22,11/18/22,12/9/22,12/10/22,12/14/22,10.0,,na,CA
3,62e9c57200e29d01ab56e9a2,Los Angeles-Long Beach-Anaheim,LVN,8/3/22,8/3/22,8/29/22,8/30/22,9/5/22,19.0,,na,CA
4,626ad36d7e73ce01c14cb6e8,Los Angeles-Long Beach-Anaheim,CNA,4/28/22,4/28/22,,4/28/22,5/1/22,20.0,,na,CA


In [12]:
# Calculate avg lifetime value of non-referral active accounts
la_nraa_hcp = la_nonref_active['HCP_ID'].count()

# Rate applied per qualification; every lifetime shift contributes rate * 8.
# NOTE(review): presumably an hourly margin times an 8-hour shift - confirm.
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are selected by name rather than by position (iloc[x, 2] / iloc[x, 8]),
# so the result cannot silently change if the column order does.
# Qualifications other than CNA / LVN / RN contribute 0 profit.
la_nr_rate = (
    la_nonref_active['QUALIFICATION']
    .map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate})
    .fillna(0)
)
# skipna=False: a missing LIFETIME_SHIFTS turns the total into NaN instead of
# being silently ignored.
la_nraa_profit = (la_nonref_active['LIFETIME_SHIFTS'] * la_nr_rate * 8).sum(skipna=False)

la_avg_nr_profit = la_nraa_profit / la_nraa_hcp
la_avg_nr_shifts = la_nonref_active['LIFETIME_SHIFTS'].mean()

print('Average profits from nonreferred workers in LA: ' + str(la_avg_nr_profit))
print('Average shifts for nonreferred workers in LA: ' + str(la_avg_nr_shifts))

Average profits from nonreferred workers in LA: 946.9668246445498
Average shifts for nonreferred workers in LA: 16.629146919431278


In [13]:
# Active LA accounts created WITH a referral (REFERRER is anything but the 'None' placeholder).
# The chain filters, drops rows lacking a first shift, and renumbers the index from 0.
la_ref_active = (
    la_hcp[la_hcp['REFERRER'] != 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
la_ref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY,state
0,631b401f9603d78083193e28,Los Angeles-Long Beach-Anaheim,CNA,9/13/22,9/13/22,9/23/22,9/27/22,10/1/22,6.0,61c60d19b8bfc80180d64eac,17631861264,CA
1,62f2cf6ac3ffc601b64d513b,Los Angeles-Long Beach-Anaheim,CNA,8/9/22,8/9/22,8/16/22,10/19/22,10/20/22,27.0,626357794c685201ab98d0a9,13743509691,CA
2,634842d3fb9a096b9e77ea7f,Los Angeles-Long Beach-Anaheim,CNA,10/14/22,10/14/22,10/16/22,10/22/22,10/22/22,37.0,61aff2db87f1b201848fca57,18530777721,CA
3,62b98e914cab7601c1b0c64b,Los Angeles-Long Beach-Anaheim,CNA,6/27/22,9/3/22,9/3/22,9/4/22,9/4/22,33.0,61ead492b9162b018a244959,na,CA
4,62b23d258e735a01bc79773e,Los Angeles-Long Beach-Anaheim,LVN,6/21/22,6/21/22,6/22/22,6/22/22,6/26/22,10.0,6286843c9a73a501c128749c,na,CA


In [14]:
# Calculate avg lifetime value of referred active accounts
la_raa_hcp = la_ref_active['HCP_ID'].count()

# Rate applied per qualification; every lifetime shift contributes rate * 8.
# NOTE(review): presumably an hourly margin times an 8-hour shift - confirm.
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are selected by name rather than by position (iloc[x, 2] / iloc[x, 8]),
# so the result cannot silently change if the column order does.
# Qualifications other than CNA / LVN / RN contribute 0 profit.
la_ref_rate = (
    la_ref_active['QUALIFICATION']
    .map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate})
    .fillna(0)
)
# skipna=False: a missing LIFETIME_SHIFTS turns the total into NaN instead of
# being silently ignored.
la_raa_profit = (la_ref_active['LIFETIME_SHIFTS'] * la_ref_rate * 8).sum(skipna=False)

la_avg_ref_profit = la_raa_profit / la_raa_hcp
la_avg_ref_shifts = la_ref_active['LIFETIME_SHIFTS'].mean()

print('Average profits from referred workers in LA: ' + str(la_avg_ref_profit))
print('Average shifts for referred workers in LA: ' + str(la_avg_ref_shifts))

Average profits from referred workers in LA: 784.0176600441501
Average shifts for referred workers in LA: 15.13465783664459


# Calculating return for referred vs. non-referred workers in Nashville

In [15]:
# Active Nashville accounts created without a referral (REFERRER holds the 'None' placeholder).
# The chain filters, drops rows lacking a first shift, and renumbers the index from 0.
nash_nonref_active = (
    nash_hcp[nash_hcp['REFERRER'] == 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
nash_nonref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY,state
0,6309552077509c01b7d92101,Nashville-Davidson--Murfreesboro--Franklin,CNA,8/26/22,8/26/22,9/2/22,9/22/22,9/24/22,21.0,,17631861264,TN
1,61de4d41001d43018b981d89,Nashville-Davidson--Murfreesboro--Franklin,CNA,1/12/22,1/12/22,3/25/22,3/30/22,3/30/22,10.0,,na,TN
2,630f75ba95230301b59eab06,Nashville-Davidson--Murfreesboro--Franklin,CNA,8/31/22,9/1/22,9/1/22,9/7/22,9/10/22,2.0,,na,TN
3,62307a24dafae501aeed79ca,Nashville-Davidson--Murfreesboro--Franklin,CNA,3/15/22,3/15/22,11/14/22,11/30/22,12/5/22,21.0,,15595984274,TN
4,61f7033bfac74e018ae365ce,Nashville-Davidson--Murfreesboro--Franklin,LVN,1/30/22,1/30/22,1/31/22,3/11/22,3/15/22,20.0,,na,TN


In [16]:
# Calculate avg lifetime value of non-referral active accounts
nash_nraa_hcp = nash_nonref_active['HCP_ID'].count()

# Rate applied per qualification; every lifetime shift contributes rate * 8.
# NOTE(review): presumably an hourly margin times an 8-hour shift - confirm.
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are selected by name rather than by position (iloc[x, 2] / iloc[x, 8]),
# so the result cannot silently change if the column order does.
# Qualifications other than CNA / LVN / RN contribute 0 profit.
nash_nr_rate = (
    nash_nonref_active['QUALIFICATION']
    .map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate})
    .fillna(0)
)
# skipna=False: a missing LIFETIME_SHIFTS turns the total into NaN instead of
# being silently ignored.
nash_nraa_profit = (nash_nonref_active['LIFETIME_SHIFTS'] * nash_nr_rate * 8).sum(skipna=False)

nash_avg_nr_profit = nash_nraa_profit / nash_nraa_hcp
nash_avg_nr_shifts = nash_nonref_active['LIFETIME_SHIFTS'].mean()

print('Average profits from nonreferred workers in Nashville: ' + str(nash_avg_nr_profit))
print('Average shifts for nonreferred workers in Nashville: ' + str(nash_avg_nr_shifts))

Average profits from nonreferred workers in Nashville: 912.4528301886793
Average shifts for nonreferred workers in Nashville: 13.264150943396226


In [17]:
# Active Nashville accounts created WITH a referral (REFERRER is anything but the 'None' placeholder).
# The chain filters, drops rows lacking a first shift, and renumbers the index from 0.
nash_ref_active = (
    nash_hcp[nash_hcp['REFERRER'] != 'None']
    .dropna(subset=['FIRST_SHIFT_TIME'])
    .reset_index(drop=True)
)
nash_ref_active.head()

Unnamed: 0,HCP_ID,MSA,QUALIFICATION,CREATED_AT,LICENSE_REVIEWED,ONBOARD_AT,FIRST_CLAIM_TIME,FIRST_SHIFT_TIME,LIFETIME_SHIFTS,REFERRER,CAMPAIGN_KEY,state
0,62eb2d9771188401b586d50d,Nashville-Davidson--Murfreesboro--Franklin,CNA,8/4/22,8/4/22,8/4/22,8/4/22,8/6/22,13.0,62e05d9c4d23f301b50a7a15,na,TN
1,626b813debba6601c3de12f5,Nashville-Davidson--Murfreesboro--Franklin,CNA,4/29/22,5/2/22,6/1/22,6/3/22,6/19/22,21.0,624f7ec79cf773019fcd1adf,na,TN
2,62a025a815ffd101c05b3e5b,Nashville-Davidson--Murfreesboro--Franklin,CNA,6/8/22,6/23/22,9/26/22,9/27/22,9/29/22,8.0,62b1e6b5fdee9601c057e4b8,na,TN
3,636e77f17ccb0c3b06442d5f,Nashville-Davidson--Murfreesboro--Franklin,CNA,11/11/22,11/11/22,11/13/22,11/19/22,11/23/22,8.0,61bcd3d9f9bdaf0186ba122a,na,TN
4,6272ea30e5866a01bee541d8,Nashville-Davidson--Murfreesboro--Franklin,CNA,5/4/22,5/4/22,7/24/22,8/10/22,8/14/22,4.0,62428e9a1cb32a01ade2c3b3,na,TN


In [18]:
# Calculate avg lifetime value of referred active accounts
nash_raa_hcp = nash_ref_active['HCP_ID'].count()

# Rate applied per qualification; every lifetime shift contributes rate * 8.
# NOTE(review): presumably an hourly margin times an 8-hour shift - confirm.
cna_rate = 5
lvn_rate = 10
rn_rate = 15

# Columns are selected by name rather than by position (iloc[x, 2] / iloc[x, 8]),
# so the result cannot silently change if the column order does.
# Qualifications other than CNA / LVN / RN contribute 0 profit.
nash_ref_rate = (
    nash_ref_active['QUALIFICATION']
    .map({'CNA': cna_rate, 'LVN': lvn_rate, 'RN': rn_rate})
    .fillna(0)
)
# skipna=False: a missing LIFETIME_SHIFTS turns the total into NaN instead of
# being silently ignored.
nash_raa_profit = (nash_ref_active['LIFETIME_SHIFTS'] * nash_ref_rate * 8).sum(skipna=False)

nash_avg_ref_profit = nash_raa_profit / nash_raa_hcp
nash_avg_ref_shifts = nash_ref_active['LIFETIME_SHIFTS'].mean()

print('Average profits from referred workers in Nashville: ' + str(nash_avg_ref_profit))
print('Average shifts for referred workers in Nashville: ' + str(nash_avg_ref_shifts))

Average profits from referred workers in Nashville: 690.625
Average shifts for referred workers in Nashville: 13.640625


# Other Referral Program Statistics

In [19]:
# Rebuild the LA and Nashville frames from the state subsets, so inactive
# accounts (removed from la_hcp / nash_hcp earlier) are included again.
la_hcp = cal_hcp[cal_hcp['MSA'].eq('Los Angeles-Long Beach-Anaheim')].copy()
nash_hcp = ten_hcp[ten_hcp['MSA'].eq('Nashville-Davidson--Murfreesboro--Franklin')].copy()

In [20]:
# Keep only accounts that have a referrer (anything but the 'None' placeholder),
# then store how many such accounts each metro has.
la_hcp = la_hcp[la_hcp['REFERRER'].ne('None')]
nash_hcp = nash_hcp[nash_hcp['REFERRER'].ne('None')]

la_refer_accounts = la_hcp['REFERRER'].count()
nash_refer_accounts = nash_hcp['REFERRER'].count()

print(f'Accounts with a referral in LA, including inactive: {la_refer_accounts}')
print(f'Accounts with a referral in Nashville, including inactive: {nash_refer_accounts}')

Accounts with a referral in LA, including inactive: 620
Accounts with a referral in Nashville, including inactive: 83


In [21]:
# Converted referrals: referred accounts that also have a FIRST_SHIFT_TIME.
# Rows without a first shift are dropped, then the survivors are counted per metro.
la_hcp = la_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
la_conv_refers = la_hcp['REFERRER'].count()

nash_hcp = nash_hcp.dropna(subset=['FIRST_SHIFT_TIME'])
nash_conv_refers = nash_hcp['REFERRER'].count()

print(f'Active accounts with a referral in LA: {la_conv_refers}')
print(f'Active accounts with a referral in Nashville: {nash_conv_refers}')

Active accounts with a referral in LA: 453
Active accounts with a referral in Nashville: 64


In [22]:
# Share of referred accounts that went on to become active.
# The stored rates stay as fractions in [0, 1].
la_ref_conv_rate = la_conv_refers / la_refer_accounts
nash_ref_conv_rate = nash_conv_refers / nash_refer_accounts

# The labels say "Percent", so format the fraction as a percentage
# (e.g. 73.1%) instead of printing the raw ratio.
print('Percent of accounts with a referral converting to active in LA: ' + f'{la_ref_conv_rate:.1%}')
print('Percent of accounts with a referral converting to active in Nashville: ' + f'{nash_ref_conv_rate:.1%}')

Percent of accounts with a referral converting to active in LA: 0.7306451612903225
Percent of accounts with a referral converting to active in Nashville: 0.7710843373493976


In [23]:
# Share of all active accounts that came in through a referral.
# The stored values stay as fractions in [0, 1].
la_ref_workers = la_conv_refers / la_working_accounts
nash_ref_workers = nash_conv_refers / nash_working_accounts

# The labels say "Percent", so format the fraction as a percentage
# (e.g. 34.9%) instead of printing the raw ratio.
print('Percent of active accounts with a referral in LA: ' + f'{la_ref_workers:.1%}')
print('Percent of active accounts with a referral in Nashville: ' + f'{nash_ref_workers:.1%}')

Percent of active accounts with a referral in LA: 0.3492675404780262
Percent of active accounts with a referral in Nashville: 0.5470085470085471


In [24]:
# Final summary: active-account counts plus per-worker average profit and
# shifts for each metro / referral group, each rounded to 2 decimals.
summary_lines = [
    'For all of 2022:',
    '',
    f'Active accounts in LA: {round(la_working_accounts, 2)}',
    f'Active accounts in Nashville: {round(nash_working_accounts, 2)}',
    '',
    f'Average profits from nonreferred workers in LA: {round(la_avg_nr_profit, 2)}',
    f'Average shifts for nonreferred workers in LA: {round(la_avg_nr_shifts, 2)}',
    '',
    f'Average profits from nonreferred workers in Nashville: {round(nash_avg_nr_profit, 2)}',
    f'Average shifts for nonreferred workers in Nashville: {round(nash_avg_nr_shifts, 2)}',
    '',
    f'Average profits from referred workers in LA: {round(la_avg_ref_profit, 2)}',
    f'Average shifts for referred workers in LA: {round(la_avg_ref_shifts, 2)}',
    '',
    f'Average profits from referred workers in Nashville: {round(nash_avg_ref_profit, 2)}',
    f'Average shifts for referred workers in Nashville: {round(nash_avg_ref_shifts, 2)}',
]
# A single print of the newline-joined lines emits the same text as one print per line.
print('\n'.join(summary_lines))

For all of 2022:

Active accounts in LA: 1297
Active accounts in Nashville: 117

Average profits from nonreferred workers in LA: 946.97
Average shifts for nonreferred workers in LA: 16.63

Average profits from nonreferred workers in Nashville: 912.45
Average shifts for nonreferred workers in Nashville: 13.26

Average profits from referred workers in LA: 784.02
Average shifts for referred workers in LA: 15.13

Average profits from referred workers in Nashville: 690.62
Average shifts for referred workers in Nashville: 13.64
