In [1]:
import pandas as pd
import numpy as np
# Paths to the three input CSV files (relative to the working directory).
data1 = "1_paid_marketing.csv"
data2 = "2_hcp_data.csv"
data3 = "3_shifts_data.csv"

# Read each file into its own DataFrame, in the same order as the paths above.
campaigns, hcp, shifts = (
    pd.read_csv(csv_path) for csv_path in (data1, data2, data3)
)

In [4]:
# Preview the first rows of the shifts frame (head() shows 5 rows by default).
shifts.head()

Unnamed: 0,MSA,AGENT_REQ,MONTH,POSTED_SHIFTS,FILLED_SHIFTS
0,"Atlanta-Sandy Springs-Alpharetta, GA",CNA,9/1/22,1109.0,659.0
1,"Atlanta-Sandy Springs-Alpharetta, GA",CNA,10/1/22,1562.0,963.0
2,"Atlanta-Sandy Springs-Alpharetta, GA",CNA,11/1/22,1453.0,1092.0
3,"Atlanta-Sandy Springs-Alpharetta, GA",LVN,9/1/22,539.0,328.0
4,"Atlanta-Sandy Springs-Alpharetta, GA",LVN,10/1/22,793.0,699.0


In [5]:
# List the column labels of the shifts frame.
shifts.columns

Index(['MSA', 'AGENT_REQ', 'MONTH', 'POSTED_SHIFTS', 'FILLED_SHIFTS'], dtype='object')

In [6]:
# Count the non-null entries in each column.
shifts.count()

MSA              268
AGENT_REQ        268
MONTH            268
POSTED_SHIFTS    268
FILLED_SHIFTS    268
dtype: int64

In [7]:
# Show the dtype pandas assigned to each column when reading the CSV.
shifts.dtypes

MSA               object
AGENT_REQ         object
MONTH             object
POSTED_SHIFTS    float64
FILLED_SHIFTS    float64
dtype: object

In [8]:
# Count how many MONTH entries cannot be parsed as dates: with errors='coerce'
# those entries become NaT and show up under True in the printed tally.
month_as_dates = pd.to_datetime(shifts['MONTH'], errors='coerce')
unparsed_mask = month_as_dates.isna()
print(unparsed_mask.value_counts())

False    268
True       1
Name: MONTH, dtype: int64


In [9]:
# Remove every row that contains a missing value, then re-run the date-parse
# check: only False should remain in the printed tally.
shifts = shifts.dropna(axis='index', how='any')
month_parse_failed = pd.to_datetime(shifts['MONTH'], errors='coerce').isna()
print(month_parse_failed.value_counts())

False    268
Name: MONTH, dtype: int64


In [10]:
# Replace the text MONTH column with its parsed datetime values.
shifts['MONTH'] = pd.to_datetime(shifts['MONTH'], errors='coerce')

# Sanity checks after the conversion: nulls per column, frame shape, dtypes.
for summary in (shifts.isna().sum(), shifts.shape, shifts.dtypes):
    print(summary)

MSA              0
AGENT_REQ        0
MONTH            0
POSTED_SHIFTS    0
FILLED_SHIFTS    0
dtype: int64
(268, 5)
MSA                      object
AGENT_REQ                object
MONTH            datetime64[ns]
POSTED_SHIFTS           float64
FILLED_SHIFTS           float64
dtype: object


In [11]:
# Count rows per MSA: value_counts() lists each distinct MSA label together
# with the number of rows carrying it, which exposes unexpected or uneven entries.
shifts['MSA'].value_counts()

Riverside-San Bernardino-Ontario, CA              9
Kansas City, MO-KS                                9
Pittsburgh, PA                                    9
Dallas-Fort Worth-Arlington, TX                   9
Cincinnati, OH-KY-IN                              9
New York-Newark-Jersey City, NY-NJ-PA             9
Boston-Cambridge-Newton, MA-NH                    9
San Jose-Sunnyvale-Santa Clara, CA                9
Minneapolis-St. Paul-Bloomington, MN-WI           9
Nashville-Davidson--Murfreesboro--Franklin, TN    9
San Francisco-Oakland-Berkeley, CA                9
Sacramento-Roseville-Folsom, CA                   9
Hartford-East Hartford-Middletown, CT             9
Atlanta-Sandy Springs-Alpharetta, GA              9
San Antonio-New Braunfels, TX                     9
Springfield, MA                                   9
Canton-Massillon, OH                              9
St. Louis, MO-IL                                  9
Las Vegas-Henderson-Paradise, NV                  9
Los Angeles-

In [13]:
# Sum posted and filled shifts for each MSA (collapsing roles and months).
# The value columns are named explicitly: at this point the frame also holds a
# datetime MONTH column and a text AGENT_REQ column, and summing those either
# raises or yields junk on pandas versions that no longer drop non-numeric
# columns silently. The string 'sum' selects pandas' own groupby sum and
# avoids the deprecation warning emitted when np.sum is passed as aggfunc.
shifts = pd.pivot_table(
    shifts,
    index=['MSA'],
    values=['FILLED_SHIFTS', 'POSTED_SHIFTS'],
    aggfunc='sum',
)
shifts

Unnamed: 0_level_0,FILLED_SHIFTS,POSTED_SHIFTS
MSA,Unnamed: 1_level_1,Unnamed: 2_level_1
"Atlanta-Sandy Springs-Alpharetta, GA",4469.0,6266.0
"Boston-Cambridge-Newton, MA-NH",1860.0,3085.0
"Canton-Massillon, OH",812.0,1114.0
"Chico, CA",1356.0,2591.0
"Cincinnati, OH-KY-IN",1834.0,2813.0
"Columbus, OH",1601.0,1949.0
"Dallas-Fort Worth-Arlington, TX",4365.0,5629.0
"Denver-Aurora-Lakewood, CO",1510.0,1868.0
"Detroit-Warren-Dearborn, MI",725.0,1084.0
"Grand Rapids-Kentwood, MI",975.0,1625.0


In [17]:
# Per-MSA fill rate: filled shifts divided by posted shifts, stored as a new
# FILL_RATE column alongside the two totals.
fill_rate = shifts['FILLED_SHIFTS'].div(shifts['POSTED_SHIFTS'])
shifts['FILL_RATE'] = fill_rate
shifts

Unnamed: 0_level_0,FILLED_SHIFTS,POSTED_SHIFTS,FILL_RATE
MSA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Atlanta-Sandy Springs-Alpharetta, GA",4469.0,6266.0,0.713214
"Boston-Cambridge-Newton, MA-NH",1860.0,3085.0,0.602917
"Canton-Massillon, OH",812.0,1114.0,0.728905
"Chico, CA",1356.0,2591.0,0.52335
"Cincinnati, OH-KY-IN",1834.0,2813.0,0.651973
"Columbus, OH",1601.0,1949.0,0.821447
"Dallas-Fort Worth-Arlington, TX",4365.0,5629.0,0.775449
"Denver-Aurora-Lakewood, CO",1510.0,1868.0,0.808351
"Detroit-Warren-Dearborn, MI",725.0,1084.0,0.668819
"Grand Rapids-Kentwood, MI",975.0,1625.0,0.6


In [18]:
# Overall fill rate across every MSA: total filled shifts over total posted
# shifts (a ratio of sums, not an average of the per-MSA rates).
total_filled, total_posted = (
    shifts[column].sum() for column in ('FILLED_SHIFTS', 'POSTED_SHIFTS')
)
print(total_filled / total_posted)

0.7937024972855592
