In [3]:
import pandas as pd
import numpy as np



First, we will import and sanitize our datasets, starting with the COVID data from Johns Hopkins.



In [4]:
# Load the raw Johns Hopkins county-level COVID time series from the working directory.
dfCOVID = pd.read_csv(filepath_or_buffer='johnshopkins.csv')
# Preview the first five rows to sanity-check the import.
dfCOVID.head(n=5)



Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,12/11/20,12/12/20,12/13/20,12/14/20,12/15/20,12/16/20,12/17/20,12/18/20,12/19/20,12/20/20
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,3233,3258,3300,3329,3426,3510,3570,3647,3698,3741
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,10489,10665,10806,10898,11061,11212,11364,11556,11722,11827
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,1264,1269,1272,1275,1292,1296,1309,1318,1330,1336
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,1398,1417,1441,1455,1504,1520,1548,1577,1601,1613
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,3663,3744,3776,3803,3881,3950,4036,4118,4191,4218


The imported COVID dataset has several columns that are not needed for this project (the ISO country codes, the FIPS code, and the country name). Let's drop those.

In [6]:
# Columns that are not needed for this analysis.
to_drop = ['iso2',
           'iso3',
           'code3',
           'FIPS',
           'Country_Region']

# Rebind to a new frame instead of mutating in place, and ignore labels that
# are already gone, so this cell can be re-run without raising KeyError.
dfCOVID = dfCOVID.drop(columns=to_drop, errors='ignore')

dfCOVID.head()

Unnamed: 0,UID,Admin2,Province_State,Lat,Long_,Combined_Key,1/22/20,1/23/20,1/24/20,1/25/20,...,12/11/20,12/12/20,12/13/20,12/14/20,12/15/20,12/16/20,12/17/20,12/18/20,12/19/20,12/20/20
0,84001001,Autauga,Alabama,32.539527,-86.644082,"Autauga, Alabama, US",0,0,0,0,...,3233,3258,3300,3329,3426,3510,3570,3647,3698,3741
1,84001003,Baldwin,Alabama,30.72775,-87.722071,"Baldwin, Alabama, US",0,0,0,0,...,10489,10665,10806,10898,11061,11212,11364,11556,11722,11827
2,84001005,Barbour,Alabama,31.868263,-85.387129,"Barbour, Alabama, US",0,0,0,0,...,1264,1269,1272,1275,1292,1296,1309,1318,1330,1336
3,84001007,Bibb,Alabama,32.996421,-87.125115,"Bibb, Alabama, US",0,0,0,0,...,1398,1417,1441,1455,1504,1520,1548,1577,1601,1613
4,84001009,Blount,Alabama,33.982109,-86.567906,"Blount, Alabama, US",0,0,0,0,...,3663,3744,3776,3803,3881,3950,4036,4118,4191,4218


The Johns Hopkins team has already assigned a unique identifier (`UID`) to each county, so let's set that column
as the index of our dataframe.

In [7]:
# Promote the UID column to the index. The guard keeps the cell re-runnable:
# once UID is the index it is no longer a column, and a second set_index call
# would raise KeyError.
if dfCOVID.index.name != 'UID':
    # verify_integrity=True raises ValueError if any UID is duplicated, so the
    # "one unique identifier per county" assumption is checked rather than trusted.
    dfCOVID = dfCOVID.set_index('UID', verify_integrity=True)
dfCOVID.head()

Unnamed: 0_level_0,Admin2,Province_State,Lat,Long_,Combined_Key,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,...,12/11/20,12/12/20,12/13/20,12/14/20,12/15/20,12/16/20,12/17/20,12/18/20,12/19/20,12/20/20
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
84001001,Autauga,Alabama,32.539527,-86.644082,"Autauga, Alabama, US",0,0,0,0,0,...,3233,3258,3300,3329,3426,3510,3570,3647,3698,3741
84001003,Baldwin,Alabama,30.72775,-87.722071,"Baldwin, Alabama, US",0,0,0,0,0,...,10489,10665,10806,10898,11061,11212,11364,11556,11722,11827
84001005,Barbour,Alabama,31.868263,-85.387129,"Barbour, Alabama, US",0,0,0,0,0,...,1264,1269,1272,1275,1292,1296,1309,1318,1330,1336
84001007,Bibb,Alabama,32.996421,-87.125115,"Bibb, Alabama, US",0,0,0,0,0,...,1398,1417,1441,1455,1504,1520,1548,1577,1601,1613
84001009,Blount,Alabama,33.982109,-86.567906,"Blount, Alabama, US",0,0,0,0,0,...,3663,3744,3776,3803,3881,3950,4036,4118,4191,4218


That should do it for the COVID set. The Johns Hopkins data is maintained by a highly qualified team, so let's avoid
skewing it with unnecessary alterations. Next is the client's staffing data!

In [8]:
# Load the client's staffing records from the working directory.
dfN7 = pd.read_csv(filepath_or_buffer='N7data.csv')
# Preview the first five rows to sanity-check the import.
dfN7.head(n=5)


Unnamed: 0,employeeID,clientCompany,clientIndustry,startDate,endDate,jobTitle,payrate,markup,billrate,hours
0,1776,Corwin LLC,,10/18/2020,10/18/2020,Account Executive,13.03,39,18.11,1.199923
1,1253,"Breitenberg, Rogahn and Abbott",Real Estate,4/7/2020,4/7/2020,Research Associate,23.28,34,31.2,1.381912
2,1865,Jast Group,,11/18/2020,11/18/2020,Dental Hygienist,10.54,29,13.6,1.452889
3,1065,Mann-Kihn,Major Banks,1/24/2020,1/24/2020,Executive Secretary,16.46,36,22.39,1.674138
4,1880,Conroy-Beer,Major Chemicals,11/25/2020,11/25/2020,Geologist II,12.29,22,14.99,2.265136
