In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("..")
from test_funcs import fips_lookup
import random
from datetime import date, datetime, timedelta
from pprint import pprint

In [3]:
import pandas as pd
# Notebook display settings: never truncate cell text or hide columns,
# and render up to 300 rows before pandas abbreviates a frame.
for option_name, option_value in (
    ("display.max_colwidth", None),
    ("display.max_columns", None),
    ("display.max_rows", 300),
):
    pd.set_option(option_name, option_value)

In [4]:
# Load the pre-built dataset from a bz2-compressed pickle.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
covid_data = pd.read_pickle("../data/nyt_enriched.pkl", compression="bz2")

In [5]:
# Display the loaded frame (pandas abbreviates the rendering for a frame this long).
covid_data

Unnamed: 0,fips,date,daily_cases,cumulative_cases,daily_deaths,cumulative_deaths,population
0,01001,2020-03-24,1,1,0,0,55869
1,01001,2020-03-25,3,4,0,0,55869
2,01001,2020-03-26,2,6,0,0,55869
3,01001,2020-03-27,0,6,0,0,55869
4,01001,2020-03-28,0,6,0,0,55869
...,...,...,...,...,...,...,...
533995,56045,2020-09-15,0,23,0,0,6927
533996,56045,2020-09-16,0,23,0,0,6927
533997,56045,2020-09-17,0,23,0,0,6927
533998,56045,2020-09-18,0,23,0,0,6927


In [6]:
# Check the dtype of every column.
covid_data.dtypes

fips                         object
date                 datetime64[ns]
daily_cases                   int64
cumulative_cases              int64
daily_deaths                  int64
cumulative_deaths             int64
population                    int64
dtype: object

In [7]:
# Earliest date present in the data.
covid_data["date"].min()

Timestamp('2020-01-21 00:00:00')

In [8]:
# Latest date present in the data.
covid_data["date"].max()

Timestamp('2020-09-19 00:00:00')

In [9]:
# Verify that (fips, date) uniquely identifies a row.
# duplicated(subset=...) compares the two key columns directly, so there is no
# need to build a throwaway concatenated string key for every row.
total_rows = len(covid_data)
repeated_keys = int(covid_data.duplicated(subset=["fips", "date"]).sum())
unique_date_plus_fips = total_rows - repeated_keys
assert total_rows == unique_date_plus_fips, \
    f"Total rows ({total_rows}) and unique date+fips counts ({unique_date_plus_fips}) do not match"

In [10]:
# Build the list of calendar days covered by the data file, one datetime per day.
#
# The upper bound is taken from the data itself instead of "yesterday"
# (date.today() - 1 day): anchoring on the wall clock made the range grow past
# the last row in the file on any later re-run, so the random lookups drifted
# toward empty results and every lookup keyed on end_date hit a day with no data.
start_date = datetime(2020, 1, 21)  # matches the earliest date in the file
end_date = covid_data["date"].max().to_pydatetime()  # plain datetime, as before

# Inclusive range: one entry for every day from start_date through end_date.
day_count = (end_date - start_date).days + 1
valid_dates = [start_date + timedelta(days=offset) for offset in range(day_count)]

print(f"Start: {start_date}")
print(f"End  : {end_date}")
print(f"Min  : {min(valid_dates)}")
print(f"Max  : {max(valid_dates)}")
print(f"Count: {len(valid_dates)}")

Start: 2020-01-21 00:00:00
End  : 2020-09-19 00:00:00
Min  : 2020-01-21 00:00:00
Max  : 2020-09-19 00:00:00
Count: 243


In [11]:
# Collect the distinct fips codes so random ones can be drawn for spot checks
fips_codes = {code for code in covid_data["fips"]}

In [12]:
# Spot-check fips_lookup on up to 100 random (fips code, date) pairs and print
# each result.
#
# random.sample() requires a sequence: passing a set was deprecated in
# Python 3.9 and raises TypeError from 3.11 on, so the codes are first put into
# a list. Sorting (by string form, so mixed value types cannot break the sort)
# gives the pool a stable order. The sample size is capped at the pool size so
# a small data file does not raise ValueError.
fips_pool = sorted(fips_codes, key=str)
for f in random.sample(fips_pool, min(100, len(fips_pool))):
    d = random.choice(valid_dates)
    results = fips_lookup(f, d, covid_data)
    print(f"{f} {str(d)[:10]}")
    pprint(results)
    print()

01057 2020-02-07
{}

51087 2020-08-14
{'cumulative_cases': 4042,
 'cumulative_deaths': 187,
 'daily_cases': 43,
 'daily_deaths': 0,
 'population': 330818}

40083 2020-03-31
{'cumulative_cases': 4,
 'cumulative_deaths': 0,
 'daily_cases': 0,
 'daily_deaths': 0,
 'population': 48011}

51059 2020-03-01
{}

48133 2020-04-29
{'cumulative_cases': 3,
 'cumulative_deaths': 0,
 'daily_cases': 0,
 'daily_deaths': 0,
 'population': 18360}

26137 2020-08-16
{'cumulative_cases': 146,
 'cumulative_deaths': 11,
 'daily_cases': 0,
 'daily_deaths': 0,
 'population': 24668}

13083 2020-03-06
{}

20047 2020-04-15
{}

26089 2020-08-25
{'cumulative_cases': 84,
 'cumulative_deaths': 1,
 'daily_cases': 0,
 'daily_deaths': 1,
 'population': 21761}

39097 2020-08-19
{'cumulative_cases': 605,
 'cumulative_deaths': 11,
 'daily_cases': 4,
 'daily_deaths': 0,
 'population': 44731}

28025 2020-08-03
{'cumulative_cases': 370,
 'cumulative_deaths': 13,
 'daily_cases': 2,
 'daily_deaths': -1,
 'population': 19316}

48

33013 2020-07-03
{'cumulative_cases': 417,
 'cumulative_deaths': 18,
 'daily_cases': 3,
 'daily_deaths': 0,
 'population': 151391}

08079 2020-04-15
{'cumulative_cases': 2,
 'cumulative_deaths': 0,
 'daily_cases': 0,
 'daily_deaths': 0,
 'population': 769}

39163 2020-03-25
{}

48001 2020-05-07
{'cumulative_cases': 40,
 'cumulative_deaths': 0,
 'daily_cases': 2,
 'daily_deaths': 0,
 'population': 57735}

12039 2020-02-04
{}

55097 2020-09-17
{'cumulative_cases': 1071,
 'cumulative_deaths': 3,
 'daily_cases': 34,
 'daily_deaths': 0,
 'population': 70772}



In [13]:
# Inspect every row for Cook County (fips 17031)
covid_data.loc[covid_data["fips"].eq("17031")]

Unnamed: 0,fips,date,daily_cases,cumulative_cases,daily_deaths,cumulative_deaths,population
106747,17031,2020-01-24,1,1,0,0,5150233
106748,17031,2020-01-25,0,1,0,0,5150233
106749,17031,2020-01-26,0,1,0,0,5150233
106750,17031,2020-01-27,0,1,0,0,5150233
106751,17031,2020-01-28,0,1,0,0,5150233
106752,17031,2020-01-29,0,1,0,0,5150233
106753,17031,2020-01-30,1,2,0,0,5150233
106754,17031,2020-01-31,0,2,0,0,5150233
106755,17031,2020-02-01,0,2,0,0,5150233
106756,17031,2020-02-02,0,2,0,0,5150233


In [14]:
# Spot-check Cook County on the last day of the range.
# To validate: search for "cook county covid counts" and scroll down a bit to
# find the published numbers, then compare them with the figures printed here.
f, d = "17031", end_date
results = fips_lookup(f, d, covid_data)
print(f, str(d)[:10])
pprint(results)
print()

17031 2020-09-19
{'cumulative_cases': 138576,
 'cumulative_deaths': 5157,
 'daily_cases': 996,
 'daily_deaths': 7,
 'population': 5150233}



In [15]:
# Spot-check Los Angeles County on the last day of the range.
# To validate: search for "los angeles county covid counts" and scroll down a
# bit to find the published numbers, then compare them with the figures printed here.
f, d = "06037", end_date
results = fips_lookup(f, d, covid_data)
print(f, str(d)[:10])
pprint(results)
print()

06037 2020-09-19
{'cumulative_cases': 259817,
 'cumulative_deaths': 6330,
 'daily_cases': 1301,
 'daily_deaths': 0,
 'population': 10039107}



In [16]:
# Spot-check King County, Washington on the last day of the range.
# To validate: search for "king county covid counts" and scroll down a bit to
# find the published numbers, then compare them with the figures printed here.
f, d = "53033", end_date
results = fips_lookup(f, d, covid_data)
print(f, str(d)[:10])
pprint(results)
print()

53033 2020-09-19
{'cumulative_cases': 21418,
 'cumulative_deaths': 770,
 'daily_cases': 95,
 'daily_deaths': 0,
 'population': 2252782}

