# Overview

We build data loaders for general use as well as model-specific ones. In this notebook you will see how to use them to request data.

In [2]:
from hdt_util import Basic_feeder, ArmadilloV1_feeder
import datetime

## Data Loader for General Use

You can get data about:

    1. confirmed cases from the `jhu-csse` or `usa-facts` source
    2. deaths from the `jhu-csse` or `usa-facts` source
    3. mobility data from the `safegraph` source
    4. leading indicators from the `fb-survey` or `doctor-visits` source

In [2]:
# Create the general-purpose loader. './request_cache' is the path handed to
# Basic_feeder (presumably the directory where requests are cached — confirm in hdt_util).
loader = Basic_feeder('./request_cache')

## Get state-level confirmed cases from jhu-csse for October 2020

In [3]:
# State-level confirmed cases from jhu-csse for 2020-10-01 .. 2020-10-31.
# count=True with cumulated=False yields the `confirmed_incidence_num` signal
# (see the recorded output of this cell).
loader.query_cases(
    source='jhu-csse',
    signal='confirmed',
    start_date=datetime.date(2020, 10, 1),
    end_date=datetime.date(2020, 10, 31),
    level='state',
    count=True,
    cumulated=False,
)

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
0,ak,confirmed_incidence_num,2020-10-01,2020-10-30,29,140,,,state,jhu-csse
1,al,confirmed_incidence_num,2020-10-01,2020-10-14,13,1043,,,state,jhu-csse
2,ar,confirmed_incidence_num,2020-10-01,2020-10-14,13,1124,,,state,jhu-csse
3,az,confirmed_incidence_num,2020-10-01,2020-10-14,13,705,,,state,jhu-csse
4,ca,confirmed_incidence_num,2020-10-01,2020-10-14,13,3090,,,state,jhu-csse
...,...,...,...,...,...,...,...,...,...,...
47,vt,confirmed_incidence_num,2020-10-30,2020-10-31,1,14,,,state,jhu-csse
48,wa,confirmed_incidence_num,2020-10-30,2020-10-31,1,1016,,,state,jhu-csse
49,wi,confirmed_incidence_num,2020-10-30,2020-10-31,1,5096,,,state,jhu-csse
50,wv,confirmed_incidence_num,2020-10-30,2020-10-31,1,526,,,state,jhu-csse


## Get county-level deaths from usa-facts for November 2020

In [4]:
# County-level deaths from usa-facts for 2020-11-01 .. 2020-11-30.
# count=True with cumulated=False yields the `deaths_incidence_num` signal
# (see the recorded output of this cell).
loader.query_cases(
    source='usa-facts',
    signal='deaths',
    start_date=datetime.date(2020, 11, 1),
    end_date=datetime.date(2020, 11, 30),
    level='county',
    count=True,
    cumulated=False,
)

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
0,01000,deaths_incidence_num,2020-11-01,2020-11-03,2,0.0,,,county,usa-facts
1,01001,deaths_incidence_num,2020-11-01,2020-11-03,2,0.0,,,county,usa-facts
2,01003,deaths_incidence_num,2020-11-01,2020-11-03,2,0.0,,,county,usa-facts
3,01005,deaths_incidence_num,2020-11-01,2020-11-03,2,0.0,,,county,usa-facts
4,01007,deaths_incidence_num,2020-11-01,2020-11-03,2,0.0,,,county,usa-facts
...,...,...,...,...,...,...,...,...,...,...
3187,56037,deaths_incidence_num,2020-11-29,2020-11-30,1,0.0,,,county,usa-facts
3188,56039,deaths_incidence_num,2020-11-29,2020-11-30,1,0.0,,,county,usa-facts
3189,56041,deaths_incidence_num,2020-11-29,2020-11-30,1,0.0,,,county,usa-facts
3190,56043,deaths_incidence_num,2020-11-29,2020-11-30,1,0.0,,,county,usa-facts


## Get state-level mobility data from safegraph for September 2020, using different signals

In [5]:
# State-level safegraph mobility for 2020-09-01 .. 2020-09-30.
# signal=1 corresponds to `completely_home_prop` in the recorded output of this cell.
loader.query_mobility(
    signal=1,
    start_date=datetime.date(2020, 9, 1),
    end_date=datetime.date(2020, 9, 30),
    level='state',
)

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
0,ak,completely_home_prop,2020-09-01,2020-09-30,29,0.213540,0.004175,519,state,safegraph
1,al,completely_home_prop,2020-09-01,2020-09-30,29,0.193800,0.001156,3434,state,safegraph
2,ar,completely_home_prop,2020-09-01,2020-09-30,29,0.205252,0.001458,2139,state,safegraph
3,as,completely_home_prop,2020-09-01,2020-09-30,29,0.379461,0.087716,6,state,safegraph
4,az,completely_home_prop,2020-09-01,2020-09-30,29,0.285534,0.001290,4153,state,safegraph
...,...,...,...,...,...,...,...,...,...,...
49,vt,completely_home_prop,2020-09-26,2020-09-30,4,0.261621,0.004402,522,state,safegraph
50,wa,completely_home_prop,2020-09-26,2020-09-30,4,0.318655,0.001315,4768,state,safegraph
51,wi,completely_home_prop,2020-09-26,2020-09-30,4,0.265433,0.001271,4475,state,safegraph
52,wv,completely_home_prop,2020-09-26,2020-09-30,4,0.263010,0.002251,1588,state,safegraph


In [6]:
# Same request as the previous cell, but with signal=2, which corresponds to
# `full_time_work_prop` in the recorded output of this cell.
loader.query_mobility(
    signal=2,
    start_date=datetime.date(2020, 9, 1),
    end_date=datetime.date(2020, 9, 30),
    level='state',
)

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
0,ak,full_time_work_prop,2020-09-01,2020-09-30,29,0.048815,0.001470,519,state,safegraph
1,al,full_time_work_prop,2020-09-01,2020-09-30,29,0.068018,0.000621,3434,state,safegraph
2,ar,full_time_work_prop,2020-09-01,2020-09-30,29,0.068502,0.000787,2139,state,safegraph
3,as,full_time_work_prop,2020-09-01,2020-09-30,29,0.119384,0.018801,6,state,safegraph
4,az,full_time_work_prop,2020-09-01,2020-09-30,29,0.042552,0.000420,4153,state,safegraph
...,...,...,...,...,...,...,...,...,...,...
49,vt,full_time_work_prop,2020-09-26,2020-09-30,4,0.036295,0.001153,522,state,safegraph
50,wa,full_time_work_prop,2020-09-26,2020-09-30,4,0.029936,0.000313,4768,state,safegraph
51,wi,full_time_work_prop,2020-09-26,2020-09-30,4,0.027789,0.000308,4475,state,safegraph
52,wv,full_time_work_prop,2020-09-26,2020-09-30,4,0.030007,0.000537,1588,state,safegraph


## Get county-level leading indicator data from the Facebook survey (`fb-survey`) for November 2020

In [7]:
# County-level `smoothed_cli` leading indicator from fb-survey
# for 2020-11-01 .. 2020-11-30.
loader.query_leading_indicator(
    source='fb-survey',
    signal='smoothed_cli',
    start_date=datetime.date(2020, 11, 1),
    end_date=datetime.date(2020, 11, 30),
    level='county',
)

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
0,01000,smoothed_cli,2020-11-01,2020-11-06,5,1.092908,0.199894,1209.3305,county,fb-survey
1,01003,smoothed_cli,2020-11-01,2020-11-06,5,0.728908,0.659655,171.0638,county,fb-survey
2,01073,smoothed_cli,2020-11-01,2020-11-06,5,0.947050,0.501025,204.1320,county,fb-survey
3,01089,smoothed_cli,2020-11-01,2020-11-06,5,1.335795,0.581266,365.9611,county,fb-survey
4,01097,smoothed_cli,2020-11-01,2020-11-06,5,0.960620,0.561501,237.9386,county,fb-survey
...,...,...,...,...,...,...,...,...,...,...
655,55139,smoothed_cli,2020-11-29,2020-11-30,1,1.610003,0.775597,234.5825,county,fb-survey
656,55141,smoothed_cli,2020-11-29,2020-11-30,1,0.497576,0.620701,115.6976,county,fb-survey
657,56000,smoothed_cli,2020-11-29,2020-11-30,1,2.214070,0.506224,486.1960,county,fb-survey
658,56021,smoothed_cli,2020-11-29,2020-11-30,1,2.583695,1.170287,159.9764,county,fb-survey


# Load data for ArmadilloV1

In [3]:
# Create the ArmadilloV1-specific loader, using the same './request_cache' path.
# NOTE(review): this rebinds `loader`; re-running the Basic_feeder cells above after
# this cell would call them on the ArmadilloV1_feeder instance instead.
loader = ArmadilloV1_feeder('./request_cache')

In [10]:
# Load the raw model input: county-level jhu-csse deaths for
# 2020-05-01 .. 2020-08-01, requested with mobility_level=1.
# The returned table carries both `case_value` and `mobility_value` columns.
data = loader.get_data(
    source='jhu-csse',
    signal='deaths',
    start_date=datetime.date(2020, 5, 1),
    end_date=datetime.date(2020, 8, 1),
    level='county',
    count=True,
    cumulated=False,
    mobility_level=1,
)
data.head()

Unnamed: 0,geo_value,date,case_value,time,dayofweek,mobility_value
0,1001,2020-05-01,-1,62,4,0.223168
1,1003,2020-05-01,1,62,4,0.253409
2,1005,2020-05-01,0,62,4,0.219235
3,1007,2020-05-01,0,62,4,0.206663
4,1009,2020-05-01,0,62,4,0.209368


In [11]:
# Show the last rows of the loaded data. In the recorded output the latest date is
# 2020-07-28, i.e. earlier than the requested end_date of 2020-08-01.
data.tail()

Unnamed: 0,geo_value,date,case_value,time,dayofweek,mobility_value
279561,56037,2020-07-28,0,150,1,0.2465
279562,56039,2020-07-28,0,150,1,0.230085
279563,56041,2020-07-28,0,150,1,0.282966
279564,56043,2020-07-28,0,150,1,0.274957
279565,56045,2020-07-28,0,150,1,0.287944


In [14]:
# Restrict the loaded data to a single county: Yolo County, CA (FIPS code 06113).
filtered_data = loader.area_filter(
    input=data,
    area=['06113'],
)
filtered_data.head()

Unnamed: 0,geo_value,date,case_value,time,dayofweek,mobility_value
0,6113,2020-05-01,0,62,4,0.430326
1,6113,2020-05-02,0,63,5,0.428025
2,6113,2020-05-03,1,64,6,0.421607
3,6113,2020-05-04,2,65,0,0.42103
4,6113,2020-05-05,0,66,1,0.432966


In [18]:
# Show the last rows of the single-county subset to check its final dates.
filtered_data.tail()

Unnamed: 0,geo_value,date,case_value,time,dayofweek,mobility_value
84,6113,2020-07-24,0,146,4,0.301759
85,6113,2020-07-25,0,147,5,0.350235
86,6113,2020-07-26,0,148,6,0.37346
87,6113,2020-07-27,0,149,0,0.341095
88,6113,2020-07-28,2,150,1,0.322936


In [19]:
# Pool the daily single-county rows into 7-day periods, up to 2020-08-10.
# NOTE(review): end_date is later than the last loaded date (data was requested
# through 2020-08-01) — presumably intentional padding; confirm against `pooling`.
weekly_data = loader.pooling(
    input=filtered_data,
    period=7,
    end_date=datetime.date(2020, 8, 10),
)

In [21]:
# Show the last pooled periods (columns: geo_value, time, case_value, mobility_value).
weekly_data.tail()

Unnamed: 0,geo_value,time,case_value,mobility_value
9,6113,9,2.0,0.344165
10,6113,10,6.0,0.345335
11,6113,11,3.0,0.340534
12,6113,12,2.0,0.322936
13,6113,13,0.0,0.322936


In [None]:
# Bundle the model inputs and fit the model.
#
# NOTE(review): `avg_temp_train`, `DC` and `model` are not defined in the visible part
# of this notebook, so this cell cannot run on a fresh kernel as written. Define them
# in earlier cells (or remove this cell) before relying on Restart & Run All.
#
# The original cell also had `model.fit(args)` indented under a completed top-level
# statement, which raises IndentationError; it is now at top level.
args = {
    'M': avg_temp_train['mobility_value'].values,       # mobility values
    'DC': DC,
    'y_true': avg_temp_train['case_value'].values,      # observed case values
}

model.fit(args)