## cross dataset analysis with covid19 datasets

#### analyze covid19 cases worldwide

In [1]:
%%bigquery
select date, country_region, sum(confirmed) as confirmed, sum(deaths) as deaths, sum(recovered) as recovered, sum(active) as active
from covid19_jhu_csse_modeled.reports r join covid19_jhu_csse_modeled.location l
on r.location_id = l.id
group by date, country_region
order by date desc
limit 12

Unnamed: 0,date,country_region,confirmed,deaths,recovered,active
0,2020-05-02,Bahamas,83,11,24,48
1,2020-05-02,Libya,63,3,22,38
2,2020-05-02,Australia,20041,266,17157,2618
3,2020-05-02,Sudan,1184,82,104,998
4,2020-05-02,Tajikistan,76,2,0,74
5,2020-05-02,Indonesia,21686,1662,3330,16694
6,2020-05-02,France,838585,123704,250948,463933
7,2020-05-02,Guyana,328,36,88,204
8,2020-05-02,US,90203847,4857362,350764,85049933
9,2020-05-02,Japan,58284,1896,12820,43568


analyze mobility changes worldwide

In [2]:
%%bigquery
select date, country_region, avg(retail_and_rec) as retail_and_rec, avg(grocery_and_pharm) as grocery_and_pharm, avg(parks) as parks, avg(transit_stations) as transit_stations, avg(workplaces) as workplaces, avg(residential) as residential
from google_modeled.mobility_event g join google_modeled.location l
on g.location_id = l.id
group by date, country_region
order by date desc
limit 12

Unnamed: 0,date,country_region,retail_and_rec,grocery_and_pharm,parks,transit_stations,workplaces,residential
0,2020-04-26,United Arab Emirates,-49.375,-24.75,-66.0,-59.428571,-41.875,26.857143
1,2020-04-26,New Zealand,-90.4,-38.466667,-72.647059,-81.083333,-43.941176,20.0
2,2020-04-26,Uruguay,-65.384615,-39.8,-73.85,-66.882353,-22.684211,19.0
3,2020-04-26,United States,-31.938215,-10.98595,4.608637,-31.217259,-30.369245,9.894891
4,2020-04-26,Kenya,-47.727273,-39.923077,-28.25,-42.32,-16.678571,21.0
5,2020-04-26,Hungary,-43.15,-24.952381,22.8,-32.952381,-19.47619,5.5625
6,2020-04-26,Slovenia,-80.1,-89.5,-30.666667,-52.4,-33.857143,9.5
7,2020-04-26,Portugal,-76.952381,-50.285714,-65.47619,-74.375,-42.190476,19.363636
8,2020-04-26,Bulgaria,-52.894737,-24.227273,-13.65,-45.181818,-17.172414,6.875
9,2020-04-26,United Kingdom,-77.368794,-35.621622,-17.530769,-55.687943,-45.979866,13.460317


cross dataset queries

In [3]:
%%bigquery
select r.date, l.country_region, sum(confirmed) as confirmed, avg(workplaces) as workplaces, avg(residential) as residential
from covid19_jhu_csse_modeled.reports r 
join covid19_jhu_csse_modeled.location l on r.location_id = l.id
join google_modeled.location gl on gl.country_region = l.country_region
join google_modeled.mobility_event g on g.location_id = gl.id and r.date = g.date
group by date, country_region
order by date desc
limit 12

Unnamed: 0,date,country_region,confirmed,workplaces,residential
0,2020-04-26,Nicaragua,26,-13.0,9.0
1,2020-04-26,Australia,178164,-30.111111,11.777778
2,2020-04-26,Austria,456750,-34.6,8.111111
3,2020-04-26,Denmark,156726,-15.5,4.666667
4,2020-04-26,Jordan,1341,-58.0,23.0
5,2020-04-26,Singapore,54496,-52.0,28.0
6,2020-04-26,Japan,2580672,-23.333333,11.75
7,2020-04-26,Libya,61,-31.0,18.0
8,2020-04-26,Papua New Guinea,16,17.0,
9,2020-04-26,Indonesia,621740,-14.2,15.971429


#### analyze US stats

In [4]:
%%bigquery
select date, sum(confirmed) as confirmed, sum(deaths) as deaths, sum(recovered) as recovered, sum(active) as active
from covid19_jhu_csse_modeled.reports r join covid19_jhu_csse_modeled.location l
on r.location_id = l.id
where country_region = 'US' 
group by date
order by date desc
limit 5

Unnamed: 0,date,confirmed,deaths,recovered,active
0,2020-05-02,90203847,4857362,350764,85049933
1,2020-05-01,87639984,4763349,328030,82605955
2,2020-04-30,84695482,4618875,307894,79820089
3,2020-04-29,82074472,4469789,241440,77363243
4,2020-04-28,79698620,4281761,231872,75184987


In [5]:
%%bigquery
select date, avg(retail_and_rec) as retail_and_rec, avg(grocery_and_pharm) as grocery_and_pharm, avg(parks) as parks, avg(transit_stations) as transit_stations, avg(workplaces) as workplaces, avg(residential) as residential
from google_modeled.mobility_event g join google_modeled.location l
on g.location_id = l.id
where country_region_code = 'US' 
group by date
order by date desc
limit 5

Unnamed: 0,date,retail_and_rec,grocery_and_pharm,parks,transit_stations,workplaces,residential
0,2020-04-26,-31.938215,-10.98595,4.608637,-31.217259,-30.369245,9.894891
1,2020-04-25,-32.901251,-6.931497,18.922652,-27.582016,-25.842702,11.154839
2,2020-04-24,-32.301352,-10.066854,1.854951,-30.05291,-37.474558,18.543513
3,2020-04-23,-32.612216,-9.902439,-4.493741,-29.091985,-38.722934,18.236722
4,2020-04-22,-29.921324,-8.738187,3.526395,-26.725114,-38.433808,17.022523


cross dataset queries

In [6]:
%%bigquery
(select date, province_state as state, sum(j.confirmed) as confirmed, sum(j.deaths) as deaths
from covid19_jhu_csse_modeled.reports j 
join covid19_jhu_csse_modeled.us_location_Beam_DF l on j.location_id = l.id
group by date, province_state)
union all
(select date, state, sum(confirmed_cases) as confirmed, sum(deaths) as deaths
from covid19_usafacts_modeled.reports r 
join covid19_usafacts_modeled.loc_state_Beam_DF s on s.state_fips_code = r.state_fips_code
group by date, state)
order by date desc, state
limit 12

Unnamed: 0,date,state,confirmed,deaths
0,2020-05-02,Alabama,570825,21600
1,2020-05-02,Alabama,7611,288
2,2020-05-02,Alaska,19345,477
3,2020-05-02,Alaska,366,9
4,2020-05-02,Arizona,8364,348
5,2020-05-02,Arizona,250920,9900
6,2020-05-02,Arkansas,283248,6048
7,2020-05-02,Arkansas,3372,72
8,2020-05-02,California,53649,2188
9,2020-05-02,California,5121312,209280


#### look at TX stats

In [7]:
%%bigquery
select date, province_state, sum(confirmed) as confirmed, sum(deaths) as deaths, sum(recovered) as recovered, sum(active) as active
from covid19_jhu_csse_modeled.reports r join covid19_jhu_csse_modeled.location l
on r.location_id = l.id
where province_state  = 'Texas' or province_state like '%TX%'
group by province_state, date
order by date desc, province_state
limit 5

Unnamed: 0,date,province_state,confirmed,deaths,recovered,active
0,2020-05-02,Texas,9862523,275297,0,9587226
1,2020-05-01,Texas,9471748,267960,0,9203788
2,2020-04-30,Texas,9163913,259028,0,8904885
3,2020-04-29,Texas,8694983,240526,0,8454457
4,2020-04-28,Texas,8407883,229361,0,8178522


In [8]:
%%bigquery
select date, avg(retail_and_rec) as retail_and_rec, avg(grocery_and_pharm) as grocery_and_pharm, avg(parks) as parks, avg(transit_stations) as transit_stations, avg(workplaces) as workplaces, avg(residential) as residential
from google_modeled.mobility_event g join google_modeled.location l
on g.location_id = l.id
where sub_region_1 = 'Texas'
group by date, country_region
order by date desc
limit 5

Unnamed: 0,date,retail_and_rec,grocery_and_pharm,parks,transit_stations,workplaces,residential
0,2020-04-26,-24.663043,-10.554348,-0.098039,-26.265957,-26.939189,9.574468
1,2020-04-25,-26.803922,-6.39604,0.862069,-23.816327,-21.313725,11.117647
2,2020-04-24,-25.866667,-8.380952,1.45098,-22.626263,-33.974359,17.024096
3,2020-04-23,-23.061224,-6.291667,11.166667,-20.826531,-33.497462,15.370787
4,2020-04-22,-27.14433,-11.043011,-11.533333,-21.707071,-34.193878,17.102273


cross dataset queries

In [9]:
%%bigquery
(select date, city_county as county, sum(j.confirmed) as confirmed, sum(j.deaths) as deaths
from covid19_jhu_csse_modeled.reports j 
join covid19_jhu_csse_modeled.us_location_Beam_DF l on j.location_id = l.id
where province_state = 'Texas'
group by date, city_county)
union all
(select date, county_name as county, sum(confirmed_cases) as confirmed, sum(deaths) as deaths
from covid19_usafacts_modeled.reports r 
join covid19_usafacts_modeled.loc_state_Beam_DF s on s.state_fips_code = r.state_fips_code
join covid19_usafacts_modeled.loc_county c on c.county_fips_code = r.county_fips_code
where state = 'Texas'
group by date, county_name)
order by date desc
limit 12

Unnamed: 0,date,county,confirmed,deaths
0,2020-05-02,Fort Bend County,1079,26
1,2020-05-02,Ellis County,166,6
2,2020-05-02,Rockwall County,85,3
3,2020-05-02,Ector County,78,4
4,2020-05-02,Shelby County,117,4
5,2020-05-02,Fannin County,19,0
6,2020-05-02,Rusk County,36,1
7,2020-05-02,El Paso County,961,22
8,2020-05-02,Sherman County,17,0
9,2020-05-02,San Augustine County,19,1


#### create views

In [10]:
dataset_id = "covid19_views" #`spry-cosine-266801.

In [11]:
!bq --location=US mk --dataset {dataset_id}

BigQuery error in mk operation: Dataset 'spry-cosine-266801:covid19_views'
already exists.


In [12]:
%%bigquery
create or replace view covid19_views.v_world_cases as
select date, country_region, sum(confirmed) as confirmed, sum(deaths) as deaths, sum(recovered) as recovered, sum(active) as active
from `spry-cosine-266801.covid19_jhu_csse_modeled.reports` r 
join `spry-cosine-266801.covid19_jhu_csse_modeled.location` l
on r.location_id = l.id
group by date, country_region
order by date desc

In [13]:
%%bigquery
create or replace view covid19_views.v_world_mobility as
select date, country_region, avg(retail_and_rec) as retail_and_rec, avg(grocery_and_pharm) as grocery_and_pharm, avg(parks) as parks, avg(transit_stations) as transit_stations, avg(workplaces) as workplaces, avg(residential) as residential
from `spry-cosine-266801.google_modeled.mobility_event` g join `spry-cosine-266801.google_modeled.location` l
on g.location_id = l.id
group by date, country_region
order by date desc

In [14]:
%%bigquery
create or replace view covid19_views.v_world_cross as
select r.date, l.country_region, sum(confirmed) as confirmed, avg(workplaces) as workplaces, avg(residential) as residential
from `spry-cosine-266801.covid19_jhu_csse_modeled.reports` r 
join `spry-cosine-266801.covid19_jhu_csse_modeled.location` l on r.location_id = l.id
join `spry-cosine-266801.google_modeled.location` gl on gl.country_region = l.country_region
join `spry-cosine-266801.google_modeled.mobility_event` g on g.location_id = gl.id and r.date = g.date
group by date, country_region
order by date desc

In [15]:
%%bigquery
create or replace view covid19_views.v_us_mobility as
select date, avg(retail_and_rec) as retail_and_rec, avg(grocery_and_pharm) as grocery_and_pharm, avg(parks) as parks, avg(transit_stations) as transit_stations, avg(workplaces) as workplaces, avg(residential) as residential
from `spry-cosine-266801.google_modeled.mobility_event` g 
join `spry-cosine-266801.google_modeled.location` l
on g.location_id = l.id
where country_region_code = 'US' 
group by date
order by date desc

In [16]:
%%bigquery
create or replace view covid19_views.v_us_cases_by_state as
(select date, province_state as state, sum(j.confirmed) as confirmed, sum(j.deaths) as deaths
from `spry-cosine-266801.covid19_jhu_csse_modeled.reports` j 
join `spry-cosine-266801.covid19_jhu_csse_modeled.us_location_Beam_DF` l on j.location_id = l.id
group by date, province_state)
union all
(select date, state, sum(confirmed_cases) as confirmed, sum(deaths) as deaths
from `spry-cosine-266801.covid19_usafacts_modeled.reports` r 
join `spry-cosine-266801.covid19_usafacts_modeled.loc_state_Beam_DF` s on s.state_fips_code = r.state_fips_code
group by date, state)
order by date desc, state

In [17]:
%%bigquery
create or replace view covid19_views.v_tx_mobility as
select date, avg(retail_and_rec) as retail_and_rec, avg(grocery_and_pharm) as grocery_and_pharm, avg(parks) as parks, avg(transit_stations) as transit_stations, avg(workplaces) as workplaces, avg(residential) as residential
from `spry-cosine-266801.google_modeled.mobility_event` g 
join `spry-cosine-266801.google_modeled.location` l
on g.location_id = l.id
where sub_region_1 = 'Texas'
group by date, country_region
order by date desc

In [18]:
%%bigquery
create or replace view covid19_views.v_tx_cases_by_county as
(select date, city_county as county, sum(j.confirmed) as confirmed, sum(j.deaths) as deaths
from `spry-cosine-266801.covid19_jhu_csse_modeled.reports` j 
join `spry-cosine-266801.covid19_jhu_csse_modeled.us_location_Beam_DF` l on j.location_id = l.id
where province_state = 'Texas'
group by date, city_county)
union all
(select date, county_name as county, sum(confirmed_cases) as confirmed, sum(deaths) as deaths
from `spry-cosine-266801.covid19_usafacts_modeled.reports` r 
join `spry-cosine-266801.covid19_usafacts_modeled.loc_state_Beam_DF` s on s.state_fips_code = r.state_fips_code
join `spry-cosine-266801.covid19_usafacts_modeled.loc_county` c on c.county_fips_code = r.county_fips_code
where state = 'Texas'
group by date, county_name)
order by date desc