## Create covid_19_modeled dataset

In [2]:
!bq --location=US mk --dataset covid_19_modeled

Dataset 'extracredit-276620:covid_19_modeled' successfully created.


In [7]:
%%bigquery 
create table covid_19_modeled.us_cases_2020
as select null as id, * from covid_19_staging.us_cases_2020

## Location Entity Table

#### Create an identifier for each row based off of state and country

In [10]:
%%bigquery
update covid_19_modeled.us_cases_2020 set id=FARM_FINGERPRINT(concat(state, country))
where state is not null

In [12]:
%%bigquery
select count(*) as null_ids
from covid_19_modeled.us_cases_2020 
where id is null

Unnamed: 0,null_ids
0,0


In [14]:
%%bigquery
select id, state, country
from covid_19_modeled.us_cases_2020
order by state
limit 10

Unnamed: 0,id,state,country
0,2141475585830341968,"Norfolk County, MA",US
1,2141475585830341968,"Norfolk County, MA",US
2,2141475585830341968,"Norfolk County, MA",US
3,2141475585830341968,"Norfolk County, MA",US
4,2141475585830341968,"Norfolk County, MA",US
5,8120215253457855445,Alabama,US
6,8120215253457855445,Alabama,US
7,8120215253457855445,Alabama,US
8,8120215253457855445,Alabama,US
9,8120215253457855445,Alabama,US


#### Separate table by locations using id, state, country, latitude, longitude, fips, admin2, combined_key

In [15]:
%%bigquery
create or replace table covid_19_modeled.location_SQL_1
as select distinct id, state, country, latitude, longitude, fips, admin2, combined_key
from covid_19_modeled.us_cases_2020

In [16]:
%%bigquery
select count(*) as location_count from
covid_19_modeled.location_SQL_1

Unnamed: 0,location_count
0,4128


In [49]:
%%bigquery
select * from covid_19_modeled.location_SQL_1
limit 5

Unnamed: 0,id,state,country,latitude,longitude,fips,admin2,combined_key
0,-5051045546075913948,Iowa,US,42.080117,-92.064636,19011,Benton,"Benton, Iowa, US"
1,-5051045546075913948,Iowa,US,41.899816,-90.532751,19045,Clinton,"Clinton, Iowa, US"
2,-5051045546075913948,Iowa,US,41.218421,-91.261026,19115,Louisa,"Louisa, Iowa, US"
3,-5051045546075913948,Iowa,US,40.920126,-91.183266,19057,Des Moines,"Des Moines, Iowa, US"
4,-5051045546075913948,Iowa,US,42.121186,-91.131396,19105,Jones,"Jones, Iowa, US"


#### Continuation of location entity table production occurs after final event table is created

## Event Entity Table

#### Separate table by events using id, last_update, confirmed, deaths, recovered, active

In [17]:
%%bigquery
create or replace table covid_19_modeled.event_SQL_1
as select id as location_id, last_update, confirmed, deaths, recovered, active
from covid_19_modeled.us_cases_2020

In [18]:
%%bigquery
select count(*) as event_count from
covid_19_modeled.event_SQL_1

Unnamed: 0,event_count
0,122463


#### Check oldest and newest records for date inconsistencies

In [19]:
%%bigquery
select * from covid_19_modeled.event_SQL_1
order by last_update
limit 5

Unnamed: 0,location_id,last_update,confirmed,deaths,recovered,active
0,-7839093902702759077,1/22/2020 17:00,1,,,
1,-7839093902702759077,1/23/20 17:00,1,,,
2,4510293580006593298,1/24/20 17:00,1,,,
3,-7839093902702759077,1/24/20 17:00,1,,,
4,-7839093902702759077,1/25/20 17:00,1,,,


In [21]:
%%bigquery
select * from covid_19_modeled.event_SQL_1
order by last_update desc
limit 5

Unnamed: 0,location_id,last_update,confirmed,deaths,recovered,active
0,-6274337709938639604,4/6/20 23:22,0,0,0,0
1,-391570144277816991,4/6/20 23:22,0,0,0,0
2,-391570144277816991,4/6/20 23:22,0,0,0,0
3,-6274337709938639604,4/6/20 23:22,0,0,0,0
4,-6274337709938639604,4/6/20 23:22,0,0,0,0


#### Separate events with last_update format containing forward slashes into new table

In [23]:
%%bigquery
create or replace table covid_19_modeled.event_SQL_2 as
select *
from covid_19_modeled.event_SQL_1
where strpos(last_update, '/') > 0

In [24]:
%%bigquery
select count(*) as count_ts_slash
from covid_19_modeled.event_SQL_2

Unnamed: 0,count_ts_slash
0,20039


#### Separate events with last_update format containing hyphens into new table

In [25]:
%%bigquery
create or replace table covid_19_modeled.event_SQL_3 as
select location_id, cast(last_update as datetime) last_update, confirmed, deaths, recovered, active 
from covid_19_modeled.event_SQL_1
where strpos(last_update, '-') > 0

In [26]:
%%bigquery
select count(*) as count_ts_hyphen
from covid_19_modeled.event_SQL_3

Unnamed: 0,count_ts_hyphen
0,102424


#### Reformat the forward slash containing last_update values to match lengths

In [27]:
%%bigquery
select last_update, length(split(split(last_update, ' ')[offset(0)], '/')[offset(2)]) as year_length
from
(select distinct last_update
from covid_19_modeled.event_SQL_2)
limit 12

Unnamed: 0,last_update,year_length
0,2/1/2020 19:53,4
1,4/2/20 23:25,2
2,4/4/20 23:34,2
3,4/6/20 23:22,2
4,3/22/20 23:45,2
5,3/28/20 23:05,2
6,3/29/20 23:08,2
7,3/30/20 22:52,2
8,2/1/2020 19:43,4
9,1/24/20 17:00,2


#### Reformat all forward slash containing last_update values to datetime(hyphen) format

In [29]:
%%bigquery
create or replace table covid_19_modeled.event_SQL_4 as
select location_id, parse_datetime('%m/%d/%y %H:%M', last_update) as last_update, confirmed, deaths, recovered, active
from covid_19_modeled.event_SQL_2
where length(split(split(last_update, ' ')[offset(0)], '/')[offset(2)]) = 2
union all
select location_id, parse_datetime('%m/%d/%Y %H:%M', last_update) as last_update, confirmed, deaths, recovered, active
from covid_19_modeled.event_SQL_2
where length(split(split(last_update, ' ')[offset(0)], '/')[offset(2)]) = 4

In [30]:
%%bigquery
select * from
(select distinct last_update 
from covid_19_modeled.event_SQL_4)
order by last_update
limit 5

Unnamed: 0,last_update
0,2020-01-22 17:00:00
1,2020-01-23 17:00:00
2,2020-01-24 17:00:00
3,2020-01-25 17:00:00
4,2020-01-26 16:00:00


In [31]:
%%bigquery
create or replace table covid_19_modeled.event_SQL_5 as
select * from covid_19_modeled.event_SQL_3
union all
select * from covid_19_modeled.event_SQL_4

In [32]:
%%bigquery
select count(*) total_event_count
from covid_19_modeled.event_SQL_5

Unnamed: 0,total_event_count
0,122463


#### Remove duplicate events

In [33]:
%%bigquery
create or replace table covid_19_modeled.event_SQL_5 as
select distinct location_id, last_update, confirmed, deaths, recovered, active 
from covid_19_modeled.event_SQL_5

In [34]:
%%bigquery
select count(*) as total_event_count from covid_19_modeled.event_SQL_5

Unnamed: 0,total_event_count
0,69420


In [35]:
%%bigquery
select location_id, last_update, count(*) as duplicate_events
from covid_19_modeled.event_SQL_5
group by location_id, last_update
having count(*) > 1
order by count(*) desc
limit 5

Unnamed: 0,location_id,last_update,duplicate_events
0,-5615092215703141091,2020-05-04 02:32:28,141
1,-5615092215703141091,2020-05-01 02:32:28,138
2,-5615092215703141091,2020-05-02 02:32:27,138
3,-5615092215703141091,2020-05-03 02:32:28,136
4,-5615092215703141091,2020-04-30 02:32:27,135


In [37]:
%%bigquery
select *, 
rank() over (partition by location_id, last_update order by confirmed desc) as rank
from covid_19_modeled.event_SQL_5
limit 10

Unnamed: 0,location_id,last_update,confirmed,deaths,recovered,active,rank
0,-9160997828604972620,2020-03-07 16:53:03,1,0,0,,1
1,-9160997828604972620,2020-03-09 03:53:03,3,0,0,,1
2,-9159398279919354893,2020-03-10 22:13:11,92,0,1,,1
3,-9159398279919354893,2020-03-11 20:00:00,123,0,1,,1
4,-9159398279919354893,2020-03-11 22:53:03,95,0,1,,1
5,-9159398279919354893,2020-03-12 21:39:09,108,0,1,,1
6,-9159398279919354893,2020-03-14 22:13:19,164,0,1,,1
7,-9159398279919354893,2020-03-14 22:13:19,138,0,1,,2
8,-9159398279919354893,2020-03-16 21:53:03,197,0,1,,1
9,-9159398279919354893,2020-03-17 22:53:02,218,0,1,,1


In [39]:
%%bigquery
create or replace table covid_19_modeled.event_sql_final as
select location_id, last_update, confirmed, deaths, recovered, active
from
(select *, 
rank() over (partition by location_id, last_update order by confirmed desc) as rank
from covid_19_modeled.event_SQL_5)
where rank = 1

##### Records with rank 1 are to be used as the most likely accurate record.

#### Check for remaining duplicates

In [83]:
%%bigquery
select location_id, last_update, count(*) as duplicate_events
from covid_19_modeled.event_sql_final
group by location_id, last_update
having count(*) > 1
order by count(*) desc
limit 5

Unnamed: 0,location_id,last_update,duplicate_events
0,1660675897502087225,2020-04-18 22:32:47,2
1,-4688529123323169636,2020-03-22 23:45:00,2


In [96]:
%%bigquery
select *
from covid_19_modeled.event_sql_final
where location_id=1660675897502087225 and last_update='2020-04-18 22:32:47'

Unnamed: 0,location_id,last_update,confirmed,deaths,recovered,active
0,1660675897502087225,2020-04-18 22:32:47,106,0,0,106
1,1660675897502087225,2020-04-18 22:32:47,106,1,0,105


In [97]:
%%bigquery
select *
from covid_19_modeled.event_sql_final
where location_id=-4688529123323169636 and last_update='2020-03-22 23:45:00'

Unnamed: 0,location_id,last_update,confirmed,deaths,recovered,active
0,-4688529123323169636,2020-03-22 23:45:00,102,2,0,0
1,-4688529123323169636,2020-03-22 23:45:00,102,0,0,0


##### Remove record where active least resembles confirmed-deaths-recovered

In [99]:
%%bigquery
delete from covid_19_modeled.event_sql_final 
where location_id=1660675897502087225 and last_update='2020-04-18 22:32:47' and active=106

##### Remove record where active least resembles confirmed-deaths-recovered

In [100]:
%%bigquery
delete from covid_19_modeled.event_sql_final
where location_id=-4688529123323169636 and last_update='2020-03-22 23:45:00' and deaths=2

#### Check again for duplicates

In [101]:
%%bigquery
select location_id, last_update, count(*) as duplicate_events
from covid_19_modeled.event_sql_final
group by location_id, last_update
having count(*) > 1
order by count(*) desc
limit 5

Unnamed: 0,location_id,last_update,duplicate_events


In [102]:
%%bigquery
select count(*) total_event_count
from covid_19_modeled.event_sql_final

Unnamed: 0,total_event_count
0,3357


In [103]:
%%bigquery
select count(*) distinct_event_count
from
(select distinct location_id, last_update
from covid_19_modeled.event_sql_final)

Unnamed: 0,distinct_event_count
0,3357


In [104]:
%%bigquery
create or replace table covid_19_modeled.event as
select * from covid_19_modeled.event_sql_final

## Continuation of Location Entity Table

#### Check format of state values and separate records that require parsing of state and city into a table

In [50]:
%%bigquery
select id, state, strpos(state, ',') as index, country, latitude, longitude, fips, admin2, combined_key
from covid_19_modeled.location_SQL_1
where strpos(state, ',') > 0
limit 6

Unnamed: 0,id,state,index,country,latitude,longitude,fips,admin2,combined_key
0,8145114016361068042,"Tempe, AZ",6,US,,,,,
1,8145114016361068042,"Tempe, AZ",6,US,33.4255,-111.94,,,
2,8556287708025666455,"Boston, MA",7,US,42.3601,-71.0589,,,
3,8556287708025666455,"Boston, MA",7,US,,,,,
4,-1673719292839792409,"Orange, CA",7,US,33.7879,-117.8531,,,
5,-1673719292839792409,"Orange, CA",7,US,,,,,


In [51]:
%%bigquery
select state as orig_state, split(state, ',')[offset(0)] parsed_state, split(state, ',')[offset(1)] parsed_city,
from covid_19_modeled.location_SQL_1
where strpos(state, ',') > 0
limit 10

Unnamed: 0,orig_state,parsed_state,parsed_city
0,"Tempe, AZ",Tempe,AZ
1,"Tempe, AZ",Tempe,AZ
2,"Boston, MA",Boston,MA
3,"Boston, MA",Boston,MA
4,"Orange, CA",Orange,CA
5,"Orange, CA",Orange,CA
6,"Travis, CA",Travis,CA
7,"Ashland, NE",Ashland,NE
8,"Chicago, IL",Chicago,IL
9,"Chicago, IL",Chicago,IL


In [52]:
%%bigquery
create or replace table covid_19_modeled.location_SQL_2 as
select id, split(state, ',')[offset(0)] city, split(state, ',')[offset(1)] state, country, latitude, longitude, 
fips, admin2, combined_key
from covid_19_modeled.location_SQL_1
where strpos(state, ',') > 0

In [53]:
%%bigquery
select count(*) as city_state_location_count
from covid_19_modeled.location_SQL_2

Unnamed: 0,city_state_location_count
0,153


#### Separate records that not have a city value into a table and set city default null

In [55]:
%%bigquery
select id, state, cast(null as string) as city, country, latitude, longitude, fips, admin2, combined_key
from covid_19_modeled.location_SQL_1
where strpos(state, ',') = 0
limit 5

Unnamed: 0,id,state,city,country,latitude,longitude,fips,admin2,combined_key
0,-5051045546075913948,Iowa,,US,42.080117,-92.064636,19011,Benton,"Benton, Iowa, US"
1,-5051045546075913948,Iowa,,US,41.899816,-90.532751,19045,Clinton,"Clinton, Iowa, US"
2,-5051045546075913948,Iowa,,US,41.218421,-91.261026,19115,Louisa,"Louisa, Iowa, US"
3,-5051045546075913948,Iowa,,US,40.920126,-91.183266,19057,Des Moines,"Des Moines, Iowa, US"
4,-5051045546075913948,Iowa,,US,42.121186,-91.131396,19105,Jones,"Jones, Iowa, US"


In [56]:
%%bigquery
create or replace table covid_19_modeled.location_SQL_3 as
select id, state, cast(null as string) as city, country, latitude, longitude, fips, admin2, combined_key
from covid_19_modeled.location_SQL_1
where strpos(state, ',') = 0

In [58]:
%%bigquery
select count(*) as state_location_count
from covid_19_modeled.location_SQL_2

Unnamed: 0,state_location_count
0,153


#### Union tables to form new location table with uniform state and city format

In [62]:
%%bigquery
create or replace table covid_19_modeled.location_SQL_4 as
(select id, state, city, country, latitude, longitude, fips, admin2, combined_key
 from covid_19_modeled.location_SQL_2
union all
 select id, state, city, country, latitude, longitude, fips, admin2, combined_key
 from covid_19_modeled.location_SQL_3
)

#### Remove duplicates

In [63]:
%%bigquery
select count(*) as locations_count
from covid_19_modeled.location_SQL_4

Unnamed: 0,locations_count
0,4128


In [64]:
%%bigquery
select count(distinct id) as distinct_locations 
from covid_19_modeled.location_SQL_4

Unnamed: 0,distinct_locations
0,199


In [65]:
%%bigquery
select *,
rank() over (partition by id order by combined_key desc) as rank
from covid_19_modeled.location_SQL_4

Unnamed: 0,id,state,city,country,latitude,longitude,fips,admin2,combined_key,rank
0,-9160997828604972620,GA,Cobb County,US,33.899900,-84.564100,,,,1
1,-9159398279919354893,Massachusetts,,US,42.350270,-71.904934,25027.0,Worcester,"Worcester, Massachusetts, US",1
2,-9159398279919354893,Massachusetts,,US,,,90025.0,Unassigned,"Unassigned, Massachusetts, US",2
3,-9159398279919354893,Massachusetts,,US,0.000000,0.000000,,Unassigned,"Unassigned, Massachusetts, US",2
4,-9159398279919354893,Massachusetts,,US,42.327951,-71.078504,25025.0,Suffolk,"Suffolk, Massachusetts, US",4
...,...,...,...,...,...,...,...,...,...,...
4123,9015684915783253640,FL,Manatee County,US,27.479900,-82.345200,,,,1
4124,9146415820156874349,MA,Suffolk County,US,42.360100,-71.058900,,,,1
4125,9151763984302378540,NE,Douglas County,US,41.314800,-96.195100,,,,1
4126,9199521142933741911,NE (From Diamond Princess),Omaha,US,41.254500,-95.975800,,,,1


In [66]:
%%bigquery
create or replace table covid_19_modeled.location_SQL_5 as
select id, city, state, country, latitude, longitude, fips, admin2, combined_key from
(select *, 
rank() over (partition by id order by combined_key desc) as rank
from covid_19_modeled.location_SQL_4)
where rank = 1

#### format state names to state abbr

In [1]:
%%bigquery
select distinct state
from covid_19_modeled.location_SQL_5

Unnamed: 0,state
0,Ohio
1,RI
2,Puerto Rico
3,VA
4,KS
...,...
101,Utah
102,Nebraska
103,Chicago
104,New Mexico


In [5]:
%%bigquery
select id, city, case when state='Louisiana' then 'LA'
    when state='Wisconsin' then 'WI'
    when state='Maine' then 'ME'
    when state='Nebraska' then 'NE'
    when state='Virginia' then 'VA'
    when state='Pennsylvania' then 'PA'
    when state='South Carolina' then 'SC'
    when state='California' then 'CA'
    when state='Arizona' then 'AZ'
    when state='Colorado' then 'CO'
    when state='Georgia' then 'GA'
    when state='New York' then 'NY'
    when state='Mississippi' then 'MS'
    when state='New Jersey' then 'NJ'
    when state='Tennessee' then 'TN'
    when state='Iowa' then 'IA'
    when state='Missouri' then 'MO'
    when state='Washington' then 'WA'
    when state='North Carolina' then 'NC'
    when state='Texas' then 'TX'
    when state='Michigan' then 'MI'
    when state='Indiana' then 'IN'
    when state='Connecticut' then 'CT'
    when state='Vermont' then 'VT'
    when state='Alabama' then 'AL'
    when state='Ohio' then 'OH'
    when state='West Virginia' then 'WV'
    when state='Oregon' then 'OR'
    when state='South Dakota' then 'SD'
    when state='Illinois' then 'IL'
    when state='Kentucky' then 'KY'
    when state='Oklahoma' then 'OK'
    when state='Maryland' then 'MD'
    when state='Massachusetts' then 'MA'
    when state='Kansas' then 'KS'
    when state='Hawaii' then 'HI'
    when state='Delaware' then 'DE'
    when state='New Hampshire' then 'NH'
    when state='Idaho' then 'ID'
    when state='Florida' then 'FL'
    when state='Rhode Island' then 'RI'
    when state='Nevada' then 'NV'
    else state end
, country, latitude, longitude, fips, admin2, combined_key
from covid_19_modeled.location_SQL_5

Unnamed: 0,id,city,f0_,country,latitude,longitude,fips,admin2,combined_key
0,-3953755806128463853,,OH,US,40.843396,-83.307342,39175.0,Wyandot,"Wyandot, Ohio, US"
1,-2784617795373633403,Providence,RI,US,41.824000,-71.412800,,,
2,-1413941052660552455,,Puerto Rico,US,18.220800,-66.590100,,,"Puerto Rico, US"
3,855638470151444294,Fairfax County,VA,US,38.908500,-77.240500,,,
4,4405660497797698759,Johnson County,KS,US,38.845400,-94.852100,,,
...,...,...,...,...,...,...,...,...,...
234,8317715504794932186,Montgomery County,PA,US,40.229000,-75.387900,,,
235,-2060830413033558231,,CT,US,41.829727,-71.981823,9015.0,Windham,"Windham, Connecticut, US"
236,3030157267022202972,Cook County,IL,US,41.737700,-87.697600,,,
237,5966756176918069211,Charlotte County,FL,US,26.894600,-81.909800,,,


In [7]:
%%bigquery
create or replace table covid_19_modeled.location_SQL_6 as
select id, city, state, country, latitude, longitude, fips, admin2, combined_key 
from(
select id, city, case when state='Louisiana' then 'LA'
    when state='Wisconsin' then 'WI'
    when state='Maine' then 'ME'
    when state='Nebraska' then 'NE'
    when state='Virginia' then 'VA'
    when state='Pennsylvania' then 'PA'
    when state='South Carolina' then 'SC'
    when state='California' then 'CA'
    when state='Arizona' then 'AZ'
    when state='Colorado' then 'CO'
    when state='Georgia' then 'GA'
    when state='New York' then 'NY'
    when state='Mississippi' then 'MS'
    when state='New Jersey' then 'NJ'
    when state='Tennessee' then 'TN'
    when state='Iowa' then 'IA'
    when state='Missouri' then 'MO'
    when state='Washington' then 'WA'
    when state='North Carolina' then 'NC'
    when state='Texas' then 'TX'
    when state='Michigan' then 'MI'
    when state='Indiana' then 'IN'
    when state='Connecticut' then 'CT'
    when state='Vermont' then 'VT'
    when state='Alabama' then 'AL'
    when state='Ohio' then 'OH'
    when state='West Virginia' then 'WV'
    when state='Oregon' then 'OR'
    when state='South Dakota' then 'SD'
    when state='Illinois' then 'IL'
    when state='Kentucky' then 'KY'
    when state='Oklahoma' then 'OK'
    when state='Maryland' then 'MD'
    when state='Massachusetts' then 'MA'
    when state='Kansas' then 'KS'
    when state='Hawaii' then 'HI'
    when state='Delaware' then 'DE'
    when state='New Hampshire' then 'NH'
    when state='Idaho' then 'ID'
    when state='Florida' then 'FL'
    when state='Rhode Island' then 'RI'
    when state='Nevada' then 'NV'
    else state end as state
, country, latitude, longitude, fips, admin2, combined_key
from covid_19_modeled.location_SQL_5
)

In [8]:
%%bigquery
select *
from covid_19_modeled.location_SQL_6
limit 5

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,5768702016008334817,,US,US,37.0902,-95.7129,,,
1,2595497680336258825,Pinal County,AZ,US,32.8162,-111.2845,,,
2,2115684815449973964,Maricopa County,AZ,US,33.2918,-112.4291,,,
3,8145114016361068042,Tempe,AZ,US,,,,,
4,8145114016361068042,Tempe,AZ,US,33.4255,-111.94,,,


##### Records with rank 1 are to be used as the most likely accurate record

In [9]:
%%bigquery
create or replace table covid_19_modeled.location_SQL_final as
select id, city, state, country, latitude, longitude, fips, admin2, combined_key from
(select *, 
rank() over (partition by id order by latitude desc, longitude desc) as rank
from covid_19_modeled.location_SQL_6)
where rank = 1

#### Check again for duplicates

In [10]:
%%bigquery
select count(*) as locations 
from covid_19_modeled.location_SQL_final

Unnamed: 0,locations
0,204


In [11]:
%%bigquery
select count(distinct id) as distinct_locations
from covid_19_modeled.location_SQL_final

Unnamed: 0,distinct_locations
0,199


In [12]:
%%bigquery
select id, count(*) as duplicate_events
from covid_19_modeled.location_SQL_final
group by id
having count(*) > 1
order by count(*) desc
limit 5

Unnamed: 0,id,duplicate_events
0,3620381022065474036,3
1,-7958264304576551061,2
2,-1413941052660552455,2
3,3966931624883821615,2


In [13]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=3620381022065474036

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,3620381022065474036,,Northern Mariana Islands,US,15.0979,145.6739,69000.0,,"Northern Mariana Islands, US"
1,3620381022065474036,,Northern Mariana Islands,US,15.0979,145.6739,,,"Northern Mariana Islands, US"
2,3620381022065474036,,Northern Mariana Islands,US,15.0979,145.6739,69.0,,"Northern Mariana Islands, US"


In [14]:
%%bigquery
delete from covid_19_modeled.location_SQL_final
where id=3620381022065474036 and (fips=69000.0 or fips is null)

In [15]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=3620381022065474036

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,3620381022065474036,,Northern Mariana Islands,US,15.0979,145.6739,69,,"Northern Mariana Islands, US"


In [16]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=-7958264304576551061

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,-7958264304576551061,,Guam,US,13.4443,144.7937,66,,"Guam, US"
1,-7958264304576551061,,Guam,US,13.4443,144.7937,66000,,"Guam, US"


In [17]:
%%bigquery
delete from covid_19_modeled.location_SQL_final
where id=-7958264304576551061 and (fips=66000)

In [18]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=-7958264304576551061

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,-7958264304576551061,,Guam,US,13.4443,144.7937,66,,"Guam, US"


In [19]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=3966931624883821615

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,3966931624883821615,,Virgin Islands,US,18.3358,-64.8963,78,,"Virgin Islands, US"
1,3966931624883821615,,Virgin Islands,US,18.3358,-64.8963,78000,,"Virgin Islands, US"


In [20]:
%%bigquery
delete from covid_19_modeled.location_SQL_final
where id=3966931624883821615 and fips=78000

In [21]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=3966931624883821615

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,3966931624883821615,,Virgin Islands,US,18.3358,-64.8963,78,,"Virgin Islands, US"


In [22]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=-1413941052660552455

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,-1413941052660552455,,Puerto Rico,US,18.2208,-66.5901,72.0,,"Puerto Rico, US"
1,-1413941052660552455,,Puerto Rico,US,18.2208,-66.5901,,,"Puerto Rico, US"


In [23]:
%%bigquery
delete from covid_19_modeled.location_SQL_final
where id=-1413941052660552455 and fips is null

In [24]:
%%bigquery
select *
from covid_19_modeled.location_SQL_final
where id=-1413941052660552455

Unnamed: 0,id,city,state,country,latitude,longitude,fips,admin2,combined_key
0,-1413941052660552455,,Puerto Rico,US,18.2208,-66.5901,72,,"Puerto Rico, US"


#### Check for duplicates one last time with id as primary key

In [25]:
%%bigquery
select count(*)
from covid_19_modeled.location_SQL_final

Unnamed: 0,f0_
0,199


In [26]:
%%bigquery
select count(distinct id)
from covid_19_modeled.location_SQL_final

Unnamed: 0,f0_
0,199


In [27]:
%%bigquery
create or replace table covid_19_modeled.location as
select *
from covid_19_modeled.location_SQL_final