# Importing Cov2 Confirmed Cases

## Dependencies

In [2]:
# basic stuff
import psycopg2
import pandas as pd
import psycopg2.extras
import os
import numpy
from config import (census_key, gkey)
import gmaps
import requests
from ipywidgets.embed import embed_minimal_html
from pprint import pprint

# Imports the method used to connect to DBs
from sqlalchemy import create_engine

# function to establish a session with a connected database
from sqlalchemy.orm import Session

# database compliant datatypes
from sqlalchemy import Column, Integer, String, Float

## Setup PostgreSQL Connection

In [25]:
# Connection URL for the covid_db PostgreSQL database.
# Set the COVID_DB_URL environment variable to supply real credentials without
# committing them; when it is unset the original local-development default
# (user "postgres", password "postgres", localhost:5432) is used.
db_url = os.environ.get(
    'COVID_DB_URL',
    'postgresql://postgres:postgres@localhost:5432/covid_db',
)
engine = create_engine(db_url)

## Import Confirmed Cases


In [31]:
# path to the confirmed-case CSV, relative to the working directory
raw_path_parts = ("..", "data", "raw", "covid_confirmed_usafacts.csv")
raw_file = os.path.join(*raw_path_parts)

# load the CSV into a dataframe, decoding it as ISO-8859-1
raw_df = pd.read_csv(
    raw_file,
    encoding="ISO-8859-1",
)

# show the first rows as a sanity check
raw_df.head()

Unnamed: 0,countyFIPS,County Name,State,stateFIPS,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,...,4/8/2020,4/9/2020,4/10/2020,4/11/2020,4/12/2020,4/13/2020,4/14/2020,4/15/2020,4/16/2020,4/17/2020
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,12,17,17,19,19,19,23,25,25,26
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,49,59,59,66,71,78,87,98,102,103
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,3,7,9,10,10,9,11,13,14,15
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,9,11,11,13,16,17,17,19,23,23


## Rename some columns

In [32]:
# source CSV header -> database table column name
column_names = {}
column_names['countyFIPS'] = 'county_fips'
column_names['County Name'] = 'county_name'
column_names['State'] = 'state_name'
column_names['stateFIPS'] = 'state_fips'

# apply the mapping; headers not listed above (the date columns) are kept as-is
raw_df = raw_df.rename(columns=column_names)

# display the first rows to confirm the new headers
raw_df.head()

Unnamed: 0,county_fips,county_name,state_name,state_fips,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,...,4/8/2020,4/9/2020,4/10/2020,4/11/2020,4/12/2020,4/13/2020,4/14/2020,4/15/2020,4/16/2020,4/17/2020
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,12,17,17,19,19,19,23,25,25,26
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,49,59,59,66,71,78,87,98,102,103
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,3,7,9,10,10,9,11,13,14,15
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,9,11,11,13,16,17,17,19,23,23


## Upload to PostgreSQL


In [33]:
# identifier columns that are repeated on every row of the staging table
id_columns = ['county_fips', 'county_name', 'state_name', 'state_fips']

# every remaining column is a fact column whose header is the date.
# The columns are selected by name instead of walking DataFrame.iteritems()
# with a position counter: iteritems() was removed in pandas 2.0, and a
# name-based selection does not depend on the column order of the CSV.
testDates = [name for name in raw_df.columns if name not in id_columns]

# nothing to upload when the file carries no date columns
if testDates:

    # reshape wide -> long: one row per county per date, emitted date by date,
    # with the date header in 'test_date' and the cell value in
    # 'confirmed_cases' (same rows, same order as building one frame per date)
    long_df = raw_df.melt(
        id_vars=id_columns,
        value_vars=testDates,
        var_name='test_date',
        value_name='confirmed_cases',
    )

    # append all rows to the sql staging table in a single write
    long_df.to_sql('staging_confirmed_cases', con=engine, if_exists='append', index=False)


## Update Main Tables
### df from view of unique counties

In [34]:
# load every row of the "staging_county_list" view into a dataframe
staging_county_query = 'select * from "staging_county_list"'
county_df = pd.read_sql_query(staging_county_query, con=engine)

# preview the result
county_df.head()

Unnamed: 0,county_fips,state_fips,county_name
0,48467,48,Van Zandt County
1,28059,28,Jackson County
2,55019,55,Clark County
3,31185,31,York County
4,39119,39,Muskingum County


### write unique counties to county table

In [35]:
# append the county rows to the "county" table (dataframe index is not written)
county_df.to_sql(
    name='county',
    con=engine,
    if_exists='append',
    index=False,
)

### df from view of unique states

In [37]:
# load every row of the "staging_state_list" view into a dataframe
staging_state_query = 'select * from "staging_state_list"'
state_df = pd.read_sql_query(staging_state_query, con=engine)

# preview the result
state_df.head()

Unnamed: 0,state_fips,state_name
0,1,AL
1,51,VA
2,12,FL
3,10,DE
4,19,IA


### write to state table

In [39]:
# append the state rows to the "states" table (dataframe index is not written)
state_df.to_sql(
    name='states',
    con=engine,
    if_exists='append',
    index=False,
)

In [40]:
# reload county_df from the "county_list" view
county_list_query = 'select * from "county_list"'
county_df = pd.read_sql_query(county_list_query, con=engine)

# preview the result
county_df.head()

Unnamed: 0,county_fips,state_fips,state_id,county_name,latitude,longitude
0,1117,1,AL,Shelby County,,
1,1021,1,AL,Chilton County,,
2,1113,1,AL,Russell County,,
3,1041,1,AL,Crenshaw County,,
4,1051,1,AL,Elmore County,,


In [50]:
# Geocoding endpoint. The query parameters are passed to requests separately
# so that it percent-encodes them; the previous hand-built URL only escaped
# spaces, which breaks on names containing characters such as '&' or '#'.
geocode_url = 'https://maps.googleapis.com/maps/api/geocode/json'

# county_fips of every county for which no coordinates could be extracted
missing_counties = []

# loop through counties
for index, row in county_df.iterrows():

    # the county is the target address
    county_fips = row['county_fips']
    county_name = row['county_name']
    state_id = row['state_id']

    # component filter sent to the geocoder
    # NOTE(review): the documented component filters are route, locality,
    # administrative_area, postal_code and country; 'state' is not among them
    # and a county is an administrative area rather than a locality. Confirm
    # this filter returns the intended result before trusting the coordinates.
    components = 'locality:' + county_name + '|state:' + state_id + '|country:US'

    # geo-codin'
    geo_data = requests.get(
        geocode_url,
        params={'components': components, 'key': gkey},
    ).json()

    # Extract latitude and longitude. Only the lookup is guarded, and only
    # against a response without a usable first result, so that unrelated
    # errors are no longer silently recorded as "missing county".
    try:
        location = geo_data["results"][0]["geometry"]["location"]
        lat = location["lat"]
        lng = location["lng"]
    except (KeyError, IndexError, TypeError):
        # append to missing counties
        missing_counties.append(county_fips)
        continue

    # Build the update statement. format() converts every value to text;
    # the previous `" ... = " + county_fips` concatenation raised TypeError
    # for a non-string county_fips, which the bare except then swallowed,
    # flagging every county as missing.
    sql = 'UPDATE county SET latitude = {}, longitude = {} WHERE county_fips = {};'.format(
        lat, lng, county_fips
    )

    # Dry run: the statement is only printed, not executed. When enabling the
    # write, pass lat/lng/county_fips as bound parameters instead of
    # executing this formatted string.
    print(sql)

# missing_counties

# print(sql)
        