In [None]:
# Dependencies
import pandas as pd
import geopandas as gpd
import requests
from census import Census
from us import states
import fiona
from sqlalchemy import create_engine
from geoalchemy2 import Geometry, WKTElement

# The Census API key is imported from the local config module rather than
# being written into the notebook.
from config import api_key

# Survey year passed to the Census client; queries made through `c` use it.
ACS_YEAR = 2017
c = Census(api_key, year=ACS_YEAR)

In [None]:
# ACS variable label documentation:
# https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b
#
# Variables used by the later cells (labels as listed in that reference):
#   B01003_001E                population
#   B01002_001E                median age
#   B01001_020E - B01001_025E  male population over 65
#   B01001_044E - B01001_049E  female population over 65
# B01001_001E and B01001_002E are also requested but are dropped when the
# table is trimmed to the columns we need.
acs_variables = (
    'NAME',
    'B01003_001E',
    'B01001_001E',
    'B01002_001E',
    'B01001_002E',
    # male population over 65
    'B01001_020E', 'B01001_021E', 'B01001_022E',
    'B01001_023E', 'B01001_024E', 'B01001_025E',
    # female population over 65
    'B01001_044E', 'B01001_045E', 'B01001_046E',
    'B01001_047E', 'B01001_048E', 'B01001_049E',
)

# Request one record per county in Minnesota from the ACS 5-year endpoint.
# The state FIPS code comes from `us.states`; the previous format string had
# no `{}` placeholder, so the looked-up code was silently discarded and the
# literal 27 was what actually got sent.
m_census_data = c.acs5.get(
    acs_variables,
    geo={'for': 'county:*', 'in': 'state:{}'.format(states.MN.fips)},
)

In [None]:
# Convert the records returned by the Census API into a DataFrame.
# (A bare `m_census_data` line used to sit above this; because it was not the
# last expression in the cell it was never rendered, so it has been removed.)
m_census_pd = pd.DataFrame(m_census_data)
m_census_pd.head()

In [None]:
# Add up the individual age groups to get the population of all people over 65.
over_65_columns = [
    # male age groups
    "B01001_020E", "B01001_021E", "B01001_022E",
    "B01001_023E", "B01001_024E", "B01001_025E",
    # female age groups
    "B01001_044E", "B01001_045E", "B01001_046E",
    "B01001_047E", "B01001_048E", "B01001_049E",
]
# Coerce to numbers before adding: if the API values arrive as strings, `+`
# would concatenate the digits instead of summing the counts.
over_65_counts = m_census_pd[over_65_columns].apply(pd.to_numeric)
# skipna=False: a missing age group yields NaN for the row rather than being
# silently counted as zero (same propagation as chained `+`).
m_census_pd["TotalAgeOvr65"] = over_65_counts.sum(axis=1, skipna=False)

# Combined state + county FIPS code, used later as the join key to the COVID
# data. zfill keeps the leading zeros if either part was parsed without them.
m_census_pd['fips_concate'] = (
    m_census_pd['state'].astype(str).str.zfill(2)
    + m_census_pd['county'].astype(str).str.zfill(3)
)

# Trim down to just the columns we need; copy so later column assignments
# operate on an independent frame rather than a slice of the full table.
m_census_pd = m_census_pd[
    ['NAME', 'B01003_001E', 'B01002_001E', 'TotalAgeOvr65', 'state', 'county', 'fips_concate']
].copy()
m_census_pd.head()

In [None]:
# Rename the ACS variable codes to readable column names.
# (The old mapping also listed "tract", but this county-level table has no
# such column, so that entry never matched anything.)
m_census_pd = m_census_pd.rename(columns={
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "NAME": "Name",
})

# Share of the population that is over 65. Note: despite the column name this
# is a fraction (over-65 count / population) rounded to two decimals; it is
# not multiplied by 100.
m_census_pd["Percent Age Ovr 65"] = (
    m_census_pd["TotalAgeOvr65"].astype(float)
    / m_census_pd["Population"].astype(float)
).round(2)

# Convert county FIPS to int; it is the key used to merge with the county
# boundary shapefile's CTY_FIPS column (presumably numeric there - confirm).
m_census_pd["county"] = m_census_pd["county"].astype(int)

m_census_pd.head()

In [None]:
# Save the attribute-only census table (no geometry) as CSV
csv_out = "../data/CountiesData_noGeometry.csv"
m_census_pd.to_csv(csv_out)

In [None]:
## Read in county boundaries shapefile
## source: https://gisdata.mn.gov/dataset/bdry-counties-in-minnesota
shp = "mn_county_boundary.shp"
# create a geo-dataframe
counties = gpd.read_file(shp)
# number of boundary records read
print(len(counties))
# Preview the table. This has to be the cell's last expression to be
# rendered; it was previously in the middle of the cell and never shown.
counties.head()

In [None]:
# Merge the census table onto the county boundaries (inner join on county FIPS)
counties_ACS = counties.merge(m_census_pd, left_on='CTY_FIPS', right_on='county')

# An inner join silently drops boundary rows that have no census match, so
# report any change in row count instead of relying on eyeballing the output.
if len(counties_ACS) != len(counties):
    print(f"WARNING: merge kept {len(counties_ACS)} of {len(counties)} county boundary rows")

# Set the projection of the counties to be web-readable
counties_ACS = counties_ACS.to_crs("EPSG:4326")
# Show a preview only rather than dumping every row into the notebook output
counties_ACS.head()

In [None]:

# Load the US-wide COVID-19 case layer and keep only the Minnesota rows
gdf = gpd.read_file("../data/COVID19_Cases_US.geojson")
is_minnesota = gdf['Province_State'] == "Minnesota"
mn_covid = gdf[is_minnesota]

# Keep only the attribute columns listed here (geometry is not selected)
covid_columns = [
    'Province_State', 'Last_Update', 'Lat', 'Long_',
    'Confirmed', 'Recovered', 'Deaths', 'Active',
    'Combined_Key', 'FIPS', 'Admin2',
]
mn_covid = mn_covid[covid_columns]

print(len(mn_covid))
mn_covid.head()

In [None]:
# Attach the Minnesota COVID-19 case attributes to the county/census
# geo-dataframe. The left join keeps every row of counties_ACS, including
# any that have no matching FIPS in the COVID table.
counties_covid = counties_ACS.merge(
    mn_covid,
    how="left",
    left_on='fips_concate',
    right_on='FIPS',
)

# No reprojection here: counties_ACS was already converted to EPSG:4326
counties_covid.head()

In [None]:
# Confirm the projection is EPSG:4326, then check the row count
for check_value in (counties_covid.crs, len(counties_covid)):
    print(check_value)

In [None]:
# Export the merged geodataframe as a GeoJSON file for web-mapping
geojson_out = "../data/MN_counties.geojson"
counties_covid.to_file(geojson_out, driver='GeoJSON')

In [None]:
# # drop the geometry column and export data into csv for database and charting
# counties_ACS.drop(['geometry'], axis=1)
# counties_ACS.to_csv("../data/CountiesData.csv")

In [None]:
##Testing pushing geodata to postgres
# connection_string = "postgres:postgres@localhost:5432/covid_db"
# engine = create_engine(f'postgresql://{connection_string}')

In [None]:
##Testing pushing geodata to postgres
# engine.table_names()

In [None]:
##Testing pushing geodata to postgres
# # https://gis.stackexchange.com/questions/239198/adding-geopandas-dataframe-to-postgis-table
# counties_covid['geom'] = counties_covid['geometry'].apply(lambda x: WKTElement(x.wkt, srid=4326))

# #drop the geometry column as it is now duplicative
# counties_covid.drop('geometry', 1, inplace=True)

In [None]:
##Testing pushing geodata to postgres
# counties_covid.to_sql('counties_geometry', engine, if_exists='append', index=True, 
#                          dtype={'geom': Geometry('POLYGON', srid=4326)})