## Import dependencies

In [13]:
import os
import time

import pandas as pd
from geopandas import GeoDataFrame, points_from_xy
import geopandas as gpd

from cartoframes import read_carto, to_carto
from cartoframes.auth import set_default_credentials, Credentials
from cartoframes.viz import *

from cartoframes.data.services import Isolines

from cartoframes.data.observatory import Catalog, Dataset
from cartoframes.data.observatory import Enrichment

from sqlalchemy import create_engine
from sqlalchemy import inspect

## CARTOframes

In [11]:
# CARTO credentials are read from the environment so that no secret is stored
# in (or committed with) the notebook.
#   CARTO_USERNAME - CARTO account name (optional, defaults to 'alaina')
#   CARTO_API_KEY  - CARTO API key (required; a KeyError is raised if unset)
# NOTE: the API key that was previously hardcoded in this cell should be
# treated as compromised and regenerated in the CARTO dashboard.
USER_NAME = os.environ.get('CARTO_USERNAME', 'alaina')
API_KEY = os.environ['CARTO_API_KEY']

# Register the credentials as the default for subsequent CARTOframes calls.
set_default_credentials(
    username=USER_NAME,
    api_key=API_KEY
)

In [12]:
# Query the CARTO Data Observatory catalog for demographics datasets
# published by the 'usa_acs' provider for the USA.
datasets = (
    Catalog()
    .country('usa')
    .category('demographics')
    .provider('usa_acs')
    .datasets
)

You can find more entities with the Global country filter. To apply that filter run:
	Catalog().country('glo')


In [22]:
# Fetch the ACS sociodemographics dataset by its catalog slug and display its
# metadata as a dictionary. Per the metadata shown below, this is the 2016
# 5-year release at census block group level (time coverage 2012-01-01 up to,
# but not including, 2017-01-01).
ACS_DATASET_SLUG = 'acs_sociodemogr_fd3ffe5e'
dataset = Dataset.get(ACS_DATASET_SLUG)
dataset.to_dict()

{'slug': 'acs_sociodemogr_fd3ffe5e',
 'name': 'Sociodemographics - United States of America (Census Block Group, 2016, 5yrs)',
 'description': 'The American Community Survey (ACS) is an ongoing survey that provides vital information on a yearly basis about the USA and its people. This dataset contains only a subset of the variables that have been deemed most relevant. More info: https://www.census.gov/programs-surveys/acs/about.html',
 'category_id': 'demographics',
 'country_id': 'usa',
 'data_source_id': 'sociodemographics',
 'provider_id': 'usa_acs',
 'geography_name': 'Census Block Group - United States of America (2015)',
 'geography_description': 'Shoreline clipped TIGER/Line boundaries. More info: https://carto.com/blog/tiger-shoreline-clip/',
 'temporal_aggregation': '5yrs',
 'time_coverage': '[2012-01-01, 2017-01-01)',
 'update_frequency': None,
 'is_public_data': True,
 'lang': 'eng',
 'version': '20122016',
 'category_name': 'Demographics',
 'provider_name': 'American Commun

In [23]:
# Load the catalog dataset into a DataFrame. `dataset` is the object returned
# by Dataset.get(...) in the previous cell, not the dictionary from to_dict()
# (that result was only displayed, never stored).
# NOTE(review): presumably this downloads the full dataset from the Data
# Observatory and requires an active subscription - confirm before re-running.
acs_df = dataset.to_dataframe()

## Use SQLAlchemy to connect to the PostgreSQL database

In [17]:
# The connection URL (which embeds the database password) is read from the
# environment instead of being hardcoded in the notebook.
# Expected form: postgresql://<user>:<password>@<host>:<port>/<database>
# A KeyError is raised if DATABASE_URL is not set.
# NOTE: the password that was previously hardcoded here should be changed.
DATABASE_URL = os.environ['DATABASE_URL']

# echo=True makes SQLAlchemy log every statement it emits (see the output below).
engine = create_engine(DATABASE_URL, echo = True)
connection = engine.connect()

2021-07-08 00:05:53,320 INFO sqlalchemy.engine.base.Engine select version()
select version()
2021-07-08 00:05:53,326 INFO sqlalchemy.engine.base.Engine {}
{}
2021-07-08 00:05:53,331 INFO sqlalchemy.engine.base.Engine select current_schema()
select current_schema()
2021-07-08 00:05:53,335 INFO sqlalchemy.engine.base.Engine {}
{}
2021-07-08 00:05:53,341 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2021-07-08 00:05:53,346 INFO sqlalchemy.engine.base.Engine {}
{}
2021-07-08 00:05:53,353 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2021-07-08 00:05:53,358 INFO sqlalchemy.engine.base.Engine {}
{}
2021-07-08 00:05:53,364 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
show standard_conforming_strings
2021-07-08 00:05:53,368 INFO sqlalchemy.engine.base.Engin

In [16]:
# Build a schema inspector for the engine and show the table names it reports
# (the logged query below looks them up in the 'public' schema).
inspector = inspect(engine)
table_names = inspector.get_table_names()
table_names

2021-07-08 00:05:42,228 INFO sqlalchemy.engine.base.Engine SELECT c.relname FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = %(schema)s AND c.relkind in ('r', 'p')
SELECT c.relname FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = %(schema)s AND c.relkind in ('r', 'p')
2021-07-08 00:05:42,231 INFO sqlalchemy.engine.base.Engine {'schema': 'public'}
{'schema': 'public'}


['mastercard_original']

In [21]:
# Load every record of the mastercard_original table into a DataFrame.
# pd.read_sql_query is used rather than pd.read_sql: with read_sql, pandas
# first asks the database whether the whole SQL string is a table name (the
# extra pg_class lookup visible in the log below) before running the query.
# read_sql_query skips that wasted round trip and returns the same frame.
mastercard_df = pd.read_sql_query("SELECT * FROM mastercard_original", connection)
mastercard_df.head()

2021-07-08 00:10:02,013 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2021-07-08 00:10:02,017 INFO sqlalchemy.engine.base.Engine {'name': 'SELECT * FROM mastercard_original'}
{'name': 'SELECT * FROM mastercard_original'}
2021-07-08 00:10:02,023 INFO sqlalchemy.engine.base.Engine SELECT * FROM mastercard_original
SELECT * FROM mastercard_original
2021-07-08 00:10:02,026 INFO sqlalchemy.engine.base.Engine {}
{}


Unnamed: 0,the_geom,geoid,industry,txn_amt
0,0106000020E6100000010000000103000000010000000B...,361000000000,ret,152.38
1,0106000020E6100000010000000103000000010000000B...,361000000000,gro,225.84
2,0106000020E6100000010000000103000000010000000B...,361000000000,ret,124.44
3,0106000020E61000000100000001030000000100000015...,360000000000,gro,125.14
4,0106000020E6100000010000000103000000010000000B...,361000000000,gro,186.38


## Join the sociodemographic and mastercard datasets

In [29]:
# Collect the variables of one ACS demographics dataset; they are passed to the
# enrichment call in the next cell.
# NOTE(review): the dataset is selected by its position in the catalog listing,
# which depends on the catalog's ordering. Presumably it is meant to be the
# dataset fetched by slug earlier in this notebook - confirm, and prefer
# selecting by slug if so.
ACS_DATASET_INDEX = 275
variables = (
    Catalog()
    .country('usa')
    .category('demographics')
    .provider('usa_acs')
    .datasets[ACS_DATASET_INDEX]
    .variables
)

You can find more entities with the Global country filter. To apply that filter run:
	Catalog().country('glo')


In [28]:
# Enrich the Mastercard records with the selected Data Observatory variables,
# using the 'the_geom' column as each row's geometry.
# NOTE(review): the the_geom values displayed by mastercard_df.head() begin with
# 0106000020E6100000, the EWKB header of a MultiPolygon in SRID 4326, so these
# rows look like polygons rather than points - confirm that enrich_points
# (rather than enrich_polygons) is the intended method.
gdf_enrich = Enrichment().enrich_points(mastercard_df, variables, geom_col='the_geom')

In [34]:
# Keep only the enriched rows whose industry code is 'ret' (retail).
retail_mask = gdf_enrich['industry'].eq('ret')
gdf_enrich_retail = gdf_enrich[retail_mask]