In [1]:
import importlib
import os
from pathlib import Path
import sys

from arcgis.features import GeoAccessor, GeoSeriesAccessor, FeatureSet
from arcgis.geometry import Geometry
from arcgis.gis import GIS
import arcpy
from dotenv import load_dotenv, find_dotenv
import pandas as pd

In [2]:
# paths to common data locations - NOTE: to convert any path to a raw string, simply use str(path_instance)
project_parent = Path('./').absolute().parent

data_dir = project_parent/'data'

data_raw = data_dir/'raw'
data_ext = data_dir/'external'
data_int = data_dir/'interim'
data_out = data_dir/'processed'

gdb_raw = data_raw/'raw.gdb'
gdb_int = data_int/'interim.gdb'
gdb_out = data_out/'processed.gdb'

# import the project package from the project package path
# ideally will be imported using 'from arcgis import da'
sys.path.append(str(project_parent/'src'))
import dm

# load the "autoreload" extension so that code can change, & always reload modules so that as you change code in src, it gets loaded
%load_ext autoreload
%autoreload 2

  class GeoAccessorIO(GeoAccessor):


# Introspectively Examine and Get Geographies

In [3]:
# discover what countries are available, and get dataframe of countries
# (the rendered table has name, country and year columns)
cntry_df = dm.util.get_countries()

# bare last expression so the notebook renders the dataframe
cntry_df

Unnamed: 0,name,country,year
0,USA_ESRI_2019,USA,2019


In [4]:
# specify a country - build the Country object used by the rest of the notebook
# source='local' presumably selects locally installed data rather than a remote service - TODO confirm
usa = dm.Country('USA', source='local')

# sanity check that the constructor returned a dm.Country instance
isinstance(usa, dm.Country)

True

In [5]:
# get the geographic resolutions available for the country as a dataframe from smallest to largest
# each row carries the geography name, a display alias, the ID and name columns, and the data path
geos = usa.geographies

# bare last expression so the notebook renders the dataframe
geos

Unnamed: 0,name,alias,col_id,col_name,data_path
0,block_groups,Block Groups,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
1,census_tracts,Census Tracts,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
2,cities_and_towns_places,Cities and Towns (Places),ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
3,zip_codes,ZIP Codes,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
4,county_subdivisions,County Subdivisions,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
5,counties,Counties,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
6,cbsas,CBSAs,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
7,congressional_districts,Congressional Districts,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
8,dmas,DMAs,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
9,states,States,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...


In [6]:
# get a single geography as a dataframe...
# ...by name with a custom query - the LIKE pattern matches IDs beginning with 15001020
bg_df = usa.get_geography('block_groups', query_string="ID LIKE '15001020%'")

# preview the first rows
bg_df.head()

In [7]:
# ...by selector and field to get the same results
# NOTE: this rebinds bg_df, replacing the dataframe produced by the query_string example
bg_df = usa.get_geography('block_groups', selector='15001020', selection_field='ID')

# preview the first rows
bg_df.head()

In [10]:
# get the area of interest by filtering a larger geography using a pattern selector
# no selection_field is passed, so whatever field get_geography defaults to is searched - presumably the name column, TODO confirm
aoi_df = usa.get_geography('cbsas', selector='Seattle')

# preview the first rows
aoi_df.head()

In [9]:
%%time
# select the smallest geography available (the finest resolution) for the most granular analysis

# ...by index...
# NOTE(review): no query or selector is passed, so this presumably loads every feature of the geography;
# the recorded run of this cell ended in a KeyboardInterrupt
geo0_df = usa.get_geography(0)

# preview the first rows
geo0_df.head()

KeyboardInterrupt: 

In [None]:
%%time
# get the block groups in the area of interest
# relies on geo0_df and aoi_df created by earlier cells
# NOTE(review): assumes spatial.within accepts the area-of-interest dataframe - TODO confirm
origin_geo_df = geo0_df.spatial.within(aoi_df)

# summarize the result instead of rendering the whole dataframe
origin_geo_df.info()

In [18]:
%%time

# same selection written as a single chained expression
# NOTE(review): this calls usa.get_geography(0) again instead of reusing geo0_df;
# the recorded run of this cell was interrupted before it finished
origin_geo_df = usa.get_geography(0).spatial.within(aoi_df)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.


KeyboardInterrupt



# Get Locations

In [None]:
# get the store locations from the business listings
loc_brand_df = usa.business.search('Ace Hardware')

# ...and since a spatially enabled dataframe is returned, spatial.to_featureclass can save it directly
# (the call could equally be chained straight onto the search); loc_brand_df is reused here so the
# search only runs once, and the Path is converted with str() because a string location is expected
loc_brand_df.spatial.to_featureclass(str(gdb_int/'loc_brand'))

# preview the first rows
loc_brand_df.head()

In [None]:
# get all the competitors for the area of interest

# ...by NAICS or SIC code...
loc_comp_df = usa.business.get_competitors(
    code=44413005,  # include ability to specify shortened codes since NAICS codes can be shorter to be more general
    code_type='NAICS',
    brand_exclude='Ace Hardware'
)

# ...or simply by looking up using the existing location brand layer as a template
# NOTE: this rebinds loc_comp_df, so the result of this lookup is the one used from here on
loc_comp_df = usa.business.get_competitors(brand_locations=loc_brand_df)

# ...and since a spatially enabled dataframe is returned, spatial.to_featureclass can save it directly
# (the call could equally be chained straight onto get_competitors); loc_comp_df is reused here so the
# lookup is not run again, and the Path is converted with str() because a string location is expected
loc_comp_df.spatial.to_featureclass(str(gdb_int/'loc_comp'))

# Calculate Proximity Metrics

In [None]:
# calculate the origin to nth destinations table for brand locations
# NOTE(review): the method name is spelled "neareset" - confirm it matches the name defined in the dm package
prox_df_brand = usa.proximity.get_neareset_nth_locations(
    origin_features=origin_geo_df,                              # block groups inside the area of interest
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_brand_df,
    destination_id_column='STORE_ID',
    destination_brand_or_concept_column='STORE_CONCEPT',        # think Nike Outlet versus Nike Brand Store
)

In [None]:
# calculate the origin to nth destinations table for competitor locations
# stored as prox_df_comp (mirroring loc_brand_df / loc_comp_df) so the brand table in prox_df_brand is not overwritten
# NOTE(review): the method name is spelled "neareset" - confirm it matches the name defined in the dm package
prox_df_comp = usa.proximity.get_neareset_nth_locations(
    origin_features=origin_geo_df,                              # block groups inside the area of interest
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_comp_df,
    destination_id_column='LOCNUM',
    destination_brand_or_concept_column='CONAME',               # think Nike Outlet versus Nike Brand Store
)

# ...and can even chain to create output using dataframe to_... functions
# to_csv returns None when it is given a path, so its result is not assigned to anything;
# the table computed above is reused rather than recomputed
# NOTE(review): file name kept unchanged although it now clearly holds the competitor table - rename if nothing depends on it
prox_df_comp.to_csv('prox_df_brand.csv')

# Enrich