In [16]:
import importlib
import os
from pathlib import Path
import sys

from arcgis.features import GeoAccessor, GeoSeriesAccessor, FeatureSet
from arcgis.geometry import Geometry
from arcgis.gis import GIS
import arcpy
from dotenv import load_dotenv, find_dotenv
import pandas as pd

In [17]:
# paths to common data locations - NOTE: to convert any path to a raw string, simply use str(path_instance)
project_parent = Path('./').absolute().parent

data_dir = project_parent/'data'

data_raw = data_dir/'raw'
data_ext = data_dir/'external'
data_int = data_dir/'interim'
data_out = data_dir/'processed'

gdb_raw = data_raw/'raw.gdb'
gdb_int = data_int/'interim.gdb'
gdb_out = data_out/'processed.gdb'

# import the project package from the project package path
# ideally will be imported using 'from arcgis import da'
sys.path.append(str(project_parent/'src'))
import dm

# load the "autoreload" extension so that code can change, & always reload modules so that as you change code in src, it gets loaded
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Introspectively Examine and Get Geographies

In [18]:
# discover what countries are available, and get dataframe of countries
# The result is kept in cntry_df so it can be inspected and reused by later cells.
cntry_df = dm.util.get_countries()

# bare last expression so the notebook renders the result as a table
cntry_df

Unnamed: 0,name,country,year
0,USA_ESRI_2019,USA,2019


In [19]:
# specify a country
# NOTE(review): source='local' presumably selects locally installed data rather than a
# web GIS - confirm against the dm.Country implementation.
usa = dm.Country('USA', source='local')

# sanity check that the object created above really is a dm.Country instance
isinstance(usa, dm.Country)

True

In [20]:
# get the geographic resolutions available for the country as a dataframe from smallest to largest
# Each row carries the name and alias of a geographic level, its ID and name columns,
# and the path to the feature class holding it (see the rendered table).
geos = usa.geographies

# bare last expression so the notebook renders the table
geos

Unnamed: 0,name,alias,col_id,col_name,feature_class_path
0,block_groups,Block Groups,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
1,census_tracts,Census Tracts,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
2,cities_and_towns_places,Cities and Towns (Places),ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
3,zip_codes,ZIP Codes,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
4,county_subdivisions,County Subdivisions,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
5,counties,Counties,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
6,cbsas,CBSAs,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
7,congressional_districts,Congressional Districts,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
8,dmas,DMAs,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
9,states,States,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...


In [28]:
# look up the feature class path stored in row 9 of the geographies table
# (the 'states' row in the table displayed by the previous cell)
# NOTE(review): the recorded output of this cell is a TypeError rather than a path -
# investigate before relying on this lookup.
usa.geographies.iloc[9].feature_class_path

TypeError: descriptor 'mro' of 'type' object needs an argument

In [6]:
# select the smallest geographic level available (row 0, block_groups, in the geographies table above) for the most granular analysis

# ...by index...
geo_0_df = usa.get_geography(0)

# preview the first rows; the recorded output shows ID, NAME and SHAPE columns
geo_0_df.head()

Unnamed: 0,ID,NAME,SHAPE
0,150010201001,150010201.001,"{""rings"": [[[-17265507.1935, 2249488.050099998..."
1,150010201002,150010201.002,"{""rings"": [[[-17266206.28, 2257608.5769000016]..."
2,150010201003,150010201.003,"{""rings"": [[[-17264567.657, 2249850.0975], [-1..."
3,150010201004,150010201.004,"{""rings"": [[[-17264343.9048, 2246722.049199998..."
4,150010202021,150010202.021,"{""rings"": [[[-17264970.6029, 2244290.0187], [-..."


In [None]:
# ...by name
# NOTE(review): the geographies table lists this level as 'block_groups' (plural) and this
# cell has no recorded output - confirm the singular 'block_group' is accepted.
# NOTE(review): the result is bound to geo0_df, whereas the by-index cell binds geo_0_df.
geo0_df = usa.get_geography('block_group')

# preview the first rows
geo0_df.head()

In [67]:
# get the area of interest by filtering a larger geography
# The recorded output holds a single row, the Seattle-Tacoma-Bellevue CBSA.
# NOTE(review): selector presumably matches against the NAME column - confirm in dm.Country.get_geography.
aoi_df = usa.get_geography('cbsa', selector='Seattle')

# preview the selected area of interest
aoi_df.head()

Unnamed: 0,ID,NAME,SHAPE
0,42660,"Seattle-Tacoma-Bellevue, WA Metropolitan Stati...","{""rings"": [[[-13651055.7226, 5968866.240900002..."


In [None]:
# get the block groups in the area of interest
# Uses geo_0_df (the block groups) and aoi_df (the Seattle CBSA) created by the cells above;
# the names used here before, geo_df and aoi, are not defined by any earlier cell and
# raise a NameError on a fresh kernel.
# NOTE(review): confirm that the spatial accessor exposes within() and that it accepts a
# dataframe rather than a single geometry.
origin_geo_df = geo_0_df.spatial.within(aoi_df)

# Get Locations

In [None]:
# get the store locations from the business listings
loc_brand_df = usa.business.search('Ace Hardware')

# ...and since the result is a spatially enabled dataframe, spatial.to_featureclass can save it
# directly. This could be chained straight onto the search call, but reusing loc_brand_df
# avoids running the same search a second time. The output location is converted with
# str() because the path is built with pathlib.
loc_brand_df.spatial.to_featureclass(str(gdb_int/'loc_brand'))

# preview the brand locations
loc_brand_df.head()

In [None]:
# get all the competitors for the area of interest

# ...by NAICS or SIC code...
loc_comp_df = usa.business.get_competitors(
    code=44413005,  # TODO: include ability to specify shortened codes, since shorter NAICS codes are more general
    code_type='NAICS',
    brand_exclude='Ace Hardware'
)

# ...or simply by looking up using the existing location brand layer as a template
# (this alternative replaces the result of the code-based lookup above)
loc_comp_df = usa.business.get_competitors(brand_locations=loc_brand_df)

# ...and since the result is a spatially enabled dataframe, spatial.to_featureclass can save it
# directly. This could be chained straight onto get_competitors, but reusing loc_comp_df
# avoids a redundant third lookup. The output location is converted with str() because the
# path is built with pathlib.
loc_comp_df.spatial.to_featureclass(str(gdb_int/'loc_comp'))

# Calculate Proximity Metrics

In [None]:
# calculate the origin to nth destinations table for brand locations
# Fixes: the origin dataframe is origin_geo_df (it was misspelled orgin_geo_df), and the
# comma that was missing after destination_id_column made the whole cell a SyntaxError.
# NOTE(review): "neareset" in the function name looks like a misspelling of "nearest" -
# confirm the actual name exported by dm.proximity.
prox_df_brand = dm.proximity.get_neareset_nth_locations(
    origin_features=origin_geo_df,
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_brand_df,
    destination_id_column='STORE_ID',
    destination_brand_or_concept_column='STORE_CONCEPT',        # think Nike Outlet versus Nike Brand Store
)

In [None]:
# calculate the origin to nth destinations table for competitor locations
# Fixes: the origin dataframe is origin_geo_df (it was misspelled orgin_geo_df), and the
# comma that was missing after destination_id_column made the whole cell a SyntaxError.
# The result is bound to prox_df_comp so that it no longer overwrites the brand
# proximity table held in prox_df_brand.
# NOTE(review): "neareset" in the function name looks like a misspelling of "nearest" -
# confirm the actual name exported by dm.proximity.
prox_df_comp = dm.proximity.get_neareset_nth_locations(
    origin_features=origin_geo_df,
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_comp_df,
    destination_id_column='LOCNUM',
    destination_brand_or_concept_column='CONAME',               # think Nike Outlet versus Nike Brand Store
)

# ...and the output can be created with the dataframe to_... functions. These can also be
# chained straight onto the call above, but to_csv returns None when given a path, so the
# chained result must not be assigned back to the dataframe variable. Reusing prox_df_comp
# also avoids computing the proximity table a second time.
prox_df_comp.to_csv('prox_df_comp.csv')

# Enrich