In [1]:
import os
import sys
import re

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [2]:
# Locate the project root and the input shapefiles.
# The notebook is assumed to sit one level below the project root — TODO confirm.
# The guard keeps this cell idempotent: an unconditional os.chdir('..') climbs
# one directory higher every time the cell is re-run in the same kernel.
if 'abs_path' not in globals():
    os.chdir('..')
    abs_path = os.getcwd()

# Every .shp file below the HIFLD centroid folder (searched recursively).
# Sorted so the processing order does not depend on filesystem listing order.
HIFLD_path = os.path.join(abs_path, 'output/HIFLD/centroids')
shp_files = sorted(
    os.path.join(root, name)
    for root, _dirs, files in os.walk(HIFLD_path)
    for name in files
    if name.endswith('.shp')
)

# Tribal-lands layer (the `Tribal_Lands_WA` dataset).
tribal_WA_folder = os.path.join(abs_path, 'data/Tribal_Lands_WA')
tribal_WA_file = 'TribalLands.shp'
tribal_WA_path = os.path.join(tribal_WA_folder, tribal_WA_file)

In [3]:
def read_shp(file, rows=100):
    """
    Load a vector file through ``geopandas.read_file``.

    Parameters
    ----------
    file : path of the file to read; passed to ``gpd.read_file`` unchanged.
    rows : forwarded as the ``rows`` argument of ``gpd.read_file``.
        With the default of 100 only a limited number of rows is requested,
        so callers that need more must pass a larger value explicitly.

    Returns
    -------
    Whatever ``gpd.read_file`` returns for the given arguments.
    """
    return gpd.read_file(file, rows=rows)

In [4]:
# Load the tribal-lands layer (no row limit is passed here).
tribal_WA_shapes = gpd.read_file(tribal_WA_path)

# Load every centroid layer, keyed by its file name without the extension.
# NOTE(review): each file is read with a row cap of MAX_ROWS_PER_LAYER, so
# larger layers are silently truncated — confirm the cap is intentional.
# NOTE(review): two files with the same base name in different folders would
# overwrite each other in the dict.
MAX_ROWS_PER_LAYER = 2000
centroids_dict = dict()
for file in shp_files:
    # splitext removes only the final extension, so a name that itself
    # contains dots is kept whole instead of being cut at the first dot.
    fname = os.path.splitext(os.path.basename(file))[0]

    df = read_shp(file, rows=MAX_ROWS_PER_LAYER)
    centroids_dict[fname] = df

In [5]:
# convert to CRS 'EPSG:4326'
def convert_EPSG4326(dict):
    """
    Reproject every layer of a mapping to EPSG:4326.

    Parameters
    ----------
    dict : mapping of layer name -> object exposing ``to_crs``
        (GeoDataFrames in this notebook). The parameter name is kept for
        backward compatibility even though it shadows the builtin.

    Returns
    -------
    A new dict with the same keys whose values are the results of
    ``to_crs("EPSG:4326")``. The mapping passed in is not modified, so the
    source layers stay available under their original CRS.
    """
    return {fname: layer.to_crs("EPSG:4326") for fname, layer in dict.items()}

In [6]:
# Reproject the centroid layers and the tribal-lands layer to the same CRS
# (EPSG:4326) before they are spatially joined.
dict_EPSG4326 = convert_EPSG4326(centroids_dict)
tribal_WA_shapes = tribal_WA_shapes.to_crs("EPSG:4326")

# Spatial Joins

In [7]:
def spatial_join(dict, tribal, how='left'):
    """
    Spatially join every layer against the tribal-lands layer and flag matches.

    Parameters
    ----------
    dict : mapping of layer name -> object exposing ``sjoin``
        (GeoDataFrames of centroids in this notebook). The parameter name is
        kept for backward compatibility even though it shadows the builtin.
    tribal : layer passed as the right-hand side of ``sjoin``.
    how : join type forwarded to ``sjoin`` (default ``'left'``).

    Returns
    -------
    A new dict with the same keys. Each value is the ``sjoin`` result with an
    extra integer column ``'Tribal'``: 1 where ``index_right`` is non-null
    (the row matched a tribal polygon), 0 otherwise.

    Notes
    -----
    NOTE(review): the flag is read from ``index_right``; geopandas presumably
    names that column ``index_left`` for ``how='right'``, so that mode would
    fail here — confirm before using it.
    NOTE(review): a centroid matching several polygons presumably appears
    once per match in the joined frame — verify if row counts matter.
    """
    dict_sjoin = {}
    for fname, layer in dict.items():
        joined = layer.sjoin(tribal, how=how)

        # Vectorized null check; also gives an integer column when the
        # joined frame is empty, unlike a per-row apply.
        joined['Tribal'] = joined['index_right'].notna().astype(int)
        dict_sjoin[fname] = joined

    return dict_sjoin

In [8]:
# Join every reprojected centroid layer against the tribal-lands layer;
# each resulting frame carries a 'Tribal' flag column.
dict_sjoin = spatial_join(dict_EPSG4326, tribal_WA_shapes)

In [9]:
# List the layer names (dict keys) that went through the spatial join.
for fname in dict_sjoin:
    print(fname)

AllPlacesOfWorship
FDIC_Insured_Banks
Fire_Stations
Prison_Boundaries
PublicSchools
UrgentCareFacs


In [11]:
def save_shp(dict, save_dir):
    """
    Write every layer of a mapping to its own sub-folder of ``save_dir``.

    Parameters
    ----------
    dict : mapping of layer name -> frame exposing ``set_geometry`` and
        ``to_file`` (GeoDataFrames in this notebook).
    save_dir : folder under which one sub-folder per layer name is created.

    For each layer the 'geometry' column is set as the active geometry, the
    sub-folder ``<save_dir>/<name>`` is created if missing, and that folder
    path is handed to ``to_file`` with the 'ESRI Shapefile' driver.
    """
    for fname, layer in dict.items():
        with_geometry = layer.set_geometry('geometry')
        target = os.path.join(save_dir, f"{fname}")
        create_dir(target)
        with_geometry.to_file(target, driver='ESRI Shapefile')

def create_dir(save_dir):
    """
    Create ``save_dir`` (including missing parent folders) if it is absent.

    Calling it again for an existing directory is a no-op. ``exist_ok=True``
    replaces the separate existence check, so there is no gap between
    checking and creating.

    Raises
    ------
    FileExistsError
        If ``save_dir`` already exists but is not a directory.
    """
    os.makedirs(save_dir, exist_ok=True)

In [13]:
# Output root for the joined layers; save_shp writes each layer into a
# sub-folder of this directory named after its dict key.
save_dir = os.path.join(abs_path, 'data/HIFLD/spatial_join')
create_dir(save_dir)

save_shp(dict_sjoin, save_dir)

  shp_file.to_file(save_path, driver='ESRI Shapefile')
  shp_file.to_file(save_path, driver='ESRI Shapefile')
  shp_file.to_file(save_path, driver='ESRI Shapefile')
  shp_file.to_file(save_path, driver='ESRI Shapefile')
  shp_file.to_file(save_path, driver='ESRI Shapefile')
  shp_file.to_file(save_path, driver='ESRI Shapefile')


In [12]:
# Non-null count per column of the joined PublicSchools layer.
dict_sjoin['PublicSchools'].count()

Full_Addre     2000
Place_type     2000
source_lon     2000
source_lat     2000
geometry       2000
index_right      66
OBJECTID         66
TRIBAL_NM         0
TRIBAL_NM1        0
TRIBAL_NM2        0
TREATY_NM        48
TREATY_DT        50
MPL_CD            0
LAND_TYPE        66
OLD_RES_NM        2
WEBLINK          50
GlobalID         66
created_us       66
created_da       66
last_edite       66
last_edi_1       66
SHAPE_Leng       66
SHAPE_Area       66
Tribal         2000
dtype: int64