In [1]:
import os
import sys
import re
import csv

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [2]:
# list shp files recursively
os.chdir('..')
abs_path = os.getcwd()

HIFLD_path = os.path.join(abs_path, 'output/HIFLD/centroids')
shp_files = [os.path.join(root, name) \
             for root, dirs, files in os.walk(HIFLD_path) \
             for name in files \
             if name.endswith(('.shp'))]

tribal_WA_folder = os.path.join(abs_path, 'data/Tribal_Lands_WA')
tribal_WA_file = 'TribalLands.shp'
tribal_WA_path = os.path.join(tribal_WA_folder, tribal_WA_file)

In [3]:
def read_shp(file, rows=100):
    """
    Read geoshapes file

    Parameters
    ----------
        rows (int): number of rows per file to read

    Returns
    -------
        df (GeoDataFrame)
    """
    df = gpd.read_file(file, rows=rows)

    return df

In [4]:
# read files
tribal_WA_shapes = gpd.read_file(tribal_WA_path)
# for all shp_files
centroids_dict = dict()
for file in shp_files:
    basename = os.path.basename(file).split('/')[0]
    fname = os.path.basename(basename).split('.')[0]

    # print(file)
    df = read_shp(file, rows=5000)
    centroids_dict[fname] = df
    # df_address[fname]['Facility_type'] = fname

In [5]:
# number of observations per geoshape file 
for file in shp_files:
    with open(file, 'rb') as f:
        row_count = len(f.readlines()) - 1
        
        basename = os.path.basename(file).split('/')[0]
        fname = os.path.basename(basename).split('.')[0]
        
        print('There are {} observations in {}.'.format(row_count, fname))

There are 5510 observations in AllPlacesOfWorship.
There are 5524 observations in FDIC_Insured_Banks.
There are 5497 observations in Fire_Stations.
There are 5536 observations in Prison_Boundaries.
There are 5491 observations in PublicSchools.
There are 5332 observations in UrgentCareFacs.


# Convert to EPSG:4326
* The centroids GeoDataFrames should be in CRS EPSG:4326 but we will convert again to make sure
* Also convert the tribal lands geoshape file to CRS EPSG:4236

In [7]:
def convert_EPSG4326(dict):
    """
    Convert each GeoDataFrame to 'EPSG:4326'
         
    Parameters
    ----------
        dict (dictionary): of GeoDataFrames

    Returns
    -------
        dict (dictionary): GeoDataFrames of 'EPSG:4326' CRS
    """
    for fname in dict:
        dict[fname] = dict[fname].to_crs("EPSG:4326")

    return dict

In [8]:
dict_EPSG4326 = convert_EPSG4326(centroids_dict)
tribal_WA_shapes = tribal_WA_shapes.to_crs("EPSG:4326")

# Spatial Joins
* Goal: determine what centroids fall within a tribal land geoshape for Washington state
* If 'NA' value is returned, it means that the centroid did not fall into any geoshape

In [9]:
def spatial_join(dict, gdf, how='left'):
    """
    Spatial join centroids to geoshape file

    Parameters
    ----------
        dict (dictionary): of GeoDataFrames
        gdf (GeoDataFrame): of land shapes
        how (parameter): sjoin parameter, default = 'left'

    Returns
    -------
        dict_sjoin (dictionary): GeoDataFrames of 'EPSG:4326' CRS
    """
    dict_sjoin = {}
    for fname in dict:
        df = dict[fname].sjoin(gdf, how=how)
        
        # if centroid in tribal polygon, label as 1
        df['Tribal'] = df.index_right.apply(lambda x: 0 if pd.isna(x) else 1) 
        dict_sjoin[fname] = df

    return dict_sjoin    

In [10]:
dict_sjoin = spatial_join(dict_EPSG4326, tribal_WA_shapes)

In [11]:
for fname in dict_sjoin:
    print(fname)

AllPlacesOfWorship
FDIC_Insured_Banks
Fire_Stations
Prison_Boundaries
PublicSchools
UrgentCareFacs


## Confirm there are valid spatial joins

In [12]:
# we see that there are urgent care facilities in WA tribal lands
dict_sjoin['UrgentCareFacs'].notna().sum()

Full_Addre     4810
Place_type     4810
source_lon     4810
source_lat     4810
geometry       4810
index_right     121
OBJECTID        121
TRIBAL_NM         0
TRIBAL_NM1        0
TRIBAL_NM2        0
TREATY_NM        89
TREATY_DT        92
MPL_CD            0
LAND_TYPE       121
OLD_RES_NM        3
WEBLINK          92
GlobalID        121
created_us      121
created_da      121
last_edite      121
last_edi_1      121
SHAPE_Leng      121
SHAPE_Area      121
Tribal         4810
dtype: int64

In [13]:
# same goes for public schools in WA tribal lands
dict_sjoin['PublicSchools'].notna().sum()

Full_Addre     5000
Place_type     5000
source_lon     5000
source_lat     5000
geometry       5000
index_right     167
OBJECTID        167
TRIBAL_NM         4
TRIBAL_NM1        4
TRIBAL_NM2        1
TREATY_NM       124
TREATY_DT       128
MPL_CD            4
LAND_TYPE       167
OLD_RES_NM        6
WEBLINK         129
GlobalID        167
created_us      167
created_da      167
last_edite      167
last_edi_1      167
SHAPE_Leng      167
SHAPE_Area      167
Tribal         5000
dtype: int64

In [14]:
# look at the 167 public schools that are in WA tribal lands
# we can see from the full address strings that they are indeed WA addresses
dict_sjoin['PublicSchools'].loc[dict_sjoin['PublicSchools'].OBJECTID.notna()]

Unnamed: 0,Full_Addre,Place_type,source_lon,source_lat,geometry,index_right,OBJECTID,TRIBAL_NM,TRIBAL_NM1,TRIBAL_NM2,...,OLD_RES_NM,WEBLINK,GlobalID,created_us,created_da,last_edite,last_edi_1,SHAPE_Leng,SHAPE_Area,Tribal
172,"11110 CONINE AVENUE SE, OLYMPIA, WA 98513",PublicSchools,-122.702956,47.067606,POINT (-122.70296 47.06761),12.0,13.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{5193B8F1-492D-4C00-976F-C739E87A4CB3},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,2.355789e+06,7.590936e+10,1
406,"78976 IMNAHA HWY, IMNAHA, OR 97842",PublicSchools,-116.835181,45.559461,POINT (-116.83518 45.55946),5.0,6.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{CAEBEC35-44AA-4796-A142-9D9C8B26708F},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,4.938263e+06,6.279701e+11,1
441,"715 N CALIFORNIA, PASCO, WA 99301",PublicSchools,-119.079731,46.240702,POINT (-119.07973 46.24070),30.0,31.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{6EA4E8D9-57C8-4404-AAD7-B957BF5C44A2},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,6.846421e+06,4.489107e+11,1
442,"1102 N 10TH AVE, PASCO, WA 99301",PublicSchools,-119.103613,46.240117,POINT (-119.10361 46.24012),30.0,31.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{6EA4E8D9-57C8-4404-AAD7-B957BF5C44A2},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,6.846421e+06,4.489107e+11,1
443,"6261 RD 12 SW, ROYAL CITY, WA 99357",PublicSchools,-119.622868,46.911992,POINT (-119.62287 46.91199),30.0,31.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{6EA4E8D9-57C8-4404-AAD7-B957BF5C44A2},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,6.846421e+06,4.489107e+11,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4829,"4229 W LAKE SAMM PKWY SE, BELLEVUE, WA 98008",PublicSchools,-122.101725,47.568839,POINT (-122.10172 47.56884),38.0,39.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{F48544E9-67EE-4EC7-B3D8-EB361A1B42B7},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,4.958401e+06,-2.821400e+11,1
4835,"3100 MARTIN RD, MOUNT VERNON, WA 98273",PublicSchools,-122.299912,48.441753,POINT (-122.29991 48.44175),38.0,39.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{F48544E9-67EE-4EC7-B3D8-EB361A1B42B7},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,4.958401e+06,-2.821400e+11,1
4908,"9901 132ND AVE SE, RENTON, WA 98059",PublicSchools,-122.165835,47.514249,POINT (-122.16584 47.51425),38.0,39.0,,,,...,,http://files.usgwarchives.net/wa/indians/treat...,{F48544E9-67EE-4EC7-B3D8-EB361A1B42B7},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,4.958401e+06,-2.821400e+11,1
4909,"320 BUNNELL ST., GLENWOOD, WA 98619",PublicSchools,-121.289987,46.021326,POINT (-121.28999 46.02133),0.0,1.0,Yakama,Yakama Nation,<Null>,...,,,{9F544B38-1759-46A7-B6E4-8C3F30B662E1},WAECY_Geoservices,2023-10-13,WAECY_Geoservices,2023-10-13,1.293281e+06,5.914542e+10,1


# Summary Statistics
* Total counts of centroids that fall within WA tribal lands

In [15]:
summary_tribal = []
for fname in dict_sjoin:
    tmp = dict()
    
    tmp['Dataset'] = fname
    tmp['not_tribal'] = dict_sjoin[fname].Tribal.value_counts().tolist()[0]
    tmp['tribal'] = dict_sjoin[fname].shape[0] - dict_sjoin[fname].Tribal.value_counts().tolist()[0]
    
    summary_tribal.append(tmp)

In [16]:
pd.DataFrame(summary_tribal)

Unnamed: 0,Dataset,not_tribal,tribal
0,AllPlacesOfWorship,5000,0
1,FDIC_Insured_Banks,4976,24
2,Fire_Stations,4998,2
3,Prison_Boundaries,4862,138
4,PublicSchools,4833,167
5,UrgentCareFacs,4689,121
