In [1]:
import os
import sys
import re
import csv
import random

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import shapely.wkt

In [2]:
# Resolve the project root (the parent of the notebook directory) and derive every
# input/output path used in this notebook from it.
# The chdir is guarded so that re-running this cell in the same kernel does not
# climb one more directory level each time.
if 'abs_path' not in globals():
    os.chdir('..')
abs_path = os.getcwd()

# RUCA (Rural-Urban Commuting Area) workbook
RUCA_folder = os.path.join(abs_path, 'data')
RUCA_file = 'ruca2010revised.xlsx'
RUCA_path = os.path.join(RUCA_folder, RUCA_file)

# OMOP sample (CSV input read from the output folder)
output_folder = os.path.join(abs_path, 'output')
OMOP_file = 'sample_spatial_join_OMOP_clean.csv'
OMOP_path = os.path.join(output_folder, OMOP_file)

# Census tract boundaries (cartographic boundary shapefile)
tract_folder = os.path.join(abs_path, 'data', 'cb_2019_us_tract_500k')
tract_file = 'cb_2019_us_tract_500k.shp'
tract_path = os.path.join(tract_folder, tract_file)

# Nominatim sample files
nominatim_sample_file = "nominatim_sample.csv"
nominatim_sample_path = os.path.join(output_folder, nominatim_sample_file)

nominatim_sample_1k_file = "nominatim_sample_1k.csv"
nominatim_sample_1k_path = os.path.join(output_folder, nominatim_sample_1k_file)

nominatim_keep_columns_merge_county_parsed_path = os.path.join(output_folder, 'nominatim_keep_columns_merge_county_parsed.csv')

# RUCA Dataset
* 2010 Revised

In [3]:
# Read the 'Data' sheet (column names come from the second row, header=1) and
# shorten the verbose tract-FIPS header so it can be used as a merge key.
long_fips_header = 'State-County-Tract FIPS Code (lookup by address at http://www.ffiec.gov/Geocode/)'
RUCA_df = (
    pd.read_excel(RUCA_path, sheet_name='Data', engine="openpyxl", header=1)
    .rename(columns={long_fips_header: 'State-County-Tract FIPS Code'})
)
RUCA_df

Unnamed: 0,State-County FIPS Code,Select State,Select County,State-County-Tract FIPS Code,Primary RUCA Code 2010,"Secondary RUCA Code, 2010 (see errata)","Tract Population, 2010","Land Area (square miles), 2010","Population Density (per square mile), 2010"
0,1001,AL,Autauga County,1001020100,1,1.0,1912,3.787641,504.799727
1,1001,AL,Autauga County,1001020200,1,1.0,2170,1.289776,1682.462370
2,1001,AL,Autauga County,1001020300,1,1.0,3373,2.065366,1633.124331
3,1001,AL,Autauga County,1001020400,1,1.0,4386,2.464376,1779.760676
4,1001,AL,Autauga County,1001020500,1,1.0,10766,4.400686,2446.436531
...,...,...,...,...,...,...,...,...,...
73997,72153,PR,Yauco Municipio,72153750501,1,1.0,4960,0.693337,7153.810353
73998,72153,PR,Yauco Municipio,72153750502,1,1.0,2092,0.263530,7938.381391
73999,72153,PR,Yauco Municipio,72153750503,1,1.0,2357,1.280621,1840.512771
74000,72153,PR,Yauco Municipio,72153750601,1,1.0,5315,4.251936,1250.018953


# TIGER/Line Shapefiles

In [4]:
# Load the census tract boundary shapefile located at tract_path.
tract_shapes = gpd.read_file(tract_path)
tract_shapes

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,34,005,702808,1400000US34005702808,34005702808,7028.08,CT,1510553,0,"POLYGON ((-74.90801 40.03250, -74.90496 40.036..."
1,39,153,504200,1400000US39153504200,39153504200,5042,CT,808151,0,"POLYGON ((-81.51807 41.05957, -81.51805 41.062..."
2,36,071,000502,1400000US36071000502,36071000502,5.02,CT,967431,969216,"POLYGON ((-74.02226 41.49281, -74.02180 41.496..."
3,06,013,370000,1400000US06013370000,06013370000,3700,CT,999356,0,"POLYGON ((-122.32697 37.93502, -122.32610 37.9..."
4,48,113,019204,1400000US48113019204,48113019204,192.04,CT,2851229,9966,"POLYGON ((-96.76902 32.94655, -96.76899 32.950..."
...,...,...,...,...,...,...,...,...,...,...
73863,48,245,011303,1400000US48245011303,48245011303,113.03,CT,155358411,14272444,"POLYGON ((-94.28353 29.88008, -94.26230 29.910..."
73864,01,073,012302,1400000US01073012302,01073012302,123.02,CT,168942340,1269965,"POLYGON ((-87.16295 33.49312, -87.16185 33.495..."
73865,72,113,072400,1400000US72113072400,72113072400,724,CT,11230767,184996,"POLYGON ((-66.58227 18.03935, -66.58177 18.039..."
73866,06,081,608023,1400000US06081608023,06081608023,6080.23,CT,805954,119028,"POLYGON ((-122.27898 37.54565, -122.27590 37.5..."


In [5]:
# Inspect the available columns before building the merge key.
tract_shapes.columns

Index(['STATEFP', 'COUNTYFP', 'TRACTCE', 'AFFGEOID', 'GEOID', 'NAME', 'LSAD',
       'ALAND', 'AWATER', 'geometry'],
      dtype='object')

In [6]:
# Build the tract key by concatenating the state, county and tract code strings,
# then cast to int64 so it can be merged with the 'State-County-Tract FIPS Code'
# column of RUCA_df (the cast drops leading zeros, e.g. '06013370000' -> 6013370000).
fips_text = tract_shapes['STATEFP'] + tract_shapes['COUNTYFP'] + tract_shapes['TRACTCE']
tract_shapes['State-County-Tract FIPS Code'] = fips_text.astype(np.int64)
tract_shapes

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry,State-County-Tract FIPS Code
0,34,005,702808,1400000US34005702808,34005702808,7028.08,CT,1510553,0,"POLYGON ((-74.90801 40.03250, -74.90496 40.036...",34005702808
1,39,153,504200,1400000US39153504200,39153504200,5042,CT,808151,0,"POLYGON ((-81.51807 41.05957, -81.51805 41.062...",39153504200
2,36,071,000502,1400000US36071000502,36071000502,5.02,CT,967431,969216,"POLYGON ((-74.02226 41.49281, -74.02180 41.496...",36071000502
3,06,013,370000,1400000US06013370000,06013370000,3700,CT,999356,0,"POLYGON ((-122.32697 37.93502, -122.32610 37.9...",6013370000
4,48,113,019204,1400000US48113019204,48113019204,192.04,CT,2851229,9966,"POLYGON ((-96.76902 32.94655, -96.76899 32.950...",48113019204
...,...,...,...,...,...,...,...,...,...,...,...
73863,48,245,011303,1400000US48245011303,48245011303,113.03,CT,155358411,14272444,"POLYGON ((-94.28353 29.88008, -94.26230 29.910...",48245011303
73864,01,073,012302,1400000US01073012302,01073012302,123.02,CT,168942340,1269965,"POLYGON ((-87.16295 33.49312, -87.16185 33.495...",1073012302
73865,72,113,072400,1400000US72113072400,72113072400,724,CT,11230767,184996,"POLYGON ((-66.58227 18.03935, -66.58177 18.039...",72113072400
73866,06,081,608023,1400000US06081608023,06081608023,6080.23,CT,805954,119028,"POLYGON ((-122.27898 37.54565, -122.27590 37.5...",6081608023


# Merge RUCA to Census Tract

In [7]:
# Left join: keep every RUCA row and attach the tract attributes/geometry that
# share the same tract FIPS key.
RUCA_tracts = pd.merge(
    RUCA_df,
    tract_shapes,
    how='left',
    on='State-County-Tract FIPS Code',
)
RUCA_tracts

Unnamed: 0,State-County FIPS Code,Select State,Select County,State-County-Tract FIPS Code,Primary RUCA Code 2010,"Secondary RUCA Code, 2010 (see errata)","Tract Population, 2010","Land Area (square miles), 2010","Population Density (per square mile), 2010",STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,1001,AL,Autauga County,1001020100,1,1.0,1912,3.787641,504.799727,01,001,020100,1400000US01001020100,01001020100,201,CT,9817813.0,28435.0,"POLYGON ((-86.50916 32.47344, -86.50620 32.475..."
1,1001,AL,Autauga County,1001020200,1,1.0,2170,1.289776,1682.462370,01,001,020200,1400000US01001020200,01001020200,202,CT,3325680.0,5669.0,"POLYGON ((-86.48093 32.48154, -86.47945 32.485..."
2,1001,AL,Autauga County,1001020300,1,1.0,3373,2.065366,1633.124331,01,001,020300,1400000US01001020300,01001020300,203,CT,5349273.0,9054.0,"POLYGON ((-86.47087 32.47573, -86.46964 32.478..."
3,1001,AL,Autauga County,1001020400,1,1.0,4386,2.464376,1779.760676,01,001,020400,1400000US01001020400,01001020400,204,CT,6384276.0,8408.0,"POLYGON ((-86.45394 32.49318, -86.45308 32.493..."
4,1001,AL,Autauga County,1001020500,1,1.0,10766,4.400686,2446.436531,01,001,020500,1400000US01001020500,01001020500,205,CT,11408866.0,43534.0,"POLYGON ((-86.43816 32.45069, -86.43773 32.451..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73997,72153,PR,Yauco Municipio,72153750501,1,1.0,4960,0.693337,7153.810353,72,153,750501,1400000US72153750501,72153750501,7505.01,CT,1820185.0,0.0,"POLYGON ((-66.87736 18.03493, -66.87529 18.034..."
73998,72153,PR,Yauco Municipio,72153750502,1,1.0,2092,0.263530,7938.381391,72,153,750502,1400000US72153750502,72153750502,7505.02,CT,689930.0,0.0,"POLYGON ((-66.86686 18.02211, -66.86401 18.024..."
73999,72153,PR,Yauco Municipio,72153750503,1,1.0,2357,1.280621,1840.512771,72,153,750503,1400000US72153750503,72153750503,7505.03,CT,3298433.0,1952.0,"POLYGON ((-66.88788 18.03583, -66.88326 18.036..."
74000,72153,PR,Yauco Municipio,72153750601,1,1.0,5315,4.251936,1250.018953,72,153,750601,1400000US72153750601,72153750601,7506.01,CT,10987037.0,4527.0,"POLYGON ((-66.86634 18.01929, -66.85396 18.025..."


In [8]:
def create_dir(save_dir):
    """
    Ensure that a directory exists, creating missing parent directories as needed.

    Parameters
    ----------
        save_dir (str): path of desired output directory

    Notes
    -----
        ``exist_ok=True`` replaces the previous check-then-create sequence, so a
        directory that appears between the check and the creation is not an error.
    """
    os.makedirs(save_dir, exist_ok=True)

In [9]:
# Export the merged RUCA/tract table to <project>/output/RUCA_tracts as an ESRI Shapefile.
save_dir = os.path.join(abs_path, 'output')

# RUCA_tracts is produced by DataFrame.merge with the plain pandas frame on the
# left, which returns a plain DataFrame (no set_geometry method). Constructing the
# GeoDataFrame explicitly works whether the input is a DataFrame or a GeoDataFrame.
shp_file = gpd.GeoDataFrame(RUCA_tracts, geometry='geometry')
save_path = os.path.join(save_dir, 'RUCA_tracts')
create_dir(save_path)

shp_file.to_file(save_path, driver='ESRI Shapefile')

  shp_file.to_file(save_path, driver='ESRI Shapefile')


In [9]:
# Wrap the merged table as a GeoDataFrame and reproject it to EPSG:4326 so it
# shares a CRS with the point layers it is spatially joined to.
RUCA_gdf = gpd.GeoDataFrame(RUCA_tracts, geometry='geometry').to_crs("EPSG:4326")

# Spatial Join OMOP to RUCA Tracts

In [42]:
# Load the OMOP sample and keep only the rows that have a geometry value.
# (shapely.wkt is already imported in the first cell; the missing-geometry count
# was previously computed and discarded, so it is now reported.)
OMOP_raw = pd.read_csv(OMOP_path)
n_missing_geometry = int(OMOP_raw['geometry'].isna().sum())
print(f"rows without geometry dropped: {n_missing_geometry}")

OMOP_df = OMOP_raw.loc[OMOP_raw['geometry'].notna()]
OMOP_df.shape

  OMOP_df = pd.read_csv(OMOP_path)


(104097, 21)

In [43]:
# Preview: parse the WKT text of every row into a shapely geometry (result is only displayed).
OMOP_df['geometry'].astype(str).map(shapely.wkt.loads)

0         POINT (-71.04352199999346 42.33547200002667)
1         POINT (-71.16493999969576 42.70621300000711)
2         POINT (-74.16282099988595 40.76993499987205)
3         POINT (-78.74781599997843 42.79853499984358)
4         POINT (-70.11377699970232 44.42860999977688)
                              ...                     
104097    POINT (-84.09517421962246 34.04172694900986)
104098    POINT (-84.10131848955332 33.96679709299809)
104099    POINT (-83.90221543184028 34.06883234729177)
104100    POINT (-84.49410603994914 37.99650802456039)
104101    POINT (-81.35019228948175 40.86915505731037)
Name: geometry, Length: 104097, dtype: object

In [50]:
# Parse the WKT column into shapely points and build a GeoDataFrame declared as EPSG:4326.
omop_points = OMOP_df['geometry'].apply(shapely.wkt.loads)
OMOP_gdf = gpd.GeoDataFrame(OMOP_df, geometry=omop_points, crs="EPSG:4326")

In [59]:
# Remove the 'index_right' column carried in the input table; sjoin adds its own
# 'index_right' to the result. errors='ignore' keeps this cell safe to re-run once
# the column is already gone.
OMOP_gdf = OMOP_gdf.drop(columns=['index_right'], errors='ignore')

In [60]:
# Spatial left join: every OMOP point keeps its row and receives the attributes
# of the RUCA tract it matches (missing values where nothing matches).
OMOP_RUCA = gpd.sjoin(OMOP_gdf, RUCA_gdf, how='left')
OMOP_RUCA

Unnamed: 0,Full_Addre,Place_type,source_lon,source_lat,geometry,OBJECTID,GEOID_1,NAME_1,NAMELSAD_1,Eligible_L,...,"Population Density (per square mile), 2010",STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER
0,"523 E BROADWAY, SOUTH BOSTON, MA 02127",AllPlacesOfWorship,-71.043522,42.335472,POINT (-71.04352 42.33547),,,,,,...,31810.671141,25,025,060400,1400000US25025060400,25025060400,604,CT,399279.0,0.0
1,"454 ESSEX ST, LAWRENCE, MA 01840",AllPlacesOfWorship,-71.164940,42.706213,POINT (-71.16494 42.70621),,,,,,...,7077.781854,25,009,250100,1400000US25009250100,25009250100,2501,CT,852256.0,198604.0
2,"569 BROADWAY, NEWARK, NJ 07104",AllPlacesOfWorship,-74.162821,40.769935,POINT (-74.16282 40.76993),,,,,,...,10667.291472,34,013,009600,1400000US34013009600,34013009600,96,CT,1089628.0,139136.0
3,"3210 SOUTHWESTERN BLVD, ORCHARD PARK, NY 14127",AllPlacesOfWorship,-78.747816,42.798535,POINT (-78.74782 42.79853),,,,,,...,798.869054,36,029,013702,1400000US36029013702,36029013702,137.02,CT,18369144.0,0.0
4,"431 CAMPGROUND RD, LIVERMORE FLS, ME 04254",AllPlacesOfWorship,-70.113777,44.428610,POINT (-70.11378 44.42861),,,,,,...,161.930309,23,001,044000,1400000US23001044000,23001044000,440,CT,50966670.0,1924400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104097,"1300 PEACHTREE INDUSTRIAL BOULEVARD, SUWANEE, ...",UrgentCareFacs,-84.095174,34.041727,POINT (-84.09517 34.04173),,,,,,...,1236.558511,13,135,050210,1400000US13135050210,13135050210,502.10,CT,12738728.0,96904.0
104098,"2660 SATELLITE BOULEVARD NORTHWEST, DULUTH, GA...",UrgentCareFacs,-84.101318,33.966797,POINT (-84.10132 33.96680),,,,,,...,991.926210,13,135,050215,1400000US13135050215,13135050215,502.15,CT,7288214.0,27626.0
104099,"3685 BRASELTON HIGHWAY, DACULA, GA 30019",UrgentCareFacs,-83.902215,34.068832,POINT (-83.90222 34.06883),,,,,,...,1178.524050,13,135,050610,1400000US13135050610,13135050610,506.10,CT,32703063.0,360953.0
104100,"1055 DOVE RUN ROAD, LEXINGTON, KY 40502",UrgentCareFacs,-84.494106,37.996508,POINT (-84.49411 37.99651),,,,,,...,4060.929442,21,067,002302,1400000US21067002302,21067002302,23.02,CT,3315849.0,74.0


In [61]:
# List the columns of the joined table (OMOP fields followed by RUCA/tract fields).
OMOP_RUCA.columns

Index(['Full_Addre', 'Place_type', 'source_lon', 'source_lat', 'geometry',
       'OBJECTID', 'GEOID_1', 'NAME_1', 'NAMELSAD_1', 'Eligible_L',
       'Shape_Leng', 'Shape_Le_1', 'Shape_Area', 'Tribal', 'address_1',
       'address_2', 'city', 'state', 'zip', 'STATE', 'index_right',
       'State-County FIPS Code', 'Select State', 'Select County',
       'State-County-Tract FIPS Code', 'Primary RUCA Code 2010',
       'Secondary RUCA Code, 2010 (see errata)', 'Tract Population, 2010',
       'Land Area (square miles), 2010',
       'Population Density (per square mile), 2010', 'STATEFP', 'COUNTYFP',
       'TRACTCE', 'AFFGEOID', 'GEOID', 'NAME', 'LSAD', 'ALAND', 'AWATER'],
      dtype='object')

In [62]:
# Export the OMOP/RUCA join to <project>/output/OMOP_RUCA as an ESRI Shapefile.
save_dir = os.path.join(abs_path, 'output')
save_path = os.path.join(save_dir, 'OMOP_RUCA')
create_dir(save_path)

shp_file = OMOP_RUCA.set_geometry('geometry')
shp_file.to_file(save_path, driver='ESRI Shapefile')

  shp_file.to_file(save_path, driver='ESRI Shapefile')


# RUCA and Tribal Summary

In [67]:
# Number of OMOP locations per primary RUCA code.
OMOP_RUCA['Primary RUCA Code 2010'].value_counts().to_frame()

Unnamed: 0_level_0,count
Primary RUCA Code 2010,Unnamed: 1_level_1
1.0,63703
4.0,10436
10.0,9975
2.0,7687
7.0,7143
5.0,2686
8.0,1079
3.0,585
6.0,333
9.0,248


In [69]:
# Flag each location as rural-and-tribal.
# Rural_Tribal is 1 only when the primary RUCA code is not one of the urban codes
# AND Tribal == 1; every other combination with Tribal in {0, 1} gets 0.
# Rows whose Tribal value is neither 0 nor 1 are not appended.
# A missing RUCA code is not in urban_codes, so it is treated as non-urban here.
urban_codes = [1.0, 2.0, 3.0]
omop_ruca_lst = []
for i, row in OMOP_RUCA.iterrows():
    if row.Tribal == 1:
        flag = 0 if row['Primary RUCA Code 2010'] in urban_codes else 1
    elif row.Tribal == 0:
        flag = 0
    else:
        continue

    row['Rural_Tribal'] = flag
    omop_ruca_lst.append(row)

In [73]:
# Stack the flagged row Series as columns, then flip so each Series is a row again.
omop_ruca_tribal = pd.concat(omop_ruca_lst, axis=1).T

In [74]:
# Count of locations per Rural_Tribal flag.
omop_ruca_tribal['Rural_Tribal'].value_counts().to_frame()

Unnamed: 0_level_0,count
Rural_Tribal,Unnamed: 1_level_1
0,100972
1,3125


# Rurality and Tribal Lands for Nominatim Addresses
* Includes only "SUCCESSFUL ADDRESSES"

In [10]:
def rurality(x):
    """
    Map a primary RUCA code to a rural flag.

    Parameters
    ----------
        x: primary RUCA code of one location

    Returns
    -------
        int: 0 when the code is 1, 2 or 3 (urban), otherwise 1.
             Any other value, including NaN, yields 1.
    """
    return 0 if x in (1.0, 2.0, 3.0) else 1

In [11]:
# Locate the combined tribal-priority shapefile, load it and reproject it to
# EPSG:4326 so it matches the CRS assigned to the address points.
tribal_priority_file = 'Tribal_Priority_2_5_combined.shp'
tribal_priority_folder = os.path.join(abs_path, 'output/Tribal_Priority_2_5')
tribal_priority_path = os.path.join(tribal_priority_folder, tribal_priority_file)

tribal_priority_shapes = gpd.read_file(tribal_priority_path).to_crs("EPSG:4326")

In [12]:
# Read the geocoded addresses and turn the WKT 'geometry' text into point
# geometries of a GeoDataFrame declared as EPSG:4326.
nominatim_df = pd.read_csv(nominatim_keep_columns_merge_county_parsed_path)

nominatim_points = nominatim_df['geometry'].apply(shapely.wkt.loads)
nominatim_gdf = gpd.GeoDataFrame(nominatim_df, geometry=nominatim_points, crs="EPSG:4326")

In [13]:
# Attach RUCA tract attributes to every address, then keep only the primary RUCA
# code (and population density, which is removed in a later cell).
ruca_columns_to_drop = [
    "index_right",
    "State-County FIPS Code", "Select State", "Select County",
    "State-County-Tract FIPS Code",
    "Secondary RUCA Code, 2010 (see errata)",
    "Tract Population, 2010", "Land Area (square miles), 2010",
    "STATEFP", "COUNTYFP", "TRACTCE", "AFFGEOID", "GEOID", "NAME", "LSAD",
    "ALAND", "AWATER",
]
nominatim_RUCA = gpd.sjoin(nominatim_gdf, RUCA_gdf, how='left').drop(columns=ruca_columns_to_drop)

# 0 = urban (RUCA 1-3), 1 = everything else
nominatim_RUCA["rural"] = nominatim_RUCA["Primary RUCA Code 2010"].apply(rurality)

In [14]:
# Flag addresses that fall inside a tribal-priority polygon.
# NOTE(review): a left sjoin returns one row per matching polygon, so a point that
# matches several polygons appears more than once -- confirm whether such
# duplicates should be collapsed before counting or sampling.
nominatim_RUCA_tribal = nominatim_RUCA.sjoin(tribal_priority_shapes, how='left')

# A matched polygon leaves a non-null OBJECTID; unmatched rows are null.
# Vectorised replacement for the former per-element np.isnan lambda.
nominatim_RUCA_tribal['tribal'] = nominatim_RUCA_tribal['OBJECTID'].notna().astype(int)
nominatim_RUCA_tribal = nominatim_RUCA_tribal.drop(columns=['Population Density (per square mile), 2010', 'index_right',
                                                            'OBJECTID', 'GEOID_1', 'NAME_1', 'NAMELSAD_1', 'Eligible_L',
                                                            'Shape_Leng', 'Shape_Le_1', 'Shape_Area',])

# 1 only when the address is both rural and on tribal land (vectorised instead of
# a row-wise apply).
nominatim_RUCA_tribal["rural_tribal"] = (
    nominatim_RUCA_tribal["rural"].eq(1) & nominatim_RUCA_tribal["tribal"].eq(1)
).astype(int)
nominatim_RUCA_tribal

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,state_abbr,location_source_value,latitude,longitude,geometry,Nominatim_address,Primary RUCA Code 2010,rural,tribal,rural_tribal
0,1,523,East,,Broadway,,,Suffolk County,523 E Broadway,,...,MA,"523 E BROADWAY, SOUTH BOSTON, MA 02127",42.335472,-71.043522,POINT (-71.04352 42.33547),"523,East Broadway , South Boston, Suffolk Co...",1.0,0,0,0
1,2,569,,,Broadway,,,Essex County,569 Broadway,,...,NJ,"569 BROADWAY, NEWARK, NJ 07104",40.769935,-74.162821,POINT (-74.16282 40.76993),"569, Broadway , Newark, Essex County, New Je...",1.0,0,0,0
2,3,3210,,,Southwestern,Boulevard,,Erie County,3210 Southwestern Blvd,,...,NY,"3210 SOUTHWESTERN BLVD, ORCHARD PARK, NY 14127",42.798535,-78.747816,POINT (-78.74782 42.79853),"3210, Southwestern Boulevard , Orchard Park, ...",1.0,0,0,0
3,4,431,,,Campground,Road,,Androscoggin County,431 Campground Rd,,...,ME,"431 CAMPGROUND RD, LIVERMORE FLS, ME 04254",44.428610,-70.113777,POINT (-70.11378 44.42861),"431, Campground Road , Livermore Fls, Androsc...",7.0,1,0,0
4,5,105,,,Harris,Avenue,,Cumberland County,105 Harris Ave,,...,ME,"105 HARRIS AVE, PORTLAND, ME 04103",43.699355,-70.303214,POINT (-70.30321 43.69936),"105, Harris Avenue , Portland, Cumberland Cou...",1.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51265,51441,76,,,Rutledge,Street,,Kings County,76 Rutledge St,,...,NY,"PAYROLL ACCOUNT 76 RUTLEDGE ST, BROOKLYN, NY 1...",40.700628,-73.959196,POINT (-73.95920 40.70063),"76, Rutledge Street , Brooklyn, Kings County,...",1.0,0,0,0
51266,51442,340,East,,Mosholu,Parkway,South,Bronx County,340 E Mosholu Pkwy S,APT 6C,...,NY,"340 E MOSHOLU PKWY S APT 6C, BRONX, NY 10458",40.870530,-73.882055,POINT (-73.88206 40.87053),"340,East Mosholu Parkway South, Bronx, Bronx ...",1.0,0,0,0
51267,51443,656,East,,237th,Street,,Bronx County,656 E 237Th St,,...,NY,"656 E 237TH ST, BRONX, NY 10466",40.897795,-73.856772,POINT (-73.85677 40.89780),"656,East 237th Street , Bronx, Bronx County, ...",1.0,0,0,0
51268,51444,903,,,Maple,Street,,Monroe County,903 Maple St,,...,NY,"903 MAPLE ST, ROCHESTER, NY 14611",43.155212,-77.649591,POINT (-77.64959 43.15521),"903, Maple Street , Rochester, Monroe County,...",1.0,0,0,0


In [15]:
# Distribution of each flag over all geocoded addresses.
for flag_column in ("rural_tribal", "tribal", "rural"):
    print(nominatim_RUCA_tribal[flag_column].value_counts())

rural_tribal
0    50770
1      511
Name: count, dtype: int64
tribal
0    50704
1      577
Name: count, dtype: int64
rural
0    39558
1    11723
Name: count, dtype: int64


In [17]:
# Persist the flagged addresses as CSV (without the index column).
nominatim_RUCA_tribal_path = os.path.join(output_folder, 'nominatim_rural_tribal.csv')
nominatim_RUCA_tribal.to_csv(path_or_buf=nominatim_RUCA_tribal_path, index=False)

In [3]:
# Reload the flagged addresses from the CSV written above; the result is a plain
# pandas DataFrame whose 'geometry' column holds text.
nominatim_RUCA_tribal_path = os.path.join(output_folder, 'nominatim_rural_tribal.csv')
nominatim_RUCA_tribal = pd.read_csv(filepath_or_buffer=nominatim_RUCA_tribal_path)

In [4]:
# Sanity check: the flag counts of the reloaded table.
for flag_column in ("rural", "tribal"):
    print(nominatim_RUCA_tribal[flag_column].value_counts())

rural
0    39558
1    11723
Name: count, dtype: int64
tribal
0    50704
1      577
Name: count, dtype: int64


## Random Sampling
* Grouped by state and rural or tribal

In [16]:
random.seed(42)
# DataFrame.sample draws from NumPy's random state, not from the stdlib `random`
# module, so random.seed() alone does not make the sample reproducible. An explicit
# RandomState is passed to every per-group draw instead.
sample_rng = np.random.RandomState(42)

# Draw 20 rows (with replacement) per (state, rural_tribal) group, then remove
# repeated rows; groups can therefore end up with fewer than 20 rows.
# NOTE(review): sampling with replacement followed by drop_duplicates also shrinks
# groups that have 20 or more rows -- confirm whether sampling
# min(len(group), 20) rows without replacement was intended.
nominatim_sample = (
    nominatim_RUCA_tribal
    .groupby(by=['state_abbr', 'rural_tribal'])
    .apply(lambda x: x.sample(n=20, replace=True, random_state=sample_rng))
    .reset_index(drop=True)
)
nominatim_sample = nominatim_sample.drop_duplicates()
nominatim_sample

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,state_abbr,location_source_value,latitude,longitude,geometry,Nominatim_address,Primary RUCA Code 2010,rural,tribal,rural_tribal
0,26635,7,,,Frankford,Avenue,,Calhoun County,7 Frankford Ave,Bldg 141,...,AL,"7 Frankford Ave Bldg 141, Anniston, AL 36201",33.624149,-85.967567,POINT (-85.96757 33.62415),"7, Frankford Avenue , Anniston, Calhoun Count...",2.0,0,0,0
1,30049,1620,,,Boswell,Street,,Coffee County,1620 Boswell St,,...,AL,"1620 BOSWELL ST, ELBA, AL 36323",31.413124,-86.083737,POINT (-86.08374 31.41312),"1620, Boswell Street , Elba, Coffee County, A...",10.0,1,0,0
2,32629,1228,,County Road,89,,,Jackson County,1228 County Road 89,,...,AL,"1228 COUNTY ROAD 89, BRYANT, AL 35958",34.876513,-85.588194,POINT (-85.58819 34.87651),"1228, County Road 89 , Bryant, Jackson County...",2.0,0,0,0
3,40214,1305,,,North,Pike,,Colbert County,1305 North Pike,,...,AL,"1305 NORTH PIKE, CHEROKEE, AL 35616",34.769589,-87.963863,POINT (-87.96386 34.76959),"1305, North Pike , Cherokee, Colbert County, ...",2.0,0,0,0
4,28601,170,,,2nd,Avenue,Northwest,Walker County,170 2Nd Avenue Northwest,,...,AL,"170 2ND AVENUE NORTHWEST, CARBON HILL, AL 35549",33.889849,-87.528517,POINT (-87.52852 33.88985),"170, 2nd Avenue Northwest, Carbon Hill, Walke...",6.0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1342,31770,40,,,Honor Farm,Road,,Fremont County,40 Honor Farm Rd,,...,WY,"40 HONOR FARM RD, RIVERTON, WY 82501",43.059427,-108.369896,POINT (-108.36990 43.05943),"40, Honor Farm Road , Riverton, Fremont Count...",4.0,1,1,1
1343,4610,933,West,,Main,Street,,Fremont County,933 W Main St,,...,WY,"933 W Main St, Riverton, WY 82501",43.024261,-108.404659,POINT (-108.40466 43.02426),"933,West Main Street , Riverton, Fremont Coun...",4.0,1,1,1
1344,40330,510,North,,First,Street,,Fremont County,510 North First Street,,...,WY,"510 NORTH FIRST STREET, RIVERTON, WY 82501",43.029179,-108.391055,POINT (-108.39106 43.02918),"510,North First Street , Riverton, Fremont Co...",4.0,1,1,1
1351,40331,1620,East,,Sunset,Avenue,,Fremont County,1620 East Sunset Avenue,,...,WY,"1620 EAST SUNSET AVENUE, RIVERTON, WY 82501",43.035409,-108.369598,POINT (-108.36960 43.03541),"1620,East Sunset Avenue , Riverton, Fremont C...",4.0,1,1,1


In [19]:
# Number of sampled addresses per state.
nominatim_sample.state_abbr.value_counts()

state_abbr
OK    39
MT    34
MN    33
WY    31
AZ    31
ID    31
ND    31
SD    30
NM    30
MI    28
NE    28
CO    24
UT    24
WA    23
OR    22
NC    22
NV    21
WI    21
KS    21
MA    20
CA    20
TX    20
TN    20
RI    20
PA    20
FL    20
GA    20
MD    20
NY    20
OH    20
HI    20
IN    20
ME    20
KY    20
NJ    20
MS    20
AR    20
IL    20
IA    20
NH    20
AL    20
MO    19
CT    19
VA    19
VT    19
WV    19
SC    18
DE    18
Name: count, dtype: int64

In [16]:
# Distribution of each flag within the sample.
for flag_column in ("rural", "tribal", "rural_tribal"):
    print(nominatim_sample[flag_column].value_counts())

rural
0    640
1    446
Name: count, dtype: int64
tribal
0    933
1    153
Name: count, dtype: int64
rural_tribal
0    936
1    150
Name: count, dtype: int64


In [17]:
# Write the rural/tribal stratified sample to CSV (without the index column).
# NOTE(review): this rebinds nominatim_sample_path, which the configuration cell
# also defines; cells that read it later see whichever assignment ran last.
nominatim_sample_path = os.path.join(output_folder, 'nominatim_sample_1k_rural_tribal.csv')
nominatim_sample.to_csv(path_or_buf=nominatim_sample_path, index=False)

## Tribal Sampling
* Take all tribal addresses

In [22]:
random.seed(42)
# DataFrame.sample uses NumPy's random state rather than the stdlib `random`
# module, so an explicit RandomState is passed to make the draw reproducible.
tribal_sample_rng = np.random.RandomState(42)

# Non-tribal addresses: 12 draws per state, with replacement (the same address
# can be drawn more than once; repeats are not removed here).
non_tribal_df = nominatim_RUCA_tribal.loc[nominatim_RUCA_tribal.tribal == 0]
non_tribal_sample = (
    non_tribal_df
    .groupby(by=['state_abbr'])
    .apply(lambda x: x.sample(n=12, replace=True, random_state=tribal_sample_rng))
    .reset_index(drop=True)
)

# Tribal addresses: keep all of them.
tribal_df = nominatim_RUCA_tribal.loc[nominatim_RUCA_tribal.tribal == 1]

nominatim_sample_tribal = pd.concat([tribal_df, non_tribal_sample])
nominatim_sample_tribal.tribal.value_counts()

tribal
0    576
1    564
Name: count, dtype: int64

In [23]:
# Write the tribal-focused sample to CSV (without the index column).
# NOTE(review): this rebinds nominatim_sample_path again; cells that read it
# later see whichever assignment ran last.
nominatim_sample_path = os.path.join(output_folder, 'nominatim_sample_1k_tribal.csv')
nominatim_sample_tribal.to_csv(path_or_buf=nominatim_sample_path, index=False)

# Rurality for Nominatim Sample
* And determine if tribal land

In [9]:
def rurality(x):
    """
    Classify a primary RUCA code as urban (0) or rural (1).

    Parameters
    ----------
        x: primary RUCA code of one location

    Returns
    -------
        int: 0 for codes 1, 2 and 3; 1 for every other value (including NaN).
    """
    urban_codes = (1.0, 2.0, 3.0)
    is_rural = x not in urban_codes
    return int(is_rural)

In [None]:
# NOTE(review): shapely.wkt is already imported in the notebook's first cell;
# this re-import is redundant.
import shapely.wkt

# Read the sample addresses from CSV.
nominatim_df = pd.read_csv(nominatim_sample_path)

# Parse the WKT text in the `geometry` column into shapely objects and build a
# GeoDataFrame tagged with EPSG:4326.
nominatim_gdf = gpd.GeoDataFrame(
    nominatim_df,
    geometry=nominatim_df['geometry'].apply(shapely.wkt.loads),
    crs="EPSG:4326",
)

In [15]:
# Left spatial join: every address row is kept and gains the columns of the
# `RUCA_gdf` shape it falls in (NaN where nothing matches).
nominatim_RUCA = gpd.sjoin(nominatim_gdf, RUCA_gdf, how='left')
nominatim_RUCA

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,"Population Density (per square mile), 2010",STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER
0,33702,450,,,St Emmanuel,Street,,Mobile County,450 St Emmanuel St,,...,349.783230,01,097,001200,1400000US01097001200,01097001200,12,CT,2.602505e+07,31767445.0
1,43138,3900,,,Pleasant Valley,Road,,Mobile County,3900 Pleasant Valley Rd,,...,4756.091516,01,097,003205,1400000US01097003205,01097003205,32.05,CT,1.936616e+06,8344.0
2,37054,1800,,,Decatur,Highway,,Jefferson County,1800 Decatur Hwy,,...,773.010775,01,073,011703,1400000US01073011703,01073011703,117.03,CT,3.051574e+07,20405.0
3,43315,27605,,State Hwy,75,,,Blount County,27605 State Hwy 75,,...,98.975030,01,009,050102,1400000US01009050102,01009050102,501.02,CT,1.847918e+08,1051310.0
4,26017,619,South,,Nineteenth,Street,,Jefferson County,619 South Nineteenth Street,,...,4290.344048,01,073,004500,1400000US01073004500,01073004500,45,CT,3.020202e+06,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,9830,5901,,,Coffeen,Avenue,,Sheridan County,5901 Coffeen Ave,LOT 67,...,4.187994,56,033,000500,1400000US56033000500,56033000500,5,CT,4.044815e+09,4707707.0
524,10733,1215,East,,Gibbon,Street,,Albany County,1215 E Gibbon St,,...,6822.918356,56,001,963400,1400000US56001963400,56001963400,9634,CT,1.325723e+06,0.0
525,10723,2190,West,,Teton,Boulevard,,Sweetwater County,2190 W Teton Blvd,,...,5991.882061,56,037,970602,1400000US56037970602,56037970602,9706.02,CT,1.497769e+06,0.0
526,10712,2155,,,Frances,Street,,Natrona County,2155 Frances St,,...,1833.800802,56,025,000600,1400000US56025000600,56025000600,6,CT,1.036468e+07,0.0


In [16]:
# Remove the columns brought in by the RUCA/tract join that are not needed
# downstream. "Primary RUCA Code 2010" and
# "Population Density (per square mile), 2010" are deliberately kept here; the
# latter is dropped later from `nominatim_RUCA_tribal`.
# (The original list named "Land Area (square miles), 2010" twice; each label
# now appears once.)
nominatim_RUCA = nominatim_RUCA.drop(columns=[
    # spatial-join bookkeeping
    "index_right",
    # RUCA table columns
    "State-County FIPS Code", "Select State", "Select County", "State-County-Tract FIPS Code",
    "Secondary RUCA Code, 2010 (see errata)", "Tract Population, 2010", "Land Area (square miles), 2010",
    # census tract shapefile columns
    "STATEFP", "COUNTYFP", "TRACTCE", "AFFGEOID", "GEOID", "NAME", "LSAD", "ALAND", "AWATER",
])
nominatim_RUCA

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,state,zip,state_abbr,location_source_value,latitude,longitude,geometry,Nominatim_address,Primary RUCA Code 2010,"Population Density (per square mile), 2010"
0,33702,450,,,St Emmanuel,Street,,Mobile County,450 St Emmanuel St,,...,Alabama,36603,AL,"450 ST EMMANUEL ST, MOBILE, AL 36603",30.680809,-88.041740,POINT (-88.04174 30.68081),"450, St Emmanuel Street , Mobile, Mobile Coun...",1.0,349.783230
1,43138,3900,,,Pleasant Valley,Road,,Mobile County,3900 Pleasant Valley Rd,,...,Alabama,36609,AL,"3900 PLEASANT VALLEY RD, MOBILE, AL 36609",30.662790,-88.146510,POINT (-88.14651 30.66279),"3900, Pleasant Valley Road , Mobile, Mobile C...",1.0,4756.091516
2,37054,1800,,,Decatur,Highway,,Jefferson County,1800 Decatur Hwy,,...,Alabama,35071,AL,"1800 DECATUR HWY, GARDENDALE, AL 35071",33.667229,-86.816798,POINT (-86.81680 33.66723),"1800, Decatur Highway , Gardendale, Jefferson...",1.0,773.010775
3,43315,27605,,State Hwy,75,,,Blount County,27605 State Hwy 75,,...,Alabama,35121,AL,"27605 STATE HWY 75, ONEONTA, AL 35121",33.933428,-86.497810,POINT (-86.49781 33.93343),"27605, State Hwy 75 , Oneonta, Blount County,...",7.0,98.975030
4,26017,619,South,,Nineteenth,Street,,Jefferson County,619 South Nineteenth Street,,...,Alabama,35233,AL,"619 SOUTH NINETEENTH STREET, BIRMINGHAM, AL 35233",33.505603,-86.802035,POINT (-86.80204 33.50560),"619,South Nineteenth Street , Birmingham, Jef...",1.0,4290.344048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,9830,5901,,,Coffeen,Avenue,,Sheridan County,5901 Coffeen Ave,LOT 67,...,Wyoming,82801,WY,"5901 COFFEEN AVE LOT 67, SHERIDAN, WY 82801",44.733738,-106.949627,POINT (-106.94963 44.73374),"5901, Coffeen Avenue , Sheridan, Sheridan Cou...",5.0,4.187994
524,10733,1215,East,,Gibbon,Street,,Albany County,1215 E Gibbon St,,...,Wyoming,82072,WY,"1215 E GIBBON ST, LARAMIE, WY 82072",41.317888,-105.580370,POINT (-105.58037 41.31789),"1215,East Gibbon Street , Laramie, Albany Cou...",4.0,6822.918356
525,10723,2190,West,,Teton,Boulevard,,Sweetwater County,2190 W Teton Blvd,,...,Wyoming,82935,WY,"2190 W TETON BLVD, GREEN RIVER, WY 82935",41.495797,-109.460196,POINT (-109.46020 41.49580),"2190,West Teton Boulevard , Green River, Swee...",4.0,5991.882061
526,10712,2155,,,Frances,Street,,Natrona County,2155 Frances St,,...,Wyoming,82601,WY,"2155 FRANCES ST, CASPER, WY 82601",42.827141,-106.316367,POINT (-106.31637 42.82714),"2155, Frances Street , Casper, Natrona County...",1.0,1833.800802


In [20]:
# Derive the binary `rural` flag from each row's primary RUCA code.
nominatim_RUCA["rural"] = nominatim_RUCA["Primary RUCA Code 2010"].apply(rurality)
nominatim_RUCA

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,zip,state_abbr,location_source_value,latitude,longitude,geometry,Nominatim_address,Primary RUCA Code 2010,"Population Density (per square mile), 2010",rural
0,33702,450,,,St Emmanuel,Street,,Mobile County,450 St Emmanuel St,,...,36603,AL,"450 ST EMMANUEL ST, MOBILE, AL 36603",30.680809,-88.041740,POINT (-88.04174 30.68081),"450, St Emmanuel Street , Mobile, Mobile Coun...",1.0,349.783230,0
1,43138,3900,,,Pleasant Valley,Road,,Mobile County,3900 Pleasant Valley Rd,,...,36609,AL,"3900 PLEASANT VALLEY RD, MOBILE, AL 36609",30.662790,-88.146510,POINT (-88.14651 30.66279),"3900, Pleasant Valley Road , Mobile, Mobile C...",1.0,4756.091516,0
2,37054,1800,,,Decatur,Highway,,Jefferson County,1800 Decatur Hwy,,...,35071,AL,"1800 DECATUR HWY, GARDENDALE, AL 35071",33.667229,-86.816798,POINT (-86.81680 33.66723),"1800, Decatur Highway , Gardendale, Jefferson...",1.0,773.010775,0
3,43315,27605,,State Hwy,75,,,Blount County,27605 State Hwy 75,,...,35121,AL,"27605 STATE HWY 75, ONEONTA, AL 35121",33.933428,-86.497810,POINT (-86.49781 33.93343),"27605, State Hwy 75 , Oneonta, Blount County,...",7.0,98.975030,1
4,26017,619,South,,Nineteenth,Street,,Jefferson County,619 South Nineteenth Street,,...,35233,AL,"619 SOUTH NINETEENTH STREET, BIRMINGHAM, AL 35233",33.505603,-86.802035,POINT (-86.80204 33.50560),"619,South Nineteenth Street , Birmingham, Jef...",1.0,4290.344048,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,9830,5901,,,Coffeen,Avenue,,Sheridan County,5901 Coffeen Ave,LOT 67,...,82801,WY,"5901 COFFEEN AVE LOT 67, SHERIDAN, WY 82801",44.733738,-106.949627,POINT (-106.94963 44.73374),"5901, Coffeen Avenue , Sheridan, Sheridan Cou...",5.0,4.187994,1
524,10733,1215,East,,Gibbon,Street,,Albany County,1215 E Gibbon St,,...,82072,WY,"1215 E GIBBON ST, LARAMIE, WY 82072",41.317888,-105.580370,POINT (-105.58037 41.31789),"1215,East Gibbon Street , Laramie, Albany Cou...",4.0,6822.918356,1
525,10723,2190,West,,Teton,Boulevard,,Sweetwater County,2190 W Teton Blvd,,...,82935,WY,"2190 W TETON BLVD, GREEN RIVER, WY 82935",41.495797,-109.460196,POINT (-109.46020 41.49580),"2190,West Teton Boulevard , Green River, Swee...",4.0,5991.882061,1
526,10712,2155,,,Frances,Street,,Natrona County,2155 Frances St,,...,82601,WY,"2155 FRANCES ST, CASPER, WY 82601",42.827141,-106.316367,POINT (-106.31637 42.82714),"2155, Frances Street , Casper, Natrona County...",1.0,1833.800802,0


## Nominatim 1k Sample
* New 1k addresses

In [11]:
# Same pipeline as for the first sample, applied to the 1k address file:
# load -> GeoDataFrame -> RUCA spatial join -> drop join columns -> rural flag.
nominatim_df = pd.read_csv(nominatim_sample_1k_path)

# Parse the WKT text in `geometry` and build a GeoDataFrame tagged with EPSG:4326.
nominatim_gdf = gpd.GeoDataFrame(
    nominatim_df,
    geometry=nominatim_df['geometry'].apply(shapely.wkt.loads),
    crs="EPSG:4326",
)

# Left join keeps every address; unmatched rows get NaN RUCA columns.
nominatim_RUCA = nominatim_gdf.sjoin(RUCA_gdf, how='left')

# Remove join columns not needed downstream. "Primary RUCA Code 2010" and
# "Population Density (per square mile), 2010" are kept here.
# (The original list named "Land Area (square miles), 2010" twice; each label
# now appears once.)
nominatim_RUCA = nominatim_RUCA.drop(columns=[
    # spatial-join bookkeeping
    "index_right",
    # RUCA table columns
    "State-County FIPS Code", "Select State", "Select County", "State-County-Tract FIPS Code",
    "Secondary RUCA Code, 2010 (see errata)", "Tract Population, 2010", "Land Area (square miles), 2010",
    # census tract shapefile columns
    "STATEFP", "COUNTYFP", "TRACTCE", "AFFGEOID", "GEOID", "NAME", "LSAD", "ALAND", "AWATER",
])

# rurality() returns 1 for any code other than 1, 2 or 3, including NaN.
nominatim_RUCA["rural"] = nominatim_RUCA["Primary RUCA Code 2010"].apply(rurality)
nominatim_RUCA

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,zip,state_abbr,location_source_value,latitude,longitude,geometry,Nominatim_address,Primary RUCA Code 2010,"Population Density (per square mile), 2010",rural
0,35717,186,,,Justice,Avenue,,Crenshaw County,186 Justice Avenue,,...,36049,AL,"186 JUSTICE AVENUE, LUVERNE, AL 36049",31.728911,-86.288605,POINT (-86.28860 31.72891),"186, Justice Avenue , Luverne, Crenshaw Count...",2.0,22.010257,0
1,25979,3690,,,Grandview,Parkway,,Jefferson County,3690 Grandview Parkway,,...,35243,AL,"3690 GRANDVIEW PARKWAY, BIRMINGHAM, AL 35243",33.432934,-86.717441,POINT (-86.71744 33.43293),"3690, Grandview Parkway , Birmingham, Jeffers...",1.0,1357.186344,0
2,37108,3802,,,Spring,Avenue,Southwest,Morgan County,3802 Spring Ave Sw,,...,35603,AL,"3802 SPRING AVE SW, DECATUR, AL 35603",34.575407,-86.989779,POINT (-86.98978 34.57541),"3802, Spring Avenue Southwest, Decatur, Morga...",1.0,3264.539180,0
3,19361,44825,,Highway,17,,,Lamar County,44825 Highway 17,,...,35592,AL,"44825 Highway 17, Vernon, Alabama 35592",33.760578,-88.108610,POINT (-88.10861 33.76058),"44825, Highway 17 , Vernon, Lamar County, Ala...",10.0,30.009745,1
4,35230,3520,,,Marion Spillway,Road,,Elmore County,3520 Marion Spillway Road,,...,36025,AL,"3520 MARION SPILLWAY ROAD, ELMORE, AL 36025",32.574305,-86.320806,POINT (-86.32081 32.57430),"3520, Marion Spillway Road , Elmore, Elmore C...",1.0,158.967741,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1051,10732,570,,,Marion,Street,,Sheridan County,570 Marion St,,...,82801,WY,"570 MARION ST, SHERIDAN, WY 82801",44.803733,-106.959038,POINT (-106.95904 44.80373),"570, Marion Street , Sheridan, Sheridan Count...",4.0,2101.786298,1
1052,5164,1251,North,,21st,Street,,Albany County,1251 N 21St St,,...,82072,WY,"1251 N 21ST ST, LARAMIE, WY 82072",41.322809,-105.569705,POINT (-105.56971 41.32281),"1251,North 21st Street , Laramie, Albany Coun...",4.0,2614.293105,1
1053,10759,954,,,Mccue St 97,,,Albany County,954 Mccue St 97,,...,82072,WY,"954 MCCUE ST 97, LARAMIE, WY 82072",41.321566,-105.609783,POINT (-105.60978 41.32157),"954, Mccue St 97 , Laramie, Albany County, W...",4.0,658.449686,1
1054,10778,252,,,Dell Range,Boulevard,,Laramie County,252 Dell Range Blvd,,...,82009,WY,"252 DELL RANGE BLVD, CHEYENNE, WY 82009",41.164098,-104.824091,POINT (-104.82409 41.16410),"252, Dell Range Boulevard , Cheyenne, Laramie...",1.0,1871.577856,0


## Determine if on tribal land

In [12]:
# Build the path to the tribal priority shapefile under the project's output folder.
tribal_priority_file = 'Tribal_Priority_2_5_combined.shp'
tribal_priority_folder = os.path.join(abs_path, 'output/Tribal_Priority_2_5')
tribal_priority_path = os.path.join(tribal_priority_folder, tribal_priority_file)

In [13]:
# Read the tribal priority shapes and reproject them to EPSG:4326, the CRS
# assigned to the address points, so the two layers can be spatially joined.
tribal_priority_shapes = gpd.read_file(tribal_priority_path).to_crs("EPSG:4326")

In [23]:
# Left spatial join against the tribal priority shapes: every address row is
# kept; rows matching no shape get NaN in the joined columns (e.g. OBJECTID).
nominatim_RUCA_tribal = gpd.sjoin(nominatim_RUCA, tribal_priority_shapes, how='left')
nominatim_RUCA_tribal

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,rural,index_right,OBJECTID,GEOID_1,NAME_1,NAMELSAD_1,Eligible_L,Shape_Leng,Shape_Le_1,Shape_Area
0,33702,450,,,St Emmanuel,Street,,Mobile County,450 St Emmanuel St,,...,0,,,,,,,,,
1,43138,3900,,,Pleasant Valley,Road,,Mobile County,3900 Pleasant Valley Rd,,...,0,,,,,,,,,
2,37054,1800,,,Decatur,Highway,,Jefferson County,1800 Decatur Hwy,,...,0,,,,,,,,,
3,43315,27605,,State Hwy,75,,,Blount County,27605 State Hwy 75,,...,1,,,,,,,,,
4,26017,619,South,,Nineteenth,Street,,Jefferson County,619 South Nineteenth Street,,...,0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,9830,5901,,,Coffeen,Avenue,,Sheridan County,5901 Coffeen Ave,LOT 67,...,1,,,,,,,,,
524,10733,1215,East,,Gibbon,Street,,Albany County,1215 E Gibbon St,,...,1,,,,,,,,,
525,10723,2190,West,,Teton,Boulevard,,Sweetwater County,2190 W Teton Blvd,,...,1,,,,,,,,,
526,10712,2155,,,Frances,Street,,Natrona County,2155 Frances St,,...,0,,,,,,,,,


In [30]:
# A row is tribal (1) when the join found a shape, i.e. OBJECTID is not NaN;
# otherwise it is 0.
nominatim_RUCA_tribal['tribal'] = (~np.isnan(nominatim_RUCA_tribal['OBJECTID'])).astype('int64')
nominatim_RUCA_tribal

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,index_right,OBJECTID,GEOID_1,NAME_1,NAMELSAD_1,Eligible_L,Shape_Leng,Shape_Le_1,Shape_Area,tribal
0,33702,450,,,St Emmanuel,Street,,Mobile County,450 St Emmanuel St,,...,,,,,,,,,,0
1,43138,3900,,,Pleasant Valley,Road,,Mobile County,3900 Pleasant Valley Rd,,...,,,,,,,,,,0
2,37054,1800,,,Decatur,Highway,,Jefferson County,1800 Decatur Hwy,,...,,,,,,,,,,0
3,43315,27605,,State Hwy,75,,,Blount County,27605 State Hwy 75,,...,,,,,,,,,,0
4,26017,619,South,,Nineteenth,Street,,Jefferson County,619 South Nineteenth Street,,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,9830,5901,,,Coffeen,Avenue,,Sheridan County,5901 Coffeen Ave,LOT 67,...,,,,,,,,,,0
524,10733,1215,East,,Gibbon,Street,,Albany County,1215 E Gibbon St,,...,,,,,,,,,,0
525,10723,2190,West,,Teton,Boulevard,,Sweetwater County,2190 W Teton Blvd,,...,,,,,,,,,,0
526,10712,2155,,,Frances,Street,,Natrona County,2155 Frances St,,...,,,,,,,,,,0


In [31]:
# Inspect the column labels of the joined frame (address, RUCA, tribal-join and flag columns).
nominatim_RUCA_tribal.columns

Index(['location_id', 'AddressNumber', 'StreetNamePreDirectional',
       'StreetNamePreType', 'StreetName', 'StreetNamePostType',
       'StreetNamePostDirectional', 'county', 'address_1', 'address_2', 'city',
       'state', 'zip', 'state_abbr', 'location_source_value', 'latitude',
       'longitude', 'geometry', 'Nominatim_address', 'Primary RUCA Code 2010',
       'Population Density (per square mile), 2010', 'rural', 'index_right',
       'OBJECTID', 'GEOID_1', 'NAME_1', 'NAMELSAD_1', 'Eligible_L',
       'Shape_Leng', 'Shape_Le_1', 'Shape_Area', 'tribal'],
      dtype='object')

In [32]:
# Keep only the address columns, the primary RUCA code and the flags: remove the
# population-density column and everything added by the tribal spatial join.
nominatim_RUCA_tribal = nominatim_RUCA_tribal.drop(
    columns=[
        'Population Density (per square mile), 2010',
        # spatial-join bookkeeping
        'index_right',
        # attributes of the tribal priority shapes
        'OBJECTID',
        'GEOID_1',
        'NAME_1',
        'NAMELSAD_1',
        'Eligible_L',
        'Shape_Leng',
        'Shape_Le_1',
        'Shape_Area',
    ]
)
nominatim_RUCA_tribal

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,zip,state_abbr,location_source_value,latitude,longitude,geometry,Nominatim_address,Primary RUCA Code 2010,rural,tribal
0,33702,450,,,St Emmanuel,Street,,Mobile County,450 St Emmanuel St,,...,36603,AL,"450 ST EMMANUEL ST, MOBILE, AL 36603",30.680809,-88.041740,POINT (-88.04174 30.68081),"450, St Emmanuel Street , Mobile, Mobile Coun...",1.0,0,0
1,43138,3900,,,Pleasant Valley,Road,,Mobile County,3900 Pleasant Valley Rd,,...,36609,AL,"3900 PLEASANT VALLEY RD, MOBILE, AL 36609",30.662790,-88.146510,POINT (-88.14651 30.66279),"3900, Pleasant Valley Road , Mobile, Mobile C...",1.0,0,0
2,37054,1800,,,Decatur,Highway,,Jefferson County,1800 Decatur Hwy,,...,35071,AL,"1800 DECATUR HWY, GARDENDALE, AL 35071",33.667229,-86.816798,POINT (-86.81680 33.66723),"1800, Decatur Highway , Gardendale, Jefferson...",1.0,0,0
3,43315,27605,,State Hwy,75,,,Blount County,27605 State Hwy 75,,...,35121,AL,"27605 STATE HWY 75, ONEONTA, AL 35121",33.933428,-86.497810,POINT (-86.49781 33.93343),"27605, State Hwy 75 , Oneonta, Blount County,...",7.0,1,0
4,26017,619,South,,Nineteenth,Street,,Jefferson County,619 South Nineteenth Street,,...,35233,AL,"619 SOUTH NINETEENTH STREET, BIRMINGHAM, AL 35233",33.505603,-86.802035,POINT (-86.80204 33.50560),"619,South Nineteenth Street , Birmingham, Jef...",1.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,9830,5901,,,Coffeen,Avenue,,Sheridan County,5901 Coffeen Ave,LOT 67,...,82801,WY,"5901 COFFEEN AVE LOT 67, SHERIDAN, WY 82801",44.733738,-106.949627,POINT (-106.94963 44.73374),"5901, Coffeen Avenue , Sheridan, Sheridan Cou...",5.0,1,0
524,10733,1215,East,,Gibbon,Street,,Albany County,1215 E Gibbon St,,...,82072,WY,"1215 E GIBBON ST, LARAMIE, WY 82072",41.317888,-105.580370,POINT (-105.58037 41.31789),"1215,East Gibbon Street , Laramie, Albany Cou...",4.0,1,0
525,10723,2190,West,,Teton,Boulevard,,Sweetwater County,2190 W Teton Blvd,,...,82935,WY,"2190 W TETON BLVD, GREEN RIVER, WY 82935",41.495797,-109.460196,POINT (-109.46020 41.49580),"2190,West Teton Boulevard , Green River, Swee...",4.0,1,0
526,10712,2155,,,Frances,Street,,Natrona County,2155 Frances St,,...,82601,WY,"2155 FRANCES ST, CASPER, WY 82601",42.827141,-106.316367,POINT (-106.31637 42.82714),"2155, Frances Street , Casper, Natrona County...",1.0,0,0


In [None]:
# rural_tribal is 1 only for rows that are flagged both rural and tribal, else 0.
nominatim_RUCA_tribal["rural_tribal"] = (
    (nominatim_RUCA_tribal["rural"] == 1) & (nominatim_RUCA_tribal["tribal"] == 1)
).astype("int64")
nominatim_RUCA_tribal

In [38]:
# Persist the flagged sample as CSV in the output folder; the row index is not written.
nominatim_RUCA_tribal_path = os.path.join(abs_path, 'output', 'nominatim_sample_rural_tribal.csv')
nominatim_RUCA_tribal.to_csv(path_or_buf=nominatim_RUCA_tribal_path, index=False)

## Nominatim 1k Sample

In [14]:
# Same tribal pipeline as above, applied to the 1k sample held in `nominatim_RUCA`.

# Left spatial join: rows matching no tribal priority shape get NaN in the joined columns.
nominatim_RUCA_tribal = gpd.sjoin(nominatim_RUCA, tribal_priority_shapes, how='left')

# tribal = 1 when the join found a shape (OBJECTID is not NaN), else 0.
nominatim_RUCA_tribal['tribal'] = (~np.isnan(nominatim_RUCA_tribal['OBJECTID'])).astype('int64')

# Remove the population-density column and everything added by the tribal join.
nominatim_RUCA_tribal = nominatim_RUCA_tribal.drop(
    columns=[
        'Population Density (per square mile), 2010',
        'index_right',
        'OBJECTID',
        'GEOID_1',
        'NAME_1',
        'NAMELSAD_1',
        'Eligible_L',
        'Shape_Leng',
        'Shape_Le_1',
        'Shape_Area',
    ]
)

# rural_tribal = 1 only for rows flagged both rural and tribal.
nominatim_RUCA_tribal["rural_tribal"] = (
    (nominatim_RUCA_tribal["rural"] == 1) & (nominatim_RUCA_tribal["tribal"] == 1)
).astype("int64")
nominatim_RUCA_tribal

Unnamed: 0,location_id,AddressNumber,StreetNamePreDirectional,StreetNamePreType,StreetName,StreetNamePostType,StreetNamePostDirectional,county,address_1,address_2,...,state_abbr,location_source_value,latitude,longitude,geometry,Nominatim_address,Primary RUCA Code 2010,rural,tribal,rural_tribal
0,35717,186,,,Justice,Avenue,,Crenshaw County,186 Justice Avenue,,...,AL,"186 JUSTICE AVENUE, LUVERNE, AL 36049",31.728911,-86.288605,POINT (-86.28860 31.72891),"186, Justice Avenue , Luverne, Crenshaw Count...",2.0,0,0,0
1,25979,3690,,,Grandview,Parkway,,Jefferson County,3690 Grandview Parkway,,...,AL,"3690 GRANDVIEW PARKWAY, BIRMINGHAM, AL 35243",33.432934,-86.717441,POINT (-86.71744 33.43293),"3690, Grandview Parkway , Birmingham, Jeffers...",1.0,0,0,0
2,37108,3802,,,Spring,Avenue,Southwest,Morgan County,3802 Spring Ave Sw,,...,AL,"3802 SPRING AVE SW, DECATUR, AL 35603",34.575407,-86.989779,POINT (-86.98978 34.57541),"3802, Spring Avenue Southwest, Decatur, Morga...",1.0,0,0,0
3,19361,44825,,Highway,17,,,Lamar County,44825 Highway 17,,...,AL,"44825 Highway 17, Vernon, Alabama 35592",33.760578,-88.108610,POINT (-88.10861 33.76058),"44825, Highway 17 , Vernon, Lamar County, Ala...",10.0,1,0,0
4,35230,3520,,,Marion Spillway,Road,,Elmore County,3520 Marion Spillway Road,,...,AL,"3520 MARION SPILLWAY ROAD, ELMORE, AL 36025",32.574305,-86.320806,POINT (-86.32081 32.57430),"3520, Marion Spillway Road , Elmore, Elmore C...",1.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1051,10732,570,,,Marion,Street,,Sheridan County,570 Marion St,,...,WY,"570 MARION ST, SHERIDAN, WY 82801",44.803733,-106.959038,POINT (-106.95904 44.80373),"570, Marion Street , Sheridan, Sheridan Count...",4.0,1,0,0
1052,5164,1251,North,,21st,Street,,Albany County,1251 N 21St St,,...,WY,"1251 N 21ST ST, LARAMIE, WY 82072",41.322809,-105.569705,POINT (-105.56971 41.32281),"1251,North 21st Street , Laramie, Albany Coun...",4.0,1,0,0
1053,10759,954,,,Mccue St 97,,,Albany County,954 Mccue St 97,,...,WY,"954 MCCUE ST 97, LARAMIE, WY 82072",41.321566,-105.609783,POINT (-105.60978 41.32157),"954, Mccue St 97 , Laramie, Albany County, W...",4.0,1,0,0
1054,10778,252,,,Dell Range,Boulevard,,Laramie County,252 Dell Range Blvd,,...,WY,"252 DELL RANGE BLVD, CHEYENNE, WY 82009",41.164098,-104.824091,POINT (-104.82409 41.16410),"252, Dell Range Boulevard , Cheyenne, Laramie...",1.0,0,0,0


In [15]:
# Persist the flagged 1k sample as CSV in the output folder; the row index is not written.
nominatim_RUCA_tribal_path = os.path.join(abs_path, 'output', 'nominatim_sample_1k_rural_tribal.csv')
nominatim_RUCA_tribal.to_csv(path_or_buf=nominatim_RUCA_tribal_path, index=False)