In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
# Read the hard-to-count tracts shapefile into a GeoDataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not re-run elsewhere unless the file is available at the same location.
TRACTS_SHP = "/media/palewire/Passport/hard-to-count-tracts.shp"
tracts = gpd.read_file(TRACTS_SHP)

In [3]:
# Number of tract rows that were loaded.
tracts.shape[0]

72845

In [4]:
# Preview the first five tract rows.
tracts.head(n=5)

Unnamed: 0,GEOID10,TotPopACS1,MRR2010,MRR20pctth,UE_flag,HTCcombofl,geometry
0,1005950300,1813,77.9,0,0,0,"POLYGON ((-85.52743699999999 31.866503, -85.52..."
1,1005950900,3888,74.6,0,0,0,"POLYGON ((-85.16412199999999 31.830595, -85.16..."
2,1005950800,2157,83.5,0,0,0,"POLYGON ((-85.148718 31.909335, -85.1466349999..."
3,1005950700,1775,79.6,0,0,0,"POLYGON ((-85.14578399999999 31.891491, -85.14..."
4,1005950600,2120,79.4,0,0,0,"POLYGON ((-85.14572299999999 31.890106, -85.14..."


In [5]:
# Read the TIGER 2018 AIANNH shapefile (path relative to the working directory).
TRIBAL_SHP = "data/tiger/tl_2018_us_aiannh/tl_2018_us_aiannh.shp"
tribal_lands = gpd.read_file(TRIBAL_SHP)

In [6]:
# Number of tribal-area rows that were loaded.
tribal_lands.shape[0]

845

In [10]:
# Keep only the three columns used downstream: GEOID, NAMELSAD and the geometry.
tribal_columns = ['GEOID', 'NAMELSAD', 'geometry']
trimmed_tribal = tribal_lands[tribal_columns]

In [11]:
# Give the tribal columns explicit `*_tribal` names ahead of the spatial join.
tribal_column_names = dict(GEOID="geoid_tribal", NAMELSAD="name_tribal")
prepped_tribal = trimmed_tribal.rename(columns=tribal_column_names)

In [12]:
# Preview the first five renamed tribal-area rows.
prepped_tribal.head(n=5)

Unnamed: 0,geoid_tribal,name_tribal,geometry
0,2320R,Mohegan Reservation,"(POLYGON ((-72.089125 41.480395, -72.089004 41..."
1,9100R,Golden Hill Paugussett (state) Reservation,"(POLYGON ((-72.26917299999999 41.554725, -72.2..."
2,4110R,Table Mountain Rancheria,"POLYGON ((-119.640915 36.984246, -119.640807 3..."
3,1380R,Greenville Rancheria,"POLYGON ((-120.897365 40.150998, -120.897334 4..."
4,1110R,Flathead Reservation,"POLYGON ((-114.851209 47.887207, -114.819894 4..."


In [40]:
# Left spatial join: every tract row is kept, and the tribal columns are filled
# only where the tract geometry lies within a tribal-area geometry.
# NOTE(review): later geopandas releases rename `op` to `predicate` — confirm
# against the installed version before re-running.
# NOTE(review): nothing here checks that both frames share a CRS — TODO confirm.
merged = gpd.sjoin(
    left_df=tracts,
    right_df=prepped_tribal,
    how="left",
    op="within",
)

In [41]:
# Row count after the join, for comparison with the number of input tracts.
merged.shape[0]

72845

In [42]:
# Preview the first five joined rows.
merged.head(n=5)

Unnamed: 0,GEOID10,TotPopACS1,MRR2010,MRR20pctth,UE_flag,HTCcombofl,geometry,index_right,geoid_tribal,name_tribal
0,1005950300,1813,77.9,0,0,0,"POLYGON ((-85.52743699999999 31.866503, -85.52...",,,
1,1005950900,3888,74.6,0,0,0,"POLYGON ((-85.16412199999999 31.830595, -85.16...",,,
2,1005950800,2157,83.5,0,0,0,"POLYGON ((-85.148718 31.909335, -85.1466349999...",,,
3,1005950700,1775,79.6,0,0,0,"POLYGON ((-85.14578399999999 31.891491, -85.14...",,,
4,1005950600,2120,79.4,0,0,0,"POLYGON ((-85.14572299999999 31.890106, -85.14...",,,


In [43]:
# Flag tracts that matched a tribal area in the spatial join:
# 1 when `geoid_tribal` is populated, 0 when it is null.
# A vectorised null check replaces the per-row lambda; the explicit int64 cast
# keeps the flag an integer 0/1 column regardless of platform or frame size.
merged['is_tribal'] = merged.geoid_tribal.notnull().astype("int64")

In [44]:
# Keep one row per tract identifier; the first occurrence of each GEOID10 wins.
deduped = merged.drop_duplicates(subset=["GEOID10"])

In [45]:
# Row count after de-duplicating on GEOID10.
deduped.shape[0]

72845

In [46]:
# Tract count (GEOID10) and summed TotPopACS1 for each is_tribal value.
deduped.groupby(['is_tribal']).agg(
    dict(GEOID10="count", TotPopACS1="sum")
)

Unnamed: 0_level_0,GEOID10,TotPopACS1
is_tribal,Unnamed: 1_level_1,Unnamed: 2_level_1
0,72167,318200708
1,678,2679988


In [47]:
# Tract count (GEOID10) and summed TotPopACS1 for each
# (is_tribal, MRR20pctth) combination.
deduped.groupby(['is_tribal', 'MRR20pctth']).agg(
    dict(GEOID10="count", TotPopACS1="sum")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,GEOID10,TotPopACS1
is_tribal,MRR20pctth,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,58016,260013788
0,1,14151,58186920
1,0,469,1914874
1,1,209,765114


In [48]:
# Tract count (GEOID10) and summed TotPopACS1 for each
# (is_tribal, UE_flag) combination.
deduped.groupby(['is_tribal', 'UE_flag']).agg(
    dict(GEOID10="count", TotPopACS1="sum")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,GEOID10,TotPopACS1
is_tribal,UE_flag,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,71762,316887877
0,1,405,1312831
1,0,636,2519814
1,1,42,160174


In [50]:
# Tract count (GEOID10) and summed TotPopACS1 for each
# (is_tribal, HTCcombofl) combination.
deduped.groupby(['is_tribal', 'HTCcombofl']).agg(
    dict(GEOID10="count", TotPopACS1="sum")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,GEOID10,TotPopACS1
is_tribal,HTCcombofl,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,57618,258726205
0,1,14542,59449255
0,2,7,25248
1,0,427,1754700
1,1,251,925288


In [51]:
# Write only the tracts flagged is_tribal == 1 out to a shapefile.
# NOTE(review): the Shapefile format limits field names to 10 characters, so
# columns such as `geoid_tribal` will presumably be truncated on write — confirm.
tribal_tracts = deduped[deduped["is_tribal"] == 1]
tribal_tracts.to_file("data/analysis/tracts-on-tribal-lands.shp")