# Libraries and Global Variables

In [19]:
import pandas as pd
import os
import numpy as np
import matplotlib.ticker as mtick
from IPython.display import display, Markdown

#import arcgis libraries
from arcgis.gis import *
gis = GIS()

In [20]:
working_directory = os.getcwd()
data_folder         = os.path.join(working_directory, "data"        )
intermediate_folder = os.path.join(working_directory, "intermediate")
results_folder      = os.path.join(working_directory, "results"     )
acs_folder          = os.path.join(data_folder      , "ACS"         )
taz_folder          = os.path.join(working_directory, "TAZ"         )
acs_filename        = "ACSST5Y2019.S1401_data_with_overlays_2022-01-19T171953.csv"

shp_taz =  os.path.join(taz_folder, r"USTM_TAZ_2021_09_22.shp")

#column codes from ACS data, will verify later in process
pub_col  = "S1401_C03_010E" #Estimate!!In public school!!Population enrolled in college or graduate school
pri_col = "S1401_C05_010E" #Estimate!!In private school!!Population enrolled in college or graduate school
tot_col   = "S1401_C01_010E" #Estimate!!Total!!Population enrolled in college or graduate school

pub = "ACS_Public"
pri = "ACS_Private"
tot = "ACS_Total"

print(working_directory)
print(data_folder)
print(results_folder)
print(acs_folder)

e:\GitHub\TDM-College-Enrollment-v9
e:\GitHub\TDM-College-Enrollment-v9\data
e:\GitHub\TDM-College-Enrollment-v9\results
e:\GitHub\TDM-College-Enrollment-v9\data\ACS


In [21]:
#df_TAZ_resunit = pd.pivot_table(df_TAZ_resunitcount, values='UNIT_COUNT', index=['CO_TAZID'],
#                    columns=['TYPE'], aggfunc=np.sum)
#df_TAZ_resunit = df_TAZ_resunit.fillna(0)
#df_TAZ_resunit

In [22]:
#tract shapefile location
shp_tract = os.path.join(acs_folder, r"tl_2019_49_tract\tl_2019_49_tract.shp")
print(shp_tract)

#import into spatially-enabled DataFrame
sdf_tract = pd.DataFrame.spatial.from_featureclass(shp_tract)

#delete unwanted columns
sdf_tract = sdf_tract.drop(columns=['FID', 'NAME','NAMELSAD','MTFCC','FUNCSTAT','ALAND','AWATER','INTPTLAT','INTPTLON'])

sdf_tract['GEO_ID'] = '1400000US' + sdf_tract['GEOID']

sdf_tract.head()

e:\GitHub\TDM-College-Enrollment-v9\data\ACS\tl_2019_49_tract\tl_2019_49_tract.shp


Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,SHAPE,GEO_ID
0,49,49,2101,49049002101,"{""rings"": [[[-111.700557, 40.256361], [-111.70...",1400000US49049002101
1,49,49,2102,49049002102,"{""rings"": [[[-111.695308, 40.244827], [-111.69...",1400000US49049002102
2,49,35,113101,49035113101,"{""rings"": [[[-112.005461, 40.602444], [-112.00...",1400000US49035113101
3,49,5,1201,49005001201,"{""rings"": [[[-111.835015, 41.699234], [-111.83...",1400000US49005001201
4,49,35,113102,49035113102,"{""rings"": [[[-112.004958, 40.580537], [-112.00...",1400000US49035113102


In [23]:
#create map1 with zoom set to Salt Lake (any city or place name can be used)
map1 = gis.map('Salt Lake')

#add tract sdf
sdf_tract.spatial.plot(map_widget = map1,
                       renderer_type='u', # specify the unique value renderer using its notation 'u'
                       col='GEOID'  # column to get unique values from
                       )

map1.layout.height='600px'

#add map title
display(Markdown('<h2><center>Census Tracts</center></h2>'))

#display the map
map1

<h2><center>Census Tracts</center></h2>

MapView(layout=Layout(height='600px', width='100%'))

# Create Tract to Zip Code DataFrame

## Create spatially-enabled dataframe (sdf) with tract centroids

In [24]:
#get coordinates of centroid of sdf, just as test to see if spatial attributes retained
print(sdf_tract_college.spatial.centroid)

NameError: name 'sdf_tract_college' is not defined

In [25]:
#get coordinates of centroid of first shape in sdf, just as test
sdf_tract_college.iloc[0].SHAPE.centroid

NameError: name 'sdf_tract_college' is not defined

In [None]:
#create column with x and y coordinates of centroids
sdf_tract_centroid = sdf_tract_college[['GEOID','SHAPE',pub,pri,tot]].copy()
sdf_tract_centroid['x_lon'] = sdf_tract_centroid.apply(lambda row: row.SHAPE.centroid[0], axis = 1)
sdf_tract_centroid['y_lat'] = sdf_tract_centroid.apply(lambda row: row.SHAPE.centroid[1], axis = 1)

sdf_tract_centroid.head()

Unnamed: 0,GEOID,SHAPE,ACS_Public,ACS_Private,ACS_Total,x_lon,y_lat
0,49049000000.0,"{""rings"": [[[-111.700557, 40.256361], [-111.70...",221,231,452,-111.68798,40.253586
1,49049000000.0,"{""rings"": [[[-111.695308, 40.244827], [-111.69...",211,131,342,-111.681001,40.247234
2,49035110000.0,"{""rings"": [[[-112.005461, 40.602444], [-112.00...",370,44,414,-111.993122,40.596819
3,49005000000.0,"{""rings"": [[[-111.835015, 41.699234], [-111.83...",238,52,290,-111.81449,41.671331
4,49035110000.0,"{""rings"": [[[-112.004958, 40.580537], [-112.00...",195,48,243,-111.983825,40.585294


In [None]:
#create sdf of just centroid points
#use from_xy functionality to give points shape
sdf_points = pd.DataFrame.spatial.from_xy(sdf_tract_centroid,'x_lon','y_lat') 

In [None]:
#verify that shape exists and sdf was created correctly
print(sdf_points.iloc[0].SHAPE)

{'spatialReference': {'wkid': 4326}, 'x': -111.68798006685456, 'y': 40.25358575487886}


## Read in ZIP Code shapefile as sdf and change projection

In [None]:
#import zipcode DataFrame from shapefile
zip_shp = working_directory + r"\ZIPCode\ZIPCode.shp"
sdf_zip = pd.DataFrame.spatial.from_featureclass(zip_shp)

In [None]:
#check attribute of bounding box of zipcode sdf
print(sdf_zip.spatial.bbox)

{'rings': [[[228585.5, 4094775.51], [228585.5, 4653578.01], [673940.5, 4653578.01], [673940.5, 4094775.51], [228585.5, 4094775.51]]], 'spatialReference': {'wkid': 26912, 'latestWkid': 26912}}


In [None]:
#change projection to WGS84 to match tracts
sdf_zip.spatial.project(4326)

True

In [None]:
#check projection change by looking at bounding box coordinates
print(sdf_zip.spatial.bbox)

{'rings': [[[-114.05292513469266, 36.99769411237049], [-114.05292513469266, 42.00171782734483], [-109.04150092483052, 42.00171782734483], [-109.04150092483052, 36.99769411237049], [-114.05292513469266, 36.99769411237049]]], 'spatialReference': {'wkid': 4326, 'latestWkid': 4326}}


## Join Tract and ZipCodes

In [None]:
#spatial join tract centroid data to zip code data
sdf_tract_zipjoin = sdf_tract_centroid.spatial.join(sdf_zip)

sdf_tract_zipjoin.head()

Unnamed: 0,GEOID,SHAPE,ACS_Public,ACS_Private,ACS_Total,x_lon,y_lat,index_right,FID,zip5,countynbr,name,symbol
0,49049000000.0,"{""spatialReference"": {""wkid"": 4326}, ""x"": -111...",221,231,452,-111.68798,40.253586,220,220,84604,25,PROVO,3
1,49049000000.0,"{""spatialReference"": {""wkid"": 4326}, ""x"": -111...",211,131,342,-111.681001,40.247234,220,220,84604,25,PROVO,3
2,49049000000.0,"{""spatialReference"": {""wkid"": 4326}, ""x"": -111...",207,814,1021,-111.623677,40.256324,220,220,84604,25,PROVO,3
3,49049000000.0,"{""spatialReference"": {""wkid"": 4326}, ""x"": -111...",1514,1656,3170,-111.670009,40.258522,220,220,84604,25,PROVO,3
4,49049000000.0,"{""spatialReference"": {""wkid"": 4326}, ""x"": -111...",173,178,351,-111.636904,40.27687,220,220,84604,25,PROVO,3


In [None]:
#create map centered on Salt Lake
map_tract_zip = gis.map('Salt Lake')

#plot sdf
sdf_tract_zipjoin.spatial.plot(map_widget=map_tract_zip)

#add map title
display(Markdown('<h2><center>Census Tracts Centroids Joined to Zip Codes</center></h2>'))

#display map
map_tract_zip

<h2><center>Census Tracts Centroids Joined to Zip Codes</center></h2>

MapView(layout=Layout(height='400px', width='100%'))

# Tract to TAZ Dissagregation, Scoring of TAZs (using parcel data from REMM)

In [None]:
df_buildings = pd.read_csv(buildings_filename, usecols=['parcel_id','general_type','residential_units'])

#only include residential general_type
df_buildings = df_buildings[(df_buildings.general_type == 'SF Residential') | (df_buildings.general_type == 'MF Residential')]
df_buildings

Unnamed: 0,residential_units,parcel_id,general_type
0,1.0,487357,SF Residential
1,1.0,468869,SF Residential
2,1.0,544735,MF Residential
3,1.0,14751,SF Residential
4,1.0,14619,SF Residential
...,...,...,...
676235,4.0,591505,MF Residential
676236,34.0,646852,SF Residential
676237,1.0,572186,MF Residential
676238,5.0,764670,SF Residential


In [None]:
df_parcel_taz = pd.read_csv(parcel_taz_filename, usecols=['parcel_id','TAZID'])
df_parcel_taz

Unnamed: 0,parcel_id,TAZID
0,671124.0,2246
1,588354.0,2246
2,664267.0,2246
3,745122.0,2246
4,670759.0,2246
...,...,...
841701,739015.0,1830
841702,739015.0,1830
841703,739015.0,1830
841704,739015.0,1830


In [None]:
#join
df_building_taz = pd.merge(df_buildings, df_parcel_taz, on="parcel_id", how="inner")
df_building_taz

Unnamed: 0,residential_units,parcel_id,general_type,TAZID
0,1.0,487357,SF Residential,294
1,1.0,468869,SF Residential,217
2,1.0,544735,MF Residential,312
3,1.0,14751,SF Residential,1032
4,1.0,14619,SF Residential,1032
...,...,...,...,...
629450,2.0,666800,SF Residential,2337
629451,4.0,591505,MF Residential,2360
629452,34.0,646852,SF Residential,1865
629453,1.0,572186,MF Residential,2398


In [None]:
df_taz_resunits = df_building_taz.groupby(['TAZID','general_type'], as_index=False).agg({'residential_units': [np.size, np.sum]})

#collapse multi-level
df_taz_resunits.columns = df_taz_resunits.columns.get_level_values(0)

#rename columns
df_taz_resunits.columns = ['TAZID', 'general_type', 'Parcels', 'Units']

df_taz_resunits

Unnamed: 0,TAZID,general_type,Parcels,Units
0,141,MF Residential,8.0,13.0
1,141,SF Residential,126.0,123.0
2,142,MF Residential,1.0,1.0
3,142,SF Residential,30.0,30.0
4,143,SF Residential,50.0,52.0
...,...,...,...,...
3819,2868,MF Residential,5.0,6.0
3820,2868,SF Residential,73.0,77.0
3821,2869,SF Residential,12.0,12.0
3822,2870,SF Residential,6.0,6.0


In [None]:
#pivot with rows of TAZID, columns of general_type, and values
df_taz_resunits_pvt = df_taz_resunits.pivot(index='TAZID', columns='general_type')['Units']

#reset index column to remove general_type name
df_taz_resunits_pvt.reset_index(level=0, inplace=True)
df_taz_resunits_pvt.columns= ['TAZID','MF','SF']

#replace NaN with zeros
df_taz_resunits_pvt["MF"] = df_taz_resunits_pvt["MF"].fillna(0)
df_taz_resunits_pvt["SF"] = df_taz_resunits_pvt["SF"].fillna(0)

df_taz_resunits_pvt

Unnamed: 0,TAZID,MF,SF
0,141,13.0,123.0
1,142,1.0,30.0
2,143,0.0,52.0
3,144,0.0,31.0
4,145,2.0,172.0
...,...,...,...
2291,2867,24.0,160.0
2292,2868,6.0,77.0
2293,2869,0.0,12.0
2294,2870,0.0,6.0


In [None]:
df_taz_resunits_pvt.sum()

TAZID    3435772.0
MF        265953.0
SF        565158.0
dtype: float64

In [None]:
#create score for distributing from census tract to TAZ
df_taz_resunits_scores = df_taz_resunits_pvt.copy()

df_taz_resunits_scores['score01'] =  df_taz_resunits_pvt.apply(lambda row: row.MF*1 + row.SF*1, axis = 1)
df_taz_resunits_scores['score05'] =  df_taz_resunits_pvt.apply(lambda row: row.MF*5 + row.SF*1, axis = 1)
df_taz_resunits_scores['score10'] =  df_taz_resunits_pvt.apply(lambda row: row.MF*10 + row.SF*1, axis = 1)

df_taz_resunits_scores

Unnamed: 0,TAZID,MF,SF,score01,score05,score10
0,141,13.0,123.0,136.0,188.0,253.0
1,142,1.0,30.0,31.0,35.0,40.0
2,143,0.0,52.0,52.0,52.0,52.0
3,144,0.0,31.0,31.0,31.0,31.0
4,145,2.0,172.0,174.0,182.0,192.0
...,...,...,...,...,...,...
2291,2867,24.0,160.0,184.0,280.0,400.0
2292,2868,6.0,77.0,83.0,107.0,137.0
2293,2869,0.0,12.0,12.0,12.0,12.0
2294,2870,0.0,6.0,6.0,6.0,6.0


In [None]:
csv_taz_tract = os.path.join(data_folder, r"TAZ_CensusTract.csv")

#csv has two header rows
df_taz_tract = pd.read_csv(csv_taz_tract, usecols=['TAZID','GEOID'])

df_taz_tract

Unnamed: 0,TAZID,GEOID
0,460,49011125401
1,461,49011125401
2,425,49011125401
3,427,49011125401
4,457,49011125401
...,...,...
2876,898,49035113511
2877,1422,49035113527
2878,940,49035113532
2879,1439,49035112907


In [None]:
df_taz_resunits_scores_geoid = pd.merge(df_taz_resunits_scores,df_taz_tract,on='TAZID',how='inner')
df_taz_resunits_scores_geoid

Unnamed: 0,TAZID,MF,SF,score01,score05,score10,GEOID
0,141,13.0,123.0,136.0,188.0,253.0,49057210403
1,142,1.0,30.0,31.0,35.0,40.0,49057210403
2,143,0.0,52.0,52.0,52.0,52.0,49057210403
3,144,0.0,31.0,31.0,31.0,31.0,49057210403
4,145,2.0,172.0,174.0,182.0,192.0,49057210403
...,...,...,...,...,...,...,...
2291,2867,24.0,160.0,184.0,280.0,400.0,49049010600
2292,2868,6.0,77.0,83.0,107.0,137.0,49049010600
2293,2869,0.0,12.0,12.0,12.0,12.0,49049010600
2294,2870,0.0,6.0,6.0,6.0,6.0,49049010113


In [None]:
df_taz_resunits_scores_geoid_sums = df_taz_resunits_scores_geoid.groupby(['GEOID'], as_index=False).agg({'SF': [np.size, np.sum], 'MF': [np.sum], 'score01': [np.sum], 'score05': [np.sum], 'score10': [np.sum]})


#collapse multi-level
df_taz_resunits_scores_geoid_sums.columns = df_taz_resunits_scores_geoid_sums.columns.get_level_values(0)

#rename columns
df_taz_resunits_scores_geoid_sums.columns = ['GEOID', 'tractTAZs', 'tractSF', 'tractMF', 'tractScore01', 'tractScore05', 'tractScore10']


df_taz_resunits_scores_geoid_sums

Unnamed: 0,GEOID,tractTAZs,tractSF,tractMF,tractScore01,tractScore05,tractScore10
0,49011125102,3.0,1488.0,128.0,1616.0,2128.0,2768.0
1,49011125103,4.0,1392.0,509.0,1901.0,3937.0,6482.0
2,49011125104,4.0,1692.0,139.0,1831.0,2387.0,3082.0
3,49011125200,1.0,61.0,0.0,61.0,61.0,61.0
4,49011125301,4.0,1605.0,666.0,2271.0,4935.0,8265.0
...,...,...,...,...,...,...,...
436,49057210900,5.0,2593.0,433.0,3026.0,4758.0,6923.0
437,49057211000,2.0,1103.0,15.0,1118.0,1178.0,1253.0
438,49057211100,5.0,1439.0,611.0,2050.0,4494.0,7549.0
439,49057211201,3.0,1448.0,680.0,2128.0,4848.0,8248.0


In [None]:
df_taz_resunits_scores_geoid_sums.sum()

GEOID           2.162604e+13
tractTAZs       2.296000e+03
tractSF         5.651580e+05
tractMF         2.659530e+05
tractScore01    8.311110e+05
tractScore05    1.894923e+06
tractScore10    3.224688e+06
dtype: float64

In [None]:
df_taz_resunits_scores_geoid_withsums = pd.merge(df_taz_resunits_scores_geoid, df_taz_resunits_scores_geoid_sums, on='GEOID', how='left')
df_taz_resunits_scores_geoid_withsums

Unnamed: 0,TAZID,MF,SF,score01,score05,score10,GEOID,tractTAZs,tractSF,tractMF,tractScore01,tractScore05,tractScore10
0,141,13.0,123.0,136.0,188.0,253.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0
1,142,1.0,30.0,31.0,35.0,40.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0
2,143,0.0,52.0,52.0,52.0,52.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0
3,144,0.0,31.0,31.0,31.0,31.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0
4,145,2.0,172.0,174.0,182.0,192.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2291,2867,24.0,160.0,184.0,280.0,400.0,49049010600,27.0,1241.0,102.0,1343.0,1751.0,2261.0
2292,2868,6.0,77.0,83.0,107.0,137.0,49049010600,27.0,1241.0,102.0,1343.0,1751.0,2261.0
2293,2869,0.0,12.0,12.0,12.0,12.0,49049010600,27.0,1241.0,102.0,1343.0,1751.0,2261.0
2294,2870,0.0,6.0,6.0,6.0,6.0,49049010113,14.0,2180.0,31.0,2211.0,2335.0,2490.0


In [None]:
df_taz_resunits_scores_geoid_withsums['score01_tractshare'] =  df_taz_resunits_scores_geoid_withsums.apply(lambda row: row.score01 / row.tractScore01, axis = 1)
df_taz_resunits_scores_geoid_withsums['score05_tractshare'] =  df_taz_resunits_scores_geoid_withsums.apply(lambda row: row.score05 / row.tractScore05, axis = 1)
df_taz_resunits_scores_geoid_withsums['score10_tractshare'] =  df_taz_resunits_scores_geoid_withsums.apply(lambda row: row.score10 / row.tractScore10, axis = 1)
df_taz_resunits_scores_geoid_withsums

Unnamed: 0,TAZID,MF,SF,score01,score05,score10,GEOID,tractTAZs,tractSF,tractMF,tractScore01,tractScore05,tractScore10,score01_tractshare,score05_tractshare,score10_tractshare
0,141,13.0,123.0,136.0,188.0,253.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0,0.040130,0.034938,0.032143
1,142,1.0,30.0,31.0,35.0,40.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0,0.009147,0.006504,0.005082
2,143,0.0,52.0,52.0,52.0,52.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0,0.015344,0.009664,0.006607
3,144,0.0,31.0,31.0,31.0,31.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0,0.009147,0.005761,0.003939
4,145,2.0,172.0,174.0,182.0,192.0,49057210403,22.0,2891.0,498.0,3389.0,5381.0,7871.0,0.051343,0.033823,0.024393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2291,2867,24.0,160.0,184.0,280.0,400.0,49049010600,27.0,1241.0,102.0,1343.0,1751.0,2261.0,0.137007,0.159909,0.176913
2292,2868,6.0,77.0,83.0,107.0,137.0,49049010600,27.0,1241.0,102.0,1343.0,1751.0,2261.0,0.061802,0.061108,0.060593
2293,2869,0.0,12.0,12.0,12.0,12.0,49049010600,27.0,1241.0,102.0,1343.0,1751.0,2261.0,0.008935,0.006853,0.005307
2294,2870,0.0,6.0,6.0,6.0,6.0,49049010113,14.0,2180.0,31.0,2211.0,2335.0,2490.0,0.002714,0.002570,0.002410


In [None]:
sdf_taz = pd.DataFrame.spatial.from_featureclass(taz_filename)
sdf_taz

Unnamed: 0,FID,TAZID,SORT,CO_TAZID,SUBAREAID,EXTERNAL,ACRES,DEVACRES,X,Y,...,DLRG_NAME,DISTMED,DMED_NAME,DISTSML,DSML_NAME,AIRSAGE,WF_AIRSAGE,SLC,NAAPM25,SHAPE
0,0,460,1,110460,1,0,322.29,322.29,409807.97148,4.551224e+06,...,North Davis,11,Clinton-W.Pt-Clearfield,98,Clinton-W.Pt-Clearfield,1106,13,2,NonAttnmnt|11|WFRC,"{""rings"": [[[409006.358836148, 4551638.5268631..."
1,1,461,2,110461,1,0,316.91,316.91,409818.05584,4.552025e+06,...,North Davis,11,Clinton-W.Pt-Clearfield,98,Clinton-W.Pt-Clearfield,1106,13,2,NonAttnmnt|11|WFRC,"{""rings"": [[[410620.28939738194, 4551617.45141..."
2,2,656,3,350656,1,0,828.83,701.91,407896.91458,4.518164e+06,...,North-west SL,17,NW Quad-International,155,NW Quad-International,3518,32,1,NonAttnmnt|35|WFRC,"{""rings"": [[[408728.6159094579, 4519566.605985..."
3,3,657,4,350657,1,0,531.35,531.28,409430.46088,4.518727e+06,...,North-west SL,17,NW Quad-International,155,NW Quad-International,3518,32,1,NonAttnmnt|35|WFRC,"{""rings"": [[[408712.0078618436, 4517726.402942..."
4,4,2696,5,492696,1,0,575.28,574.81,435443.25244,4.435982e+06,...,Southwest,45,Payson-Salem,550,Payson-Salem,4907,43,2,NonAttnmnt|49|MAG,"{""rings"": [[[436219.45121278096, 4436668.96657..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2876,2876,2307,2877,492307,1,0,163.96,163.96,442094.43254,4.463009e+06,...,Central,41,Orem-Vineyard,508,Orem-Vineyard,4904,40,2,NonAttnmnt|49|MAG,"{""rings"": [[[441685.71000000136, 4463418.93999..."
2877,2877,692,2878,350692,1,0,413.00,348.59,405441.29072,4.506191e+06,...,North-west SL,19,MVC-Magna-ATK,163,MVC-Magna-ATK,3519,33,2,NonAttnmnt|35|WFRC,"{""rings"": [[[406311.6023608913, 4505925.350275..."
2878,2878,697,2879,350697,1,0,290.96,290.96,407021.63812,4.506072e+06,...,North-west SL,19,MVC-Magna-ATK,164,MVC-Magna-ATK,3519,33,2,NonAttnmnt|35|WFRC,"{""rings"": [[[406289.42766158096, 4506464.54348..."
2879,2879,698,2880,350698,1,0,161.32,161.32,407310.52727,4.505313e+06,...,North-west SL,19,MVC-Magna-ATK,164,MVC-Magna-ATK,3519,33,2,NonAttnmnt|35|WFRC,"{""rings"": [[[407750.5751155648, 4505187.891955..."


In [None]:
sdf_taz_geoidscores = pd.merge(sdf_taz,df_taz_resunits_scores_geoid_withsums,on='TAZID',how='left')
sdf_taz_geoidscores

Unnamed: 0,FID,TAZID,SORT,CO_TAZID,SUBAREAID,EXTERNAL,ACRES,DEVACRES,X,Y,...,GEOID,tractTAZs,tractSF,tractMF,tractScore01,tractScore05,tractScore10,score01_tractshare,score05_tractshare,score10_tractshare
0,0,460,1,110460,1,0,322.29,322.29,409807.97148,4.551224e+06,...,4.901113e+10,10.0,3512.0,1039.0,4551.0,8707.0,13902.0,0.029664,0.038015,0.041433
1,1,461,2,110461,1,0,316.91,316.91,409818.05584,4.552025e+06,...,4.901113e+10,10.0,3512.0,1039.0,4551.0,8707.0,13902.0,0.125467,0.237855,0.283844
2,2,656,3,350656,1,0,828.83,701.91,407896.91458,4.518164e+06,...,,,,,,,,,,
3,3,657,4,350657,1,0,531.35,531.28,409430.46088,4.518727e+06,...,,,,,,,,,,
4,4,2696,5,492696,1,0,575.28,574.81,435443.25244,4.435982e+06,...,4.904901e+10,44.0,773.0,378.0,1151.0,2663.0,4553.0,0.011295,0.004882,0.002855
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2876,2876,2307,2877,492307,1,0,163.96,163.96,442094.43254,4.463009e+06,...,4.904900e+10,3.0,986.0,686.0,1672.0,4416.0,7846.0,0.205742,0.104167,0.077109
2877,2877,692,2878,350692,1,0,413.00,348.59,405441.29072,4.506191e+06,...,4.903511e+10,8.0,2086.0,725.0,2811.0,5711.0,9336.0,0.000356,0.000175,0.000107
2878,2878,697,2879,350697,1,0,290.96,290.96,407021.63812,4.506072e+06,...,4.903511e+10,8.0,2086.0,725.0,2811.0,5711.0,9336.0,0.327641,0.205393,0.159383
2879,2879,698,2880,350698,1,0,161.32,161.32,407310.52727,4.505313e+06,...,4.903511e+10,8.0,2086.0,725.0,2811.0,5711.0,9336.0,0.262540,0.335143,0.362468


# Student Housing - Dorms

In [14]:
#https://services1.arcgis.com/99lidPhWCzftIe9K/arcgis/rest/services/CensusBlocks2020/FeatureServer
item = gis.content.get("2caf01e704614114868a3d801b82def6")
flayer = item.layers[0]

# create a Spatially Enabled DataFrame object
sdfUtahCensusBlocks2020 = pd.DataFrame.spatial.from_layer(flayer)


Unnamed: 0,ALAND20,AWATER20,BLOCKCE20,COUNTYFP20,FUNCSTAT20,GEOID20,GQ_ADLTCOR,GQ_INSOTH,GQ_JUVCOR,GQ_MILTARY,...,PP_TOTAL,PP_WHTALN,SHAPE,STATEFP20,Shape__Area,Shape__Length,TRACTCE20,UACE20,UATYPE20,UR20
0,67739,0,4135,047,S,490479402014135,0,0,0,0,...,0,0,"{""rings"": [[[-12225637.7146, 4918434.1247], [-...",49,1.167956e+05,1488.962562,940201,,,
1,197870,0,4116,047,S,490479402014116,0,0,0,0,...,1,1,"{""rings"": [[[-12224779.5528, 4921690.4466], [-...",49,3.414048e+05,3755.357290,940201,,,
2,4796029,0,4038,047,S,490479402014038,0,0,0,0,...,0,0,"{""rings"": [[[-12234086.9793, 4941958.6215], [-...",49,8.310653e+06,15845.989826,940201,,,
3,915708,0,2013,047,S,490479682012013,0,0,0,0,...,39,39,"{""rings"": [[[-12187827.1517, 4927912.9818], [-...",49,1.581722e+06,6413.947397,968201,,,
4,40427,0,1031,035,S,490351139091031,0,0,0,0,...,116,93,"{""rings"": [[[-12475282.8603, 4953873.0851], [-...",49,7.020497e+04,1589.856914,113909,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71202,128089,0,2047,005,S,490050002012047,0,0,0,0,...,0,0,"{""rings"": [[[-12452318.5537, 5132690.9828], [-...",49,2.307504e+05,2269.568300,000201,,,
71203,79764,0,1020,005,S,490050005021020,0,0,0,0,...,534,440,"{""rings"": [[[-12450398.2914, 5125631.9218], [-...",49,1.434790e+05,1601.916115,000502,,,
71204,2959,0,3004,043,S,490439643043004,0,0,0,0,...,9,3,"{""rings"": [[[-12419945.9321, 4974420.9888], [-...",49,5.159109e+03,287.590755,964304,,,
71205,120106,0,1000,005,S,490050009001000,0,0,0,0,...,390,360,"{""rings"": [[[-12447506.0986, 5121416.9132], [-...",49,2.158585e+05,1923.038444,000900,,,


Index(['ALAND20', 'AWATER20', 'BLOCKCE20', 'COUNTYFP20', 'FUNCSTAT20',
       'GEOID20', 'GQ_ADLTCOR', 'GQ_INSOTH', 'GQ_JUVCOR', 'GQ_MILTARY',
       'GQ_NINOTH', 'GQ_NURS', 'GQ_STUDENT', 'GQ_TOTAL', 'GQ_TOTINS',
       'GQ_TOTNIN', 'HU_OCC', 'HU_TOTAL', 'HU_VAC', 'INTPTLAT20', 'INTPTLON20',
       'LOGRECNO', 'MTFCC20', 'NAME20', 'OBJECTID', 'PP_AGGTE18', 'PP_ASNALN',
       'PP_BAAALN', 'PP_HISPLAT', 'PP_HPIALN', 'PP_MTRACE', 'PP_NAMALN',
       'PP_OTHALN', 'PP_SNRACE', 'PP_TOTAL', 'PP_WHTALN', 'SHAPE', 'STATEFP20',
       'Shape__Area', 'Shape__Length', 'TRACTCE20', 'UACE20', 'UATYPE20',
       'UR20'],
      dtype='object')

In [16]:

# GQ_STUDENT is P0050008 College/University student housing
# filter out everything except student housing
sdfUtahCensusBlocks2020 = sdfUtahCensusBlocks2020[sdfUtahCensusBlocks2020['GQ_STUDENT']>0]
display(sdfUtahCensusBlocks2020)
display(sdfUtahCensusBlocks2020.columns)

Unnamed: 0,ALAND20,AWATER20,BLOCKCE20,COUNTYFP20,FUNCSTAT20,GEOID20,GQ_ADLTCOR,GQ_INSOTH,GQ_JUVCOR,GQ_MILTARY,...,PP_TOTAL,PP_WHTALN,SHAPE,STATEFP20,Shape__Area,Shape__Length,TRACTCE20,UACE20,UATYPE20,UR20
1156,100775,0,2007,049,S,490490011032007,0,0,0,0,...,562,463,"{""rings"": [[[-12435869.2912, 4906034.4025], [-...",49,173317.246094,2716.174571,001103,,,
1894,28022,0,1009,049,S,490490019001009,0,0,0,0,...,921,804,"{""rings"": [[[-12430088.9137, 4901842.0633], [-...",49,48153.085938,992.327078,001900,,,
2826,283564,0,2001,053,S,490532713002001,0,0,0,0,...,506,355,"{""rings"": [[[-12642389.8789, 4453525.5358], [-...",49,446627.105469,2839.298132,271300,,,
3315,335692,0,1005,005,S,490050007021005,0,0,0,0,...,465,408,"{""rings"": [[[-12447156.2215, 5122260.5425], [-...",49,603435.773438,3329.429733,000702,,,
4049,68095,0,2001,005,S,490050007022001,0,0,0,0,...,599,537,"{""rings"": [[[-12446543.2963, 5123838.5001], [-...",49,122439.718750,1536.480905,000702,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66414,13207,0,1003,049,S,490490022111003,0,0,0,0,...,356,304,"{""rings"": [[[-12437036.0311, 4905894.7701], [-...",49,22713.214844,744.370320,002211,,,
67877,103503,0,3003,049,S,490490016023003,0,0,0,0,...,2291,1874,"{""rings"": [[[-12428893.5648, 4902808.9195], [-...",49,177889.664062,2393.673151,001602,,,
68282,7806,0,3016,035,S,490351148003016,0,0,0,0,...,38,28,"{""rings"": [[[-12450974.2432, 4978380.5905], [-...",49,13622.582031,517.007208,114800,,,
68286,14664,0,1005,035,S,490351033001005,0,0,0,0,...,279,236,"{""rings"": [[[-12451836.6348, 4972465.1413], [-...",49,25560.601562,888.196839,103300,,,


Index(['ALAND20', 'AWATER20', 'BLOCKCE20', 'COUNTYFP20', 'FUNCSTAT20',
       'GEOID20', 'GQ_ADLTCOR', 'GQ_INSOTH', 'GQ_JUVCOR', 'GQ_MILTARY',
       'GQ_NINOTH', 'GQ_NURS', 'GQ_STUDENT', 'GQ_TOTAL', 'GQ_TOTINS',
       'GQ_TOTNIN', 'HU_OCC', 'HU_TOTAL', 'HU_VAC', 'INTPTLAT20', 'INTPTLON20',
       'LOGRECNO', 'MTFCC20', 'NAME20', 'OBJECTID', 'PP_AGGTE18', 'PP_ASNALN',
       'PP_BAAALN', 'PP_HISPLAT', 'PP_HPIALN', 'PP_MTRACE', 'PP_NAMALN',
       'PP_OTHALN', 'PP_SNRACE', 'PP_TOTAL', 'PP_WHTALN', 'SHAPE', 'STATEFP20',
       'Shape__Area', 'Shape__Length', 'TRACTCE20', 'UACE20', 'UATYPE20',
       'UR20'],
      dtype='object')

In [17]:
#create column with x and y coordinates of centroids
sdfUtahCensusBlocks2020_centroid = sdfUtahCensusBlocks2020[['GEOID20','SHAPE','GQ_STUDENT']].copy()
sdfUtahCensusBlocks2020_centroid['x_lon'] = sdfUtahCensusBlocks2020_centroid.apply(lambda row: row.SHAPE.centroid[0], axis = 1)
sdfUtahCensusBlocks2020_centroid['y_lat'] = sdfUtahCensusBlocks2020_centroid.apply(lambda row: row.SHAPE.centroid[1], axis = 1)

sdfUtahCensusBlocks2020_centroid.head()

Unnamed: 0,GEOID20,SHAPE,GQ_STUDENT,x_lon,y_lat
1156,490490011032007,"{""rings"": [[[-12435869.2912, 4906034.4025], [-...",300,-12435310.0,4905932.0
1894,490490019001009,"{""rings"": [[[-12430088.9137, 4901842.0633], [-...",596,-12429950.0,4901760.0
2826,490532713002001,"{""rings"": [[[-12642389.8789, 4453525.5358], [-...",461,-12642000.0,4453626.0
3315,490050007021005,"{""rings"": [[[-12447156.2215, 5122260.5425], [-...",388,-12446610.0,5122489.0
4049,490050007022001,"{""rings"": [[[-12446543.2963, 5123838.5001], [-...",11,-12446270.0,5123725.0


In [18]:
sdf_taz

NameError: name 'sdf_taz' is not defined