## Outline

In this section of the tutorial we combine pre-processing tools into a full pipeline that goes from raw data to a mobility metric, with an application to perceived segregation.

- Ingest data with filters on read (polygon, deduplication, low_ha) --> small sample first
- Analyze completeness and noise of selected users (daily_q > 0.5, HAU, DAU)

## Part 1: LA neighborhoods 

In [1]:
import geopandas as gpd

In [2]:
# Study-area polygons for the two LA neighborhoods, as WKT in lon/lat order.
# Each zipN label names the ZIP code whose boundary is stored in wkt_poly_N.
# The long WKT strings are written as adjacent string literals: Python joins
# them at compile time, so each value is one single-line WKT string (a plain
# quoted literal cannot contain a raw line break).

# ZIP 90272 (Pacific Palisades): longitudes around -118.5, latitudes around 34.07.
zip1 = 'US.90272'
wkt_poly_1 = (
    "POLYGON ((-118.577505 34.070578, -118.577311 34.070943, -118.578025 34.071574, -118.578235 34.072377, -118.577323 34.073198, -118.577114 34.073982, -118.578195 34.074891, -118.578632 34.075686, -118.578355 34.076989, -118.579826 34.077487, -118.580106 34.078929, -118.579752 34.079352, -118.579637 34.080475, -118.580493 34.081069, -118.582732 34.081617, -118.583276 34.082356, -118.584507 34.082632, -118.585786 34.083384, -118.585764 34.084748, -118.586535 34.085448, -118.586536 34.086198, -118.587247 34.086571, -118.58677 34.08828, -118.587505 34.089118, -118.587532 34.089858, -118.587222 34.09091, -118.585248 34.09125, -118.582463 34.093137, -118.581594 34.096281, -118.580096 34.097382, -118.580214 34.098469, -118.578385 34.100109, -118.574824 34.101449, -118.573205 34.103519, -118.570921 34.104864, -118.572187 34.106405, -118.572925 34.10673, -118.571529 34.108001, -118.569608 34.108861, -118.569797 34.109181, -118.57146 34.109321, -118.572708 34.109861, -118.573229 34.110264, -118.573444 34.110983, -118.575713 34.110794, -118.576329 34.111415, -118.564752 34.130168, -118.565316 34.130383, -118.564799 34.131111, -118.564017 34.131147, -118.563654 34.130769, -118.559995 34.129993, -118.559837 34.129382, -118.558005 34.127742, -118.557365 34.126608, -118.55661 34.126563, -118.556072 34.127197, -118.555847 34.128179, -118.554614 34.128011, -118.553816 34.128663, -118.553622 34.128247, -118.554813 34.127015, -118.554587 34.126521, -118.55356 34.126037, -118.552411 34.125968, -118.551586 34.126314, -118.549483 34.126434, -118.548086 34.125821, -118.546207 34.126079, -118.545229 34.126585, -118.54441 34.126548, -118.542059 34.127543, -118.541651 34.128931, -118.539306 34.1298, -118.539417 34.131153, -118.537802 34.131169, -118.537358 34.130637, -118.536088 34.130294, -118.535566 34.131132, -118.534757 34.13165, -118.534475 34.130163, -118.534054 34.129902, -118.533453 34.129934, -118.533261 34.130556, -118.532661 34.130953, -118.531871 34.129644, "
    "-118.528333 34.128921, -118.527129 34.128936, -118.526273 34.128558, -118.523056 34.12842, -118.52182 34.127995, -118.520718 34.128742, -118.518977 34.129031, -118.518817 34.12719, -118.517541 34.126212, -118.516463 34.122708, -118.517012 34.121477, -118.518829 34.120731, -118.520654 34.118862, -118.521475 34.117544, -118.520756 34.113642, -118.520941 34.11258, -118.520459 34.111623, -118.52144 34.103825, -118.520682 34.099877, -118.519322 34.098107, -118.519554 34.096293, -118.519118 34.096042, -118.519449 34.094067, -118.518732 34.089604, -118.51893 34.088747, -118.517583 34.08691, -118.516132 34.08264, -118.514256 34.081985, -118.512454 34.080246, -118.51184 34.078081, -118.510553 34.076701, -118.509813 34.074894, -118.509692 34.074239, -118.512761 34.072282, -118.512287 34.072207, -118.512172 34.071862, -118.511363 34.071779, -118.511425 34.071109, -118.510444 34.070519, -118.510178 34.069941, -118.508211 34.069043, -118.507643 34.068061, -118.505358 34.066724, -118.505229 34.065926, -118.504764 34.065503, -118.505257 34.065182, -118.50531 34.064287, -118.504187 34.066151, -118.504805 34.066912, -118.504167 34.067457, -118.503338 34.066617, -118.502849 34.067435, -118.502536 34.067061, -118.501053 34.066679, -118.498753 34.064608, -118.496606 34.064071, -118.495258 34.062439, -118.495237 34.060384, -118.496146 34.056388, -118.498813 34.054499, -118.499837 34.053025, -118.49987 34.051556, -118.499288 34.051668, -118.499171 34.051086, -118.496229 34.050223, -118.497398 34.048466, -118.496873 34.04807, -118.503965 34.040724, -118.50458 34.041293, -118.50731 34.040196, -118.509355 34.041402, -118.510044 34.041046, -118.51365 34.033646, -118.51509 34.033061, -118.515773 34.034498, -118.515271 34.034902, -118.515373 34.035598, -118.513348 34.037548, -118.512309 34.039885, -118.51281 34.040538, -118.512267 34.040954, -118.513405 34.042344, -118.514166 34.040952, -118.514098 34.03994, -118.515321 34.040444, -118.515424 34.041355, -118.513951 34.043227, -118.514238 "
    "34.04341, -118.515865 34.043106, -118.515872 34.044137, -118.514433 34.044926, -118.514964 34.045563, -118.516503 34.044656, -118.517201 34.041847, -118.51751 34.041447, -118.518212 34.041304, -118.518216 34.038508, -118.519633 34.034701, -118.519062 34.031548, -118.518866 34.031382, -118.518374 34.03203, -118.51712 34.031047, -118.51805 34.030434, -118.517867 34.03004, -118.51892 34.029219, -118.52009 34.026898, -118.521865 34.024999, -118.52219 34.025361, -118.523872 34.025804, -118.527945 34.028254, -118.531388 34.029714, -118.531577 34.02957, -118.542552 34.034927, -118.546074 34.036308, -118.550209 34.036712, -118.55249 34.036126, -118.55359 34.035383, -118.554565 34.035161, -118.556773 34.035276, -118.567251 34.038401, -118.566901 34.041193, -118.570107 34.047117, -118.570395 34.069346, -118.577505 34.070578))"
)

# ZIP 91001 (Altadena): longitudes around -118.13, latitudes around 34.19.
zip2 = 'US.91001'
wkt_poly_2 = (
    "POLYGON ((-118.10042 34.190442, -118.101043 34.189947, -118.102033 34.189938, -118.10214 34.190687, -118.102616 34.190745, -118.10418 34.193334, -118.104845 34.193386, -118.105857 34.192597, -118.104588 34.190708, -118.104922 34.190323, -118.104859 34.189209, -118.103843 34.187929, -118.103586 34.186584, -118.103049 34.186653, -118.103204 34.186478, -118.102138 34.185187, -118.102439 34.184923, -118.100395 34.183328, -118.100288 34.180371, -118.098256 34.175685, -118.094998 34.175635, -118.095553 34.174225, -118.099342 34.174642, -118.104152 34.174968, -118.10426 34.174641, -118.105069 34.174489, -118.10747 34.174937, -118.109831 34.176132, -118.10997 34.176735, -118.110546 34.177045, -118.110461 34.17761, -118.112069 34.178538, -118.113155 34.177886, -118.115871 34.178128, -118.115889 34.178516, -118.118104 34.1785, -118.118361 34.177967, -118.120271 34.177823, -118.121562 34.178098, -118.121515 34.17848, -118.129664 34.178418, -118.129661 34.177154, -118.132061 34.177026, -118.142202 34.181565, -118.146251 34.181539, -118.164115 34.181436, -118.164082 34.178405, -118.164363 34.17769, -118.169043 34.181078, -118.168984 34.181328, -118.169593 34.181432, -118.170614 34.184858, -118.170279 34.185925, -118.169714 34.18548, -118.168734 34.185445, -118.168722 34.18919, -118.17172 34.189287, -118.170609 34.190122, -118.17052 34.191134, -118.169218 34.192461, -118.168592 34.190674, -118.168582 34.188804, -118.167491 34.188809, -118.167506 34.190535, -118.166872 34.190256, -118.166503 34.191233, -118.168528 34.192094, -118.168455 34.19325, -118.166489 34.196811, -118.165452 34.199835, -118.165232 34.201869, -118.166022 34.203841, -118.165814 34.205321, -118.166499 34.206284, -118.168089 34.207092, -118.168152 34.208164, -118.16974 34.208934, -118.170035 34.209388, -118.169366 34.210486, -118.168476 34.210949, -118.170064 34.210554, -118.170438 34.211009, -118.170245 34.211236, -118.169729 34.210953, -118.169661 34.211363, -118.168951 34.211378, -118.169855 "
    "34.212445, -118.168725 34.212796, -118.168264 34.213539, -118.167281 34.214094, -118.166865 34.215406, -118.167842 34.215146, -118.168516 34.215978, -118.166874 34.216465, -118.167398 34.217157, -118.166058 34.217743, -118.16583 34.218153, -118.166317 34.217177, -118.16601 34.216686, -118.165583 34.216714, -118.164397 34.217798, -118.160475 34.216922, -118.160514 34.217649, -118.159592 34.217411, -118.158433 34.217884, -118.157665 34.219597, -118.155657 34.219321, -118.155431 34.218879, -118.154058 34.219258, -118.153232 34.219851, -118.150977 34.219911, -118.15104 34.219309, -118.150493 34.219026, -118.149774 34.219148, -118.148117 34.217424, -118.147335 34.217169, -118.145125 34.217327, -118.145936 34.215933, -118.148954 34.215853, -118.150206 34.216436, -118.150639 34.216131, -118.150392 34.215828, -118.148859 34.215148, -118.136227 34.21453, -118.117895 34.204116, -118.106823 34.195829, -118.104808 34.194856, -118.104807 34.196844, -118.10048 34.196844, -118.10042 34.190442))"
)

# Merge the two ZIP-code polygons into a single geometry; it is passed as the
# `within=` spatial filter when sampling users further down.
# `union_all()` replaces the `unary_union` attribute, which is deprecated in
# GeoPandas >= 1.0 and emits a DeprecationWarning (requires GeoPandas >= 1.0).
zips_poly = gpd.GeoSeries.from_wkt([wkt_poly_1, wkt_poly_2]).union_all()


## Part 2: LA county (continental)

In [3]:
# Los Angeles County: FIPS code and a simplified boundary ring (WKT, lon/lat order).
# Adjacent string literals are joined by Python into one single-line WKT string.
fips_3 = '06037'
wkt_poly_3 = (
    "POLYGON ((-117.704725 34.093957, -117.730125 34.021371, -117.76769 34.023506, -117.767483 34.004611, "
    "-117.785062 34.004809, -117.802539 33.975551, -117.783287 33.946411, -117.976498 33.94605, "
    "-117.976593 33.90281, -118.058918 33.846121, -118.063162 33.81961, -118.084377 33.803433, "
    "-118.096705 33.779085, -118.09197 33.758472, -118.11951 33.737064, -118.1259 33.697151, "
    "-118.237008 33.690595, -118.274239 33.663429, -118.319135 33.659547, -118.345415 33.663427, "
    "-118.466962 33.725524, -118.485577 33.753664, -118.484483 33.803154, -118.443968 33.839057, "
    "-118.447254 33.84876, -118.557356 33.987673, -118.727459 33.980307, -118.809827 33.946905, "
    "-118.841116 33.955371, -118.873998 33.983314, -118.951721 33.992858, -118.940965 34.07483, "
    "-118.788889 34.168214, -118.668152 34.168195, -118.667713 34.240404, -118.632495 34.240426, "
    "-118.636789 34.291804, -118.894634 34.817972, -118.881729 34.817802, -118.883381 34.808637, "
    "-118.870926 34.803109, -118.854114 34.803279, -118.854253 34.817772, -117.667292 34.822526, "
    "-117.667034 34.558008, -117.659994 34.55804, -117.646374 34.28917, -117.704725 34.093957))"
)

## Part 3: Ithaca NY 

In [4]:
# Tompkins County (Ithaca, NY): FIPS code and a simplified boundary ring (WKT, lon/lat order).
# Adjacent string literals are joined by Python into one single-line WKT string.
fips_4 = '36109'
wkt_poly_4 = (
    'POLYGON ((-76.492989 42.281166, -76.691406 42.284307, -76.685726 42.375108, -76.696655 42.54679, '
    '-76.585989 42.54991, -76.626761 42.573868, -76.666543 42.623457, -76.265584 42.623588, '
    '-76.253359 42.407568, -76.293168 42.406572, -76.299641 42.384546, -76.239854 42.35987, '
    '-76.250149 42.296676, -76.288174 42.296764, -76.28822 42.308227, -76.350619 42.308437, '
    '-76.350871 42.318288, -76.415305 42.318368, -76.416284 42.262977, -76.474494 42.263761, '
    '-76.473962 42.281132, -76.492989 42.281166))'
)

## Data ingestion using base Nomad?

In [5]:
import nomad.io.base as loader 

In [6]:
# One date partition (2025-04-22) of the US device-visits table on S3.
data_path = 's3://catalog-pickwell/pw-full-locations25/device-visits/geography_id_1=US/date=2025-04-22/'

# Ask the loader for the table's column names; the files are read as
# tab-separated text through the 'csv' format.
loader.table_columns(
    data_path,
    format='csv',
    sep="\t",
)

Index(['timestamp', 'device_aid', 'device_aid_type', 'latitude', 'longitude',
       'horizontal_accuracy', 'altitude', 'altitude_accuracy',
       'location_method', 'ip', 'user_agent', 'OS', 'OS_version',
       'manufacturer', 'model', 'carrier'],
      dtype='object')

In [7]:
# Column mapping handed to the loader through its `traj_cols` argument:
# keys are the field names the loader is told about, values are the column
# names in this dataset.
# NOTE(review): 'date' is not among the columns listed for the table above;
# presumably it comes from the `date=` partition in the path — TODO confirm.
traj_cols = dict(
    longitude='longitude',
    latitude='latitude',
    ha='horizontal_accuracy',
    user_id='device_aid',
    timestamp='timestamp',
    date='date',
)

In [None]:
# Sample users whose records fall inside the merged ZIP polygon.
# Data and polygon are both declared as EPSG:4326 (lon/lat).
# NOTE(review): size=0.1 is presumably a fraction of users and no seed is
# passed, so the sample may differ between runs — confirm against the loader API.
users = loader.sample_users(
    data_path,
    format='csv',
    size=0.1,
    within=zips_poly,
    data_crs="EPSG:4326",
    poly_crs="EPSG:4326",
    sep="\t",
    traj_cols=traj_cols,
)



In [None]:
# Show the value returned by loader.sample_users as this cell's output.
users