In [1]:
from utz import *
from IPython.display import HTML, SVG, display

import fiona
import geopandas as gpd
from geopy import distance
import shapefile
from shapely.geometry import Point

import plotly.express as px
import plotly.graph_objects as go

import sys
sys.executable

'/opt/homebrew/Caskroom/mambaforge/base/envs/geopandas/bin/python'

## Mapbox configs

In [2]:
token = open(".mapbox-token").read()

osm = dict(mapbox=dict(style="open-street-map"))
usgs = dict(mapbox=dict(
    style="white-bg",
    layers=[{
        "below": 'traces',
        "sourcetype": "raster",
        "sourceattribution": "United States Geological Survey",
        "source": ["https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"]
    }],
))
stadia = dict(
    mapbox=dict(
        style="dark",
        accesstoken=token,
        layers=[{
            "below": 'traces',
            "sourcetype": "raster",
            "sourceattribution": '© <a href="https://stadiamaps.com/">Stadia Maps</a>, © <a href="https://openmaptiles.org/">OpenMapTiles</a> &copy; <a href="http://openstreetmap.org">OpenStreetMap</a> contributors',
            "source": ['https://tiles.stadiamaps.com/tiles/alidade_smooth_dark/{z}/{x}/{y}{r}.png'],
        }],
    )
)
mapbox = dict(mapbox=dict(
    style="dark",
    accesstoken=token,
))

## NJ/US MP10s

In [3]:
mp10s = gpd.read_file("NJ_Milepost10ths_shp/TRAN_NJ_MP_TENTH_2021_shp.shp")
mp10s

Unnamed: 0,OBJECTID,SRI,MP,ROUTE_SUBT,SLD_NAME,LATITUDE,LONGTUDE,geometry
0,1,00000049__,16.9,3,NJ 49,39.508837,-75.349123,POINT (252558.713 247146.299)
1,2,00000049__,15.8,3,NJ 49,39.520140,-75.363569,POINT (248522.471 251301.723)
2,3,00000049__,14.6,3,NJ 49,39.531545,-75.380433,POINT (243805.861 255501.826)
3,4,00000049__,14.5,3,NJ 49,39.532232,-75.382105,POINT (243336.743 255756.640)
4,5,00000049__,16.5,3,NJ 49,39.512945,-75.354385,POINT (251088.453 248656.513)
...,...,...,...,...,...,...,...,...
46861,46862,00000413_W,0.3,3,NJ 413 SECONDARY,,,POINT (-17976931348623157081452742373170435679...
46862,46863,00000444E_,126.7,4,GARDEN STATE PARKWAY EXPRESS,,,POINT (-17976931348623157081452742373170435679...
46863,46864,00000439_S,2.3,3,NJ 439 SECONDARY,,,POINT (-17976931348623157081452742373170435679...
46864,46865,00000444ES,104.2,4,GARDEN STATE PARKWAY EXPRESS SECONDARY,,,POINT (-17976931348623157081452742373170435679...


In [4]:
sxs(
    (~mp10s.isna()).sum().rename('an'),
    mp10s.isna().sum().rename('nan'),
)

Unnamed: 0,an,nan
OBJECTID,46866,0
SRI,46866,0
MP,46866,0
ROUTE_SUBT,46866,0
SLD_NAME,46866,0
LATITUDE,46599,267
LONGTUDE,46599,267
geometry,46866,0


In [5]:
mp10s[['SRI', 'MP']].dtypes

SRI     object
MP     float64
dtype: object

In [6]:
shp_sris = Series(mp10s.SRI.unique())
shp_sris

0      00000049__
1      00000077__
2      00000049_W
3      00000056__
4      00000009__
          ...    
286    00000009WS
287    00000445SS
288    00000093_S
289    00000445__
290    00000445_S
Length: 291, dtype: object

In [7]:
sri_mp_all = mp10s[['SRI', 'MP', 'LATITUDE', 'LONGTUDE']]
mp10s[sri_mp_all.duplicated(subset=['SRI', 'MP'], keep=False)]

Unnamed: 0,OBJECTID,SRI,MP,ROUTE_SUBT,SLD_NAME,LATITUDE,LONGTUDE,geometry
5954,5955,00000047_S,72.5,3,NJ 47 SECONDARY,39.83587,-75.124982,POINT (316626.709 365744.802)
5957,5958,00000047_S,72.5,3,NJ 47 SECONDARY,39.83587,-75.124982,POINT (316626.709 365744.802)
18630,18631,00000179__,0.3,3,NJ 179,40.366016,-74.943209,POINT (368631.982 558549.827)
18631,18632,00000179__,0.3,3,NJ 179,40.366016,-74.943209,POINT (368631.982 558549.827)
37879,37880,00000015__,0.3,3,NJ 15,40.886643,-74.560473,POINT (475405.552 747905.350)
37881,37882,00000015__,0.3,3,NJ 15,40.886643,-74.560473,POINT (475405.552 747905.350)


In [8]:
sri_mp = sri_mp_all.drop_duplicates().rename(columns={'LATITUDE': 'LAT', 'LONGTUDE': 'LON'})
sri_mp

Unnamed: 0,SRI,MP,LAT,LON
0,00000049__,16.9,39.508837,-75.349123
1,00000049__,15.8,39.520140,-75.363569
2,00000049__,14.6,39.531545,-75.380433
3,00000049__,14.5,39.532232,-75.382105
4,00000049__,16.5,39.512945,-75.354385
...,...,...,...,...
46861,00000413_W,0.3,,
46862,00000444E_,126.7,,
46863,00000439_S,2.3,,
46864,00000444ES,104.2,,


In [9]:
sri_mp

Unnamed: 0,SRI,MP,LAT,LON
0,00000049__,16.9,39.508837,-75.349123
1,00000049__,15.8,39.520140,-75.363569
2,00000049__,14.6,39.531545,-75.380433
3,00000049__,14.5,39.532232,-75.382105
4,00000049__,16.5,39.512945,-75.354385
...,...,...,...,...
46861,00000413_W,0.3,,
46862,00000444E_,126.7,,
46863,00000439_S,2.3,,
46864,00000444ES,104.2,,


### Build SRI → MP → LL map: { SRI: { MP/10: [ LAT, LON ] } }

In [None]:
sri_ll = sri_mp.apply(lambda r: [ r.LAT, r.LON ], axis=1).rename('LL')
sri_map = sxs(sri_mp[['SRI', 'MP']], sri_ll)
sri_map = sri_map.groupby('SRI').apply(lambda r: r.set_index('MP').sort_index().LL.to_dict()).to_dict()
sri_map

### Check SRI 0.1 MPs for gaps

In [11]:
for sri, mps in sri_map.items():
    keys = list(mps.keys())
    k, K = min(keys), max(keys)
    rng = [ mp / 10 for mp in range(int(k*10), int(K*10+1)) ]
    ks = set(keys)
    kr = set(rng)
    both = ks.intersection(kr)
    missing = list(sorted(list(kr.difference(ks))))
    extra = list(sorted(list(ks.difference(kr))))
    if missing:
        missing_ranges = []
        cur_start = None
        for idx, (cur, nxt) in enumerate(zip(missing, missing[1:] + [None])):
            if cur_start is None:
                cur_start = cur
            if nxt is None or int(cur*10 + 1) != int(nxt * 10):
                missing_ranges.append([cur_start, cur])
                cur_start = None
        print(f'{sri} missing from [{k}, {K}]:')
        for s, e in missing_ranges:
            print(f"\t{s}" if s == e else f"\t{s}:{e} ({int(10*(e-s)+1)})")
    if extra:
        raise ValueError(f'{sri}: extra {extra} ([{k}, {K}])')

00000001BS missing from [0.0, 2.7]:
	0.3
00000001TW missing from [0.0, 4.3]:
	2.3:2.6 (4)
00000001_S missing from [0.0, 64.8]:
	57.4:59.3 (19)
	60.0:60.5 (6)
	60.8:61.6 (9)
	62.1:62.7 (7)
00000007_S missing from [0.0, 4.2]:
	0.2:0.5 (4)
	1.7:3.6 (20)
00000007__ missing from [0.0, 10.1]:
	5.4:5.9 (6)
00000009WS missing from [0.3, 11.1]:
	0.8:2.5 (18)
	4.0:6.7 (28)
	7.4:9.4 (21)
00000009_S missing from [0.3, 136.3]:
	1.9:2.0 (2)
	2.4
	2.8:6.6 (39)
	7.8:23.2 (155)
	24.1:28.4 (43)
	28.8:33.0 (42)
	33.4:52.2 (189)
	52.6:54.7 (22)
	55.3:70.1 (148)
	70.7:71.3 (6)
	72.0:82.1 (101)
	82.8:87.7 (50)
	88.1:89.6 (16)
	90.0:90.1 (1)
	91.1:94.4 (34)
	94.9:101.1 (62)
	101.8:102.7 (10)
00000009__ missing from [0.2, 136.3]:
	30.7:32.0 (14)
	52.6:54.7 (22)
	91.1:94.4 (34)
00000012_W missing from [10.6, 11.6]:
	10.9:11.2 (3)
00000015_S missing from [2.0, 15.4]:
	14.2:14.7 (6)
00000021_S missing from [0.0, 14.3]:
	2.1:2.3 (2)
	2.8:3.8 (11)
00000022_W missing from [0.2, 60.5]:
	5.1:19.1 (141)
00000022__ mis

## NJDOT 2020 Crash SRIs/MPs

In [12]:
from nj_crashes.crashes import c20

Unnamed: 0,County Code,Municipality Code,Department Case Number,County Name,Municipality Name,Police Dept Code,Police Department,Police Station,Total Killed,Total Injured,Pedestrians Killed,Pedestrians Injured,Severity,Intersection,Alcohol Involved,HazMat Involved,Crash Type Code,Total Vehicles Involved,Crash Location,Location Direction,Route,Route Suffix,SRI,MP,Road System,Road Character,Road Horizontal Alignment,Road Grade,Road Surface Type,Surface Condition,Light Condition,Environmental Condition,Road Divided By,Temporary Traffic Control Zone,Distance To Cross Street,Unit Of Measurement,Directn From Cross Street,Cross Street Name,Is Ramp,Ramp To/From Route Name,Ramp To/From Route Direction,Posted Speed,Posted Speed Cross Street,First Harmful Event,Latitude,Longitude,Cell Phone In Use Flag,Other Property Damage,Reporting Badge No.,Date
0,01,01,2020-020639,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,True,False,04,2,ATLANTIC COUNTY 663,,663,,01000663__,1.34,05,,01,04,02,01,07,01,05,01,10,FE,E,ROUTE 585 / N MAIN ST / S SHORE RD,,,,25,35,26,,,N,,861,2020-08-03 02:31:00
1,01,01,2020-021265,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,I,False,False,03,2,US 30,E,30,,00000030__,52.30,02,,02,06,02,01,01,01,01,01,,AT,,CR 631 / ILLINOIS AVE / TURNER AVE,,,,45,25,26,,,N,,861,2020-08-10 17:47:00
2,01,01,2020005031,ATLANTIC,ABSECON CITY,01,PLEASANTVILLE PD,,0,0,0,0,P,I,False,False,03,2,US 9,S,9,,00000009__,41.87,02,,01,04,02,01,01,01,05,01,,AT,,CR 663 / CALIFORNIA AVE / W CALIFOR,,,,30,25,26,39.413761,-74.512961,N,,1745,2020-02-29 16:54:00
3,01,01,2020025300,ATLANTIC,ABSECON CITY,01,EGG HARBOR TWP PD,EHTPD HQ,0,0,0,0,P,B,False,False,11,1,ATLANTIC COUNTY 685,N,685,,01000685__,0.23,05,,01,04,02,01,06,01,05,01,1200,FE,N,CR 651 / MILL RD,,,,45,45,52,39.435797,-74.536607,N,,5191,2020-04-29 23:30:00
4,01,01,I-2020-000186,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,False,False,11,1,US 30,,30,,00000030__,54.26,02,,01,04,02,01,01,01,01,01,882,FE,E,CR 646 / DELILAH RD,,,,50,25,19,,,N,,847,2020-01-02 08:35:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195469,21,23,B150-2020-00350A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,04,3,ROUTE 519,S,519,,00000519__,42.80,05,,02,04,02,03,01,03,05,01,,,,,,,,50,,26,40.812200,-75.049400,N,,8246,2020-12-09 11:26:00
195470,21,23,B150-2020-00354A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,11,1,PARKING LOT OF 187 CR 519,,,,,,09,,01,04,02,02,06,04,05,01,,,,,,,,15,,56,40.805720,-75.076940,N,Mobile advertisement sign and concrete column.,8354,2020-12-12 18:45:00
195471,21,23,B150-2020-00361A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,1,0,1,0,F,I,False,False,13,1,ROUTE 519,S,519,,00000519__,38.12,05,,01,04,02,01,05,01,05,01,,AT,,WINDTRYST WAY,,,,50,25,22,40.863590,-75.014050,N,,6657,2020-12-15 06:12:00
195472,21,23,B150-2020-00369A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,12,1,ROUTE 519,S,519,,00000519__,37.00,05,,01,04,02,01,05,01,05,01,,,,,,,,50,,24,40.746800,-75.137400,N,,8246,2020-12-19 21:37:00


In [13]:
from nj_crashes.geo import is_nj_ll, is_nj, p1s, get_county

8: 3 linestrings


In [14]:
c20lls = c20[(~c20.Latitude.isna()) & (~c20.Longitude.isna())]
c20_njs = c20lls.apply(lambda r: is_nj_ll(r.Latitude, r.Longitude), axis=1)
c20_nnj = c20lls[~c20_njs]
c20_nj = c20lls[c20_njs]
c20_nnj

Unnamed: 0,County Code,Municipality Code,Department Case Number,County Name,Municipality Name,Police Dept Code,Police Department,Police Station,Total Killed,Total Injured,Pedestrians Killed,Pedestrians Injured,Severity,Intersection,Alcohol Involved,HazMat Involved,Crash Type Code,Total Vehicles Involved,Crash Location,Location Direction,Route,Route Suffix,SRI,MP,Road System,Road Character,Road Horizontal Alignment,Road Grade,Road Surface Type,Surface Condition,Light Condition,Environmental Condition,Road Divided By,Temporary Traffic Control Zone,Distance To Cross Street,Unit Of Measurement,Directn From Cross Street,Cross Street Name,Is Ramp,Ramp To/From Route Name,Ramp To/From Route Direction,Posted Speed,Posted Speed Cross Street,First Harmful Event,Latitude,Longitude,Cell Phone In Use Flag,Other Property Damage,Reporting Badge No.,Date
1225,01,02,2009-1055,ATLANTIC,ATLANTIC CITY,01,ATLANTIC CITY PD,CRASH INVESTIG,1,0,1,0,F,I,False,False,13,1,US 30,W,30,,00000030__,54.70,02,,01,04,02,01,06,04,01,01,,AT,,,,,,50,,22,39.23220,-74.28570,N,,802,2020-09-25 04:26:00
1227,01,02,A160-2020-00010A,ATLANTIC,ATLANTIC CITY,02,NEW JERSEY STATE POLICE,ATLANTIC CITY E,0,0,0,0,P,B,False,False,02,2,ATLANTIC CITY-BRIGANTINE CONNECTOR,N,446,X,00000446X_,1.96,03,,01,06,02,01,06,01,01,01,,,,,,,,35,,26,39.96549,-75.65196,N,,8104,2020-01-10 21:25:00
1255,01,02,A160-2020-00145A,ATLANTIC,ATLANTIC CITY,02,NEW JERSEY STATE POLICE,ATLANTIC CITY E,0,0,0,0,P,B,False,False,02,2,ATLANTIC CITY EXPRESSWAY,W,446,,00000446__,0.10,03,,01,04,02,01,01,01,01,01,,,,,,,,55,,26,39.65214,-75.65984,N,,7826,2020-04-29 14:32:00
1297,01,02,A160-2020-00464A,ATLANTIC,ATLANTIC CITY,02,NEW JERSEY STATE POLICE,ATLANTIC CITY E,0,0,0,0,P,B,False,False,02,2,ATLANTIC CITY EXPRESSWAY,S,446,,00000446__,0.10,03,,02,04,01,02,01,02,01,01,,,,,,,,35,,26,39.37397,-74.22548,N,,8045,2020-09-09 10:55:00
1501,01,05,A090-2020-00093A,ATLANTIC,BUENA VISTA TWP,02,NEW JERSEY STATE POLICE,BUENA VISTA - S,0,0,0,0,P,B,False,False,12,1,LANDIS AVE / OLD LANDIS AVE **,W,,,01121096__,3.55,07,,01,04,02,01,01,01,05,01,1584,FE,E,MAIN AVE,,,,45,,24,39.64343,-75.55465,N,,7697,2020-05-30 13:02:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193956,21,13,B020-2020-00136A,WARREN,KNOWLTON TWP,02,NEW JERSEY STATE POLICE,HOPE - SQUAD 1,0,0,0,0,P,B,False,False,03,2,NJ 94,S,94,,00000094__,0.50,02,,01,04,02,01,01,01,04,01,,,,,,,,40,,26,40.89800,-75.89800,N,,8340,2020-03-15 17:40:00
193989,21,13,B020-2020-00260A,WARREN,KNOWLTON TWP,02,NEW JERSEY STATE POLICE,HOPE - SQUAD 1,0,0,0,0,P,B,False,False,,2,RAMSEYBURG RD,E,,,,,07,,01,04,02,01,01,01,05,01,,,,,,,,40,,26,40.89800,-75.89800,N,,8340,2020-06-16 13:14:00
194000,21,13,B020-2020-00293A,WARREN,KNOWLTON TWP,02,NEW JERSEY STATE POLICE,HOPE - SQUAD 2,0,0,0,0,P,B,False,False,02,2,WASHINGTON ST,S,,,,,07,,01,04,02,01,01,01,05,01,,,,LOCUST,,,,25,25,26,39.94683,-75.15343,N,,8274,2020-07-03 14:34:00
194115,21,14,B150-2020-00042A,WARREN,LIBERTY TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,06,2,LAKESIDE DR N / LAKESIDE DR W **,N,,,21141008__,0.04,07,,01,06,02,01,06,01,05,01,200,FE,N,LAKESIDE DR E,,,,25,25,28,41.36980,-74.14780,N,,7590,2020-01-26 00:00:00


In [None]:
fig = px.scatter_mapbox(
    c20_nnj,
    lat="Latitude", lon="Longitude",
    color='Severity',
    hover_data=["County Name", "Date", "Severity", "Crash Location", "SRI", "MP"],
    color_discrete_sequence=["yellow", "orange", "red"],
    center=dict(lat=40.15, lon=-74.715),
    zoom=6.8,
    height=1000,
)
fig.add_trace(go.Scattermapbox(
    lat=p1s.lat, lon=p1s.lon,
    name='NJ',
    showlegend=False,
    #color=p1s.name,
    #hover_name="City",
    #hover_data=["name"],
))
legend_bgcolor = '50'
fig.update_layout(
    **mapbox,
    title=dict(
        text="Erroneous crash lat/lons",
        x=0.5, y=0.98,
        xanchor='center', yanchor='top',
        font=dict(size=32, color="white")
    ),
    legend=dict(
        title=dict(text=''),
        x=0.98, y=0.98,
        xanchor="right", yanchor="top",
        font=dict(
            size=14,
            color="white"
        ),
        bgcolor=f"rgba({legend_bgcolor},{legend_bgcolor},{legend_bgcolor},0.8)",
        bordercolor="white",
        borderwidth=2,
    ),
    margin={"r":0,"t":0,"l":0,"b":0},
)
fig.show()

In [16]:
c20[c20.Longitude > 0]

Unnamed: 0,County Code,Municipality Code,Department Case Number,County Name,Municipality Name,Police Dept Code,Police Department,Police Station,Total Killed,Total Injured,Pedestrians Killed,Pedestrians Injured,Severity,Intersection,Alcohol Involved,HazMat Involved,Crash Type Code,Total Vehicles Involved,Crash Location,Location Direction,Route,Route Suffix,SRI,MP,Road System,Road Character,Road Horizontal Alignment,Road Grade,Road Surface Type,Surface Condition,Light Condition,Environmental Condition,Road Divided By,Temporary Traffic Control Zone,Distance To Cross Street,Unit Of Measurement,Directn From Cross Street,Cross Street Name,Is Ramp,Ramp To/From Route Name,Ramp To/From Route Direction,Posted Speed,Posted Speed Cross Street,First Harmful Event,Latitude,Longitude,Cell Phone In Use Flag,Other Property Damage,Reporting Badge No.,Date


In [17]:
c20.Longitude.dropna().astype(int).value_counts().sort_index()

Longitude
-75    10497
-74    50709
-73      523
Name: count, dtype: int64

In [18]:
def sri_interp(sri, mp):
    if isna(mp):
        return
    if sri not in sri_map:
        return
    mps = sri_map[sri]

    mp10 = mp*10
    mp_lo, mp_hi = floor(mp10) / 10, ceil(mp10) / 10
    keys = list(mps.keys())
    k, K = min(keys), max(keys)
    if mp_lo not in mps or mp_hi not in mps:
        (k0, d0), (k1, d1) = sorted([ (key, abs(key - mp)) for key in keys ], key=lambda t: t[1])[:2]
        mp_lo = min(k0, k1)
        mp_hi = max(k0, k1)
        if mp_lo in mps and mp_hi in mps:
            print(f'{sri}@{mp}: recovering using closest 2: {mp_lo}, {mp_hi}')
        else:
            raise RuntimeError(f'{sri}@{mp}: recovery error: {mp_lo}, {mp_hi}')
    if mp_lo in mps and mp_hi in mps:
        if mp_lo == mp_hi:
            return mps[mp_lo]
        ll_lo = mps[mp_lo]
        ll_hi = mps[mp_hi]
        ll = [ None, None ]
        frac = (mp - mp_lo) / (mp_hi - mp_lo) 
        for i in range(2):
            ll[i] = ll_lo[i] + frac * (ll_hi[i] - ll_lo[i])
        return ll
    else:
        err(f'{sri}@{mp}: {mp_lo} {mp_lo in mps}, {mp_hi} {mp_hi in mps}, [{k},{K}]')
        return

In [None]:
%%time
sri_lls = (
    c20
    [['SRI', 'MP']]
    .apply(lambda r: sri_interp(r.SRI, r.MP), axis=1)
)
sri_lls = sri_lls.apply(lambda ll: {'LAT': nan, 'LON': nan} if ll is None else {'LAT': ll[0], 'LON': ll[1]}).apply(Series)
sri_lls

In [20]:
sxs(sri_lls.isna().sum().rename('nan'), (~sri_lls.isna()).sum().rename('an'))

Unnamed: 0,nan,an
LAT,123079,72395
LON,123079,72395


In [21]:
sxs(c20.isna().sum().rename('nan'), (~c20.isna()).sum().rename('an'))

Unnamed: 0,nan,an
County Code,0,195474
Municipality Code,0,195474
Department Case Number,0,195474
County Name,0,195474
Municipality Name,0,195474
Police Dept Code,0,195474
Police Department,0,195474
Police Station,0,195474
Total Killed,0,195474
Total Injured,0,195474


In [86]:
has_sri_ll = c20[~sri_lls.isna().any(axis=1)]
has_sri_ll

Unnamed: 0,County Code,Municipality Code,Department Case Number,County Name,Municipality Name,Police Dept Code,Police Department,Police Station,Total Killed,Total Injured,Pedestrians Killed,Pedestrians Injured,Severity,Intersection,Alcohol Involved,HazMat Involved,Crash Type Code,Total Vehicles Involved,Crash Location,Location Direction,Route,Route Suffix,SRI,MP,Road System,Road Character,Road Horizontal Alignment,Road Grade,Road Surface Type,Surface Condition,Light Condition,Environmental Condition,Road Divided By,Temporary Traffic Control Zone,Distance To Cross Street,Unit Of Measurement,Directn From Cross Street,Cross Street Name,Is Ramp,Ramp To/From Route Name,Ramp To/From Route Direction,Posted Speed,Posted Speed Cross Street,First Harmful Event,Latitude,Longitude,Cell Phone In Use Flag,Other Property Damage,Reporting Badge No.,Date
1,01,01,2020-021265,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,I,False,False,03,2,US 30,E,30,,00000030__,52.30,02,,02,06,02,01,01,01,01,01,,AT,,CR 631 / ILLINOIS AVE / TURNER AVE,,,,45,25,26,,,N,,861,2020-08-10 17:47:00
2,01,01,2020005031,ATLANTIC,ABSECON CITY,01,PLEASANTVILLE PD,,0,0,0,0,P,I,False,False,03,2,US 9,S,9,,00000009__,41.87,02,,01,04,02,01,01,01,05,01,,AT,,CR 663 / CALIFORNIA AVE / W CALIFOR,,,,30,25,26,39.413761,-74.512961,N,,1745,2020-02-29 16:54:00
4,01,01,I-2020-000186,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,False,False,11,1,US 30,,30,,00000030__,54.26,02,,01,04,02,01,01,01,01,01,882,FE,E,CR 646 / DELILAH RD,,,,50,25,19,,,N,,847,2020-01-02 08:35:00
6,01,01,I-2020-000371,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,False,False,01,2,US 9,,9,,00000009__,42.80,02,,01,04,02,01,01,01,05,01,300,FE,S,US 30 / WHITEHORSE PIKE,,,,35,40,26,,,N,,842,2020-01-04 15:56:00
7,01,01,I-2020-000391,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,False,False,01,2,US 30,E,30,,00000030__,50.76,02,,01,04,02,02,06,02,01,01,250,FE,W,HADDON AVE,,,,45,25,26,,,N,,854,2020-01-04 19:10:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195461,21,23,B150-2020-00318A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,12,1,US 46,E,46,,00000046__,6.50,02,,01,06,02,01,05,01,05,01,,,,,,,,50,,24,40.853190,-75.057040,N,,8491,2020-11-17 06:34:00
195462,21,23,B150-2020-00322A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,12,1,US 46,W,46,,00000046__,8.00,02,,02,05,02,01,01,01,04,01,,,,,,,,50,,24,40.840300,-75.035930,N,,8297,2020-11-15 11:56:00
195465,21,23,B150-2020-00328A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,02,2,US 46,E,46,,00000046__,7.30,02,,01,04,02,02,01,02,02,01,,,,,,,,50,,26,40.841300,-75.049060,N,,8520,2020-11-23 07:56:00
195467,21,23,B150-2020-00338A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,01,2,US 46,E,46,,00000046__,8.10,02,,01,04,02,01,06,01,04,01,300,FE,E,W TITMAN RD,,,,50,25,26,40.812200,-75.049400,N,,8395,2020-11-20 18:20:00


In [90]:
list(sorted(has_sri_ll['Crash Location'].value_counts().index.tolist()))

['ATLANTIC CITY EXPRESSWAY',
 'ATLANTIC CITY-BRIGANTINE CONNECTOR',
 'GARDEN STATE PARKWAY',
 'I-195',
 'I-278',
 'I-280',
 'I-287',
 'I-295',
 'I-676',
 'I-76',
 'I-78',
 'I-80',
 'I-95  N.J. TURNPIKE',
 'I-95  N.J. TURNPIKE-WEST ALIGNMENT',
 'NEW JERSEY TURNPIKE',
 'NJ 10',
 'NJ 109',
 'NJ 12',
 'NJ 120',
 'NJ 122',
 'NJ 124',
 'NJ 129',
 'NJ 13',
 'NJ 133',
 'NJ 138',
 'NJ 139 LOWER',
 'NJ 139 UPPER',
 'NJ 140',
 'NJ 143',
 'NJ 147',
 'NJ 15',
 'NJ 152',
 'NJ 154',
 'NJ 156',
 'NJ 157',
 'NJ 159',
 'NJ 161',
 'NJ 162',
 'NJ 165',
 'NJ 166',
 'NJ 166 Z',
 'NJ 168',
 'NJ 17',
 'NJ 171',
 'NJ 172',
 'NJ 173',
 'NJ 175',
 'NJ 179',
 'NJ 18',
 'NJ 181',
 'NJ 182',
 'NJ 183',
 'NJ 184',
 'NJ 185',
 'NJ 187',
 'NJ 19',
 'NJ 20',
 'NJ 208',
 'NJ 21',
 'NJ 23',
 'NJ 24',
 'NJ 26',
 'NJ 27',
 'NJ 27 Z',
 'NJ 28',
 'NJ 284',
 'NJ 29',
 'NJ 3',
 'NJ 31',
 'NJ 32',
 'NJ 324',
 'NJ 33',
 'NJ 33 BUSINESS',
 'NJ 34',
 'NJ 347',
 'NJ 35',
 'NJ 35 Z',
 'NJ 36',
 'NJ 37',
 'NJ 38',
 'NJ 4',
 'NJ 41',


In [87]:
found_sris = has_sri_ll.SRI.unique()
len(found_sris), found_sris

(140,
 array(['00000030__', '00000009__', '00000157__', '00000087__',
        '00000040__', '00000187__', '00000446__', '00000446X_',
        '00000054__', '00000050__', '00000152__', '00000444__',
        '00000049__', '00000322__', '00000073__', '00000206__',
        '00000052__', '00000017__', '00000009W_', '00000445__',
        '00000080__', '00000120__', '00000095W_', '00000004__',
        '00000046__', '00000003__', '00000005__', '00000093__',
        '00000095__', '00000208__', '00000001__', '00000063__',
        '00000067__', '00000287__', '00000202__', '00000007__',
        '00000130__', '00000295__', '00000413__', '00000700__',
        '00000090__', '00000070__', '00000038__', '00000068__',
        '00000041__', '00000072__', '00000168__', '00000042__',
        '00000076__', '00000047__', '00000676__', '00000154__',
        '00000143__', '00000109__', '00000083__', '00000347__',
        '00000162__', '00000147__', '00000077__', '00000056__',
        '00000055__', '00000021__'

In [80]:
missing_lls = c20[sri_lls.isna().any(axis=1)]
missing_lls

Unnamed: 0,County Code,Municipality Code,Department Case Number,County Name,Municipality Name,Police Dept Code,Police Department,Police Station,Total Killed,Total Injured,Pedestrians Killed,Pedestrians Injured,Severity,Intersection,Alcohol Involved,HazMat Involved,Crash Type Code,Total Vehicles Involved,Crash Location,Location Direction,Route,Route Suffix,SRI,MP,Road System,Road Character,Road Horizontal Alignment,Road Grade,Road Surface Type,Surface Condition,Light Condition,Environmental Condition,Road Divided By,Temporary Traffic Control Zone,Distance To Cross Street,Unit Of Measurement,Directn From Cross Street,Cross Street Name,Is Ramp,Ramp To/From Route Name,Ramp To/From Route Direction,Posted Speed,Posted Speed Cross Street,First Harmful Event,Latitude,Longitude,Cell Phone In Use Flag,Other Property Damage,Reporting Badge No.,Date
0,01,01,2020-020639,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,True,False,04,2,ATLANTIC COUNTY 663,,663,,01000663__,1.34,05,,01,04,02,01,07,01,05,01,10,FE,E,ROUTE 585 / N MAIN ST / S SHORE RD,,,,25,35,26,,,N,,861,2020-08-03 02:31:00
3,01,01,2020025300,ATLANTIC,ABSECON CITY,01,EGG HARBOR TWP PD,EHTPD HQ,0,0,0,0,P,B,False,False,11,1,ATLANTIC COUNTY 685,N,685,,01000685__,0.23,05,,01,04,02,01,06,01,05,01,1200,FE,N,CR 651 / MILL RD,,,,45,45,52,39.435797,-74.536607,N,,5191,2020-04-29 23:30:00
5,01,01,I-2020-000255,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,True,False,11,1,ROUTE 585,,585,,00000585__,9.44,05,,01,04,02,01,06,01,05,01,150,FE,S,CR 630 / OHIO AVE,,,,35,35,52,,,N,UTILITY POLE #P19002,845,2020-01-03 02:44:00
8,01,01,I-2020-000392,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,False,False,11,1,W CHURCH ST,,,,01011263__,0.10,07,,01,04,02,02,07,02,05,01,200,FE,W,CR 634 / PITNEY RD,,,,25,25,52,,,N,45 W CHURCH-ROOF/ GUTTERS/ SIDING. OWNER NOTIF...,854,2020-01-04 20:01:00
10,01,01,I-2020-000829,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,False,False,06,2,ATLANTIC COUNTY 630,,630,,01000630__,0.14,05,,01,04,02,01,01,01,05,01,30,FE,E,MIAMI AVE,,,,35,25,28,,,N,,861,2020-01-09 11:56:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195468,21,23,B150-2020-00348A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,12,1,ROUTE 519,S,519,,00000519__,41.00,05,,01,04,02,01,05,01,05,01,,,,,,,,50,,24,40.833560,-75.027640,N,,8354,2020-12-07 17:43:00
195469,21,23,B150-2020-00350A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,04,3,ROUTE 519,S,519,,00000519__,42.80,05,,02,04,02,03,01,03,05,01,,,,,,,,50,,26,40.812200,-75.049400,N,,8246,2020-12-09 11:26:00
195470,21,23,B150-2020-00354A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,11,1,PARKING LOT OF 187 CR 519,,,,,,09,,01,04,02,02,06,04,05,01,,,,,,,,15,,56,40.805720,-75.076940,N,Mobile advertisement sign and concrete column.,8354,2020-12-12 18:45:00
195471,21,23,B150-2020-00361A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,1,0,1,0,F,I,False,False,13,1,ROUTE 519,S,519,,00000519__,38.12,05,,01,04,02,01,05,01,05,01,,AT,,WINDTRYST WAY,,,,50,25,22,40.863590,-75.014050,N,,6657,2020-12-15 06:12:00


In [84]:
all_sris = c20.SRI.unique()
len(all_sris), all_sris

(17474,
 array(['01000663__', '00000030__', '00000009__', ..., '21000626__',
        '21231014__', '21231034__'], dtype=object))

In [82]:
missing_sris = missing_lls.SRI.unique()
len(missing_sris), missing_sris

(17388,
 array(['01000663__', '01000685__', '00000585__', ..., '21000626__',
        '21231014__', '21231034__'], dtype=object))

In [83]:
grove_sri in missing_sris

True

### `c20lls`: SRI/MP10 LAT/LON (fall back to crash Latitude/Longitude)

In [22]:
%%time
c20lls = sxs(c20, sri_lls)
merged_lls = c20lls.apply(
    lambda r: dict(
#         Lat=r.LAT if isna(r.Latitude) else r.Latitude,
#         Lon=r.LON if isna(r.Longitude) else r.Longitude,
        Lat=r.Latitude if isna(r.LAT) else r.LAT,
        Lon=r.Longitude if isna(r.LON) else r.LON,
    ),
    axis=1,
).apply(Series)
c20lls = sxs(c20lls, merged_lls)
c20lls

CPU times: user 13.9 s, sys: 258 ms, total: 14.1 s
Wall time: 14.1 s


Unnamed: 0,County Code,Municipality Code,Department Case Number,County Name,Municipality Name,Police Dept Code,Police Department,Police Station,Total Killed,Total Injured,Pedestrians Killed,Pedestrians Injured,Severity,Intersection,Alcohol Involved,HazMat Involved,Crash Type Code,Total Vehicles Involved,Crash Location,Location Direction,Route,Route Suffix,SRI,MP,Road System,Road Character,Road Horizontal Alignment,Road Grade,Road Surface Type,Surface Condition,Light Condition,Environmental Condition,Road Divided By,Temporary Traffic Control Zone,Distance To Cross Street,Unit Of Measurement,Directn From Cross Street,Cross Street Name,Is Ramp,Ramp To/From Route Name,Ramp To/From Route Direction,Posted Speed,Posted Speed Cross Street,First Harmful Event,Latitude,Longitude,Cell Phone In Use Flag,Other Property Damage,Reporting Badge No.,Date,LAT,LON,Lat,Lon
0,01,01,2020-020639,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,True,False,04,2,ATLANTIC COUNTY 663,,663,,01000663__,1.34,05,,01,04,02,01,07,01,05,01,10,FE,E,ROUTE 585 / N MAIN ST / S SHORE RD,,,,25,35,26,,,N,,861,2020-08-03 02:31:00,,,,
1,01,01,2020-021265,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,I,False,False,03,2,US 30,E,30,,00000030__,52.30,02,,02,06,02,01,01,01,01,01,,AT,,CR 631 / ILLINOIS AVE / TURNER AVE,,,,45,25,26,,,N,,861,2020-08-10 17:47:00,39.419951,-74.497231,39.419951,-74.497231
2,01,01,2020005031,ATLANTIC,ABSECON CITY,01,PLEASANTVILLE PD,,0,0,0,0,P,I,False,False,03,2,US 9,S,9,,00000009__,41.87,02,,01,04,02,01,01,01,05,01,,AT,,CR 663 / CALIFORNIA AVE / W CALIFOR,,,,30,25,26,39.413761,-74.512961,N,,1745,2020-02-29 16:54:00,39.414805,-74.512372,39.414805,-74.512372
3,01,01,2020025300,ATLANTIC,ABSECON CITY,01,EGG HARBOR TWP PD,EHTPD HQ,0,0,0,0,P,B,False,False,11,1,ATLANTIC COUNTY 685,N,685,,01000685__,0.23,05,,01,04,02,01,06,01,05,01,1200,FE,N,CR 651 / MILL RD,,,,45,45,52,39.435797,-74.536607,N,,5191,2020-04-29 23:30:00,,,39.435797,-74.536607
4,01,01,I-2020-000186,ATLANTIC,ABSECON CITY,01,ABSECON CITY PD,,0,0,0,0,P,B,False,False,11,1,US 30,,30,,00000030__,54.26,02,,01,04,02,01,01,01,01,01,882,FE,E,CR 646 / DELILAH RD,,,,50,25,19,,,N,,847,2020-01-02 08:35:00,39.393999,-74.491169,39.393999,-74.491169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195469,21,23,B150-2020-00350A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,04,3,ROUTE 519,S,519,,00000519__,42.80,05,,02,04,02,03,01,03,05,01,,,,,,,,50,,26,40.812200,-75.049400,N,,8246,2020-12-09 11:26:00,,,40.812200,-75.049400
195470,21,23,B150-2020-00354A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,11,1,PARKING LOT OF 187 CR 519,,,,,,09,,01,04,02,02,06,04,05,01,,,,,,,,15,,56,40.805720,-75.076940,N,Mobile advertisement sign and concrete column.,8354,2020-12-12 18:45:00,,,40.805720,-75.076940
195471,21,23,B150-2020-00361A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,1,0,1,0,F,I,False,False,13,1,ROUTE 519,S,519,,00000519__,38.12,05,,01,04,02,01,05,01,05,01,,AT,,WINDTRYST WAY,,,,50,25,22,40.863590,-75.014050,N,,6657,2020-12-15 06:12:00,,,40.863590,-75.014050
195472,21,23,B150-2020-00369A,WARREN,WHITE TWP,02,NEW JERSEY STATE POLICE,WASHINGTON - SQ,0,0,0,0,P,B,False,False,12,1,ROUTE 519,S,519,,00000519__,37.00,05,,01,04,02,01,05,01,05,01,,,,,,,,50,,24,40.746800,-75.137400,N,,8246,2020-12-19 21:37:00,,,40.746800,-75.137400


In [23]:
c20lls[c20lls.Lat.isna()].isna().sum()

County Code                           0
Municipality Code                     0
Department Case Number                0
County Name                           0
Municipality Name                     0
Police Dept Code                      0
Police Department                     0
Police Station                        0
Total Killed                          0
Total Injured                         0
Pedestrians Killed                    0
Pedestrians Injured                   0
Severity                              0
Intersection                          0
Alcohol Involved                      0
HazMat Involved                       0
Crash Type Code                       0
Total Vehicles Involved               0
Crash Location                        0
Location Direction                    0
Route                                 0
Route Suffix                          0
SRI                                   0
MP                                11555
Road System                           0


In [24]:
(~c20lls.Lat.isna()).sum(), (~c20lls.Lon.isna()).sum()

(98544, 98544)

In [25]:
pd.crosstab(~c20lls.Latitude.isna(), ~c20lls.LAT.isna())

LAT,False,True
Latitude,Unnamed: 1_level_1,Unnamed: 2_level_1
False,96930,36815
True,26149,35580


### Plot crashes

In [None]:
fig = px.scatter_mapbox(
    c20lls,
    lat="Lat", lon="Lon",
    color='Severity',
    #hover_name="City",
    hover_data=["County Name", "Date", "SRI", "MP"],
    color_discrete_sequence=["yellow", "orange", "red"],
    center=dict(lat=40.29, lon=-74.715),
    zoom=7.6,
    height=1000,
)
legend_bgcolor = '50'
fig.update_layout(
    **mapbox,
    title=dict(
        text="NJ Crashes, 2020<br><sup>(mostly NJ/US roads, county/city data incomplete)</sup>",
        x=0.5, y=0.95,
        xanchor='center', yanchor='top',
        font=dict(size=32, color="white")
    ),
    legend=dict(
        x=0.98, y=0.90,
        xanchor="right", yanchor="top",
        font=dict(
            size=14,
            color="white"
        ),
        bgcolor=f"rgba({legend_bgcolor},{legend_bgcolor},{legend_bgcolor},0.8)",
        bordercolor="white",
        borderwidth=2,
    ),
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.write_image('nj-crashes-mapbox.png', width=800, height=1000)
fig.write_json('nj-crashes-mapbox.json')
fig.show()

In [28]:
c20lls[['Lat', 'Lon']].value_counts().value_counts().sort_index()

count
1      35247
2       6122
3       2577
4       1490
5        906
6        628
7        472
8        303
9        255
10       174
11       172
12       116
13       127
14        70
15        73
16        57
17        42
18        32
19        34
20        30
21        38
22        27
23        15
24        15
25        22
26        10
27        15
28        13
29        10
30        12
31         7
32        10
33         4
34         2
35         3
36         2
37         3
38         3
39         4
40         4
41         2
42         3
43         6
44         2
45         3
47         1
48         2
49         2
50         1
52         2
53         1
54         1
56         1
57         1
58         1
59         1
62         2
63         1
65         1
66         1
68         1
76         2
79         1
84         1
87         1
103        1
108        1
162        1
240        2
Name: count, dtype: int64

## Double-LL crashes only

In [29]:
has_lls = c20lls[['Latitude', 'Longitude', 'LAT', 'LON']].dropna()
has_lls

Unnamed: 0,Latitude,Longitude,LAT,LON
2,39.413761,-74.512961,39.414805,-74.512372
1225,39.232200,-74.285700,39.389749,-74.485318
1226,39.362230,-74.440650,39.362405,-74.440298
1227,39.965490,-75.651960,39.371281,-74.430454
1228,39.549720,-74.732950,39.375739,-74.478465
...,...,...,...,...
195461,40.853190,-75.057040,40.850427,-75.057877
195462,40.840300,-75.035930,40.840211,-75.037069
195465,40.841300,-75.049060,40.841637,-75.049830
195467,40.812200,-75.049400,40.840519,-75.035219


### Check distance between LAT/LON (from SRI/MP10) and crash Latitude/Longitude

In [30]:
ll1 = gpd.GeoDataFrame(geometry=has_lls[['Longitude', 'Latitude']].apply(Point, axis=1), crs=mp10s.crs).geometry.rename('ll')
ll2 = gpd.GeoDataFrame(geometry=has_lls[['LON', 'LAT']].apply(Point, axis=1), crs=mp10s.crs).geometry.rename('LL')
sxs(ll1, ll2)

Unnamed: 0,ll,LL
2,POINT (-74.513 39.414),POINT (-74.512 39.415)
1225,POINT (-74.286 39.232),POINT (-74.485 39.390)
1226,POINT (-74.441 39.362),POINT (-74.440 39.362)
1227,POINT (-75.652 39.965),POINT (-74.430 39.371)
1228,POINT (-74.733 39.550),POINT (-74.478 39.376)
...,...,...
195461,POINT (-75.057 40.853),POINT (-75.058 40.850)
195462,POINT (-75.036 40.840),POINT (-75.037 40.840)
195465,POINT (-75.049 40.841),POINT (-75.050 40.842)
195467,POINT (-75.049 40.812),POINT (-75.035 40.841)


In [31]:
def dist(r):
    p1, p2 = r.ll, r.LL
    [(p1lon, p1lat)] = list(p1.coords)
    [(p2lon, p2lat)] = list(p2.coords)
    m = distance.geodesic([p1lat, p1lon], [p2lat, p2lon]).m
    return m
    #[p1lat, p1lon], [p2lat, p2lon], 

distances = sxs(ll1, ll2).apply(dist, axis=1)
distances

2            126.494776
1225       24542.661131
1226          36.028104
1227      123845.175268
1228       29201.990896
              ...      
195461       314.825136
195462        96.593374
195465        74.937886
195467      3364.687476
195473         1.939370
Length: 35580, dtype: float64

In [32]:
distances.sort_values()

146596         0.068508
145745         0.068508
50677          0.142478
49942          0.142478
49956          0.142478
              ...      
54943     289853.175823
55032     293729.898469
60036     296267.618928
157275    303798.202658
14854     305749.524213
Length: 35580, dtype: float64

### Distance between LL types

In [33]:
px.line(distances.sort_values().reset_index(drop=True).rename('Distance (m)'), log_y=True)

In [34]:
worst = distances.sort_values().tail().index
worst = has_lls.loc[worst]
worst

Unnamed: 0,Latitude,Longitude,LAT,LON
54943,38.23185,-74.94964,40.777388,-74.201208
55032,38.12964,-74.25651,40.774958,-74.203274
60036,38.0561,-74.5681,40.711418,-74.226265
157275,38.26146,-75.26465,40.863809,-74.171926
14854,38.25646,-74.25654,41.006495,-74.069929


### Inspect LLs inside/outside of NJ boundary

In [35]:
nj_lls = has_lls[['LAT','LON']].apply(is_nj, axis=1)

In [36]:
len(nj_lls), nj_lls.sum()

(35580, 35573)

In [37]:
has_lls[~nj_lls]

Unnamed: 0,Latitude,Longitude,LAT,LON
16560,41.01723,-74.29673,41.112863,-74.162487
16979,41.06221,-74.06219,41.066115,-74.056762
16987,41.06491,-74.05796,41.066115,-74.056762
17002,41.06607,-74.05669,41.066115,-74.056762
17011,41.06607,-74.05669,41.066115,-74.056762
36761,39.92214,-75.11618,39.953016,-75.134612
85161,40.71066,-74.19688,40.727438,-74.021306


In [38]:
county_lls = has_lls[['LAT','LON']].apply(get_county, axis=1)
county_lls

2         Atlantic County
1225      Atlantic County
1226      Atlantic County
1227      Atlantic County
1228      Atlantic County
               ...       
195461      Warren County
195462      Warren County
195465      Warren County
195467      Warren County
195473      Warren County
Length: 35580, dtype: object

In [39]:
county_lls.value_counts().sort_index()

Atlantic County      1895
Bergen County        1878
Burlington County    2301
Camden County        2243
Cape May County       393
Cumberland County     743
Essex County         2907
Gloucester County    2043
Hudson County         837
Hunterdon County      739
Mercer County         943
Middlesex County     3586
Monmouth County      2348
Morris County        2505
Ocean County         2467
Passaic County       2173
Salem County          412
Somerset County      1317
Sussex County         468
Union County         2755
Warren County         620
Name: count, dtype: int64

In [40]:
has_lls[county_lls.isna()]

Unnamed: 0,Latitude,Longitude,LAT,LON
16560,41.01723,-74.29673,41.112863,-74.162487
16979,41.06221,-74.06219,41.066115,-74.056762
16987,41.06491,-74.05796,41.066115,-74.056762
17002,41.06607,-74.05669,41.066115,-74.056762
17011,41.06607,-74.05669,41.066115,-74.056762
36761,39.92214,-75.11618,39.953016,-75.134612
85161,40.71066,-74.19688,40.727438,-74.021306


In [41]:
%%time
county_lls2 = has_lls[['Latitude','Longitude']].rename(columns={'Latitude':'LAT', 'Longitude':'LON'}).apply(get_county, axis=1)
county_lls2

CPU times: user 28 s, sys: 41.5 ms, total: 28 s
Wall time: 28 s


2         Atlantic County
1225                 None
1226      Atlantic County
1227                 None
1228      Atlantic County
               ...       
195461      Warren County
195462      Warren County
195465      Warren County
195467      Warren County
195473      Warren County
Length: 35580, dtype: object

In [42]:
county_lls2.value_counts().sort_index()

Atlantic County      1883
Bergen County        1861
Burlington County    2155
Camden County        2254
Cape May County       412
Cumberland County     750
Essex County         2692
Gloucester County    1956
Hudson County         714
Hunterdon County      757
Mercer County         911
Middlesex County     3536
Monmouth County      2647
Morris County        2574
Ocean County         2503
Passaic County       2122
Salem County          386
Somerset County      1397
Sussex County         447
Union County         2596
Warren County         619
Name: count, dtype: int64

In [43]:
has_lls[county_lls2.isna()]

Unnamed: 0,Latitude,Longitude,LAT,LON
1225,39.23220,-74.28570,39.389749,-74.485318
1227,39.96549,-75.65196,39.371281,-74.430454
1255,39.65214,-75.65984,39.362405,-74.440298
1297,39.37397,-74.22548,39.362405,-74.440298
2673,39.95119,-75.99519,39.414789,-74.552270
...,...,...,...,...
193932,40.89890,-75.89850,40.927104,-75.086873
193934,40.25450,-75.25410,40.932295,-75.100132
193945,40.89800,-75.89800,40.922454,-75.057356
193956,40.89800,-75.89800,40.927749,-75.088819
