# Compute crash stats

Crash records come with some denormalized summary stats:
- "Total killed" (`tk`)
- "Total injured" (`ti`)
- "Pedestrians killed" (`pk`)
- "Pedestrians injured" (`pi`)
- "Total vehicles involved" (`tv`)
- "Crash severity" (`severity`: `f` iff `tk > 0`, else `i` if `ti > 0`, else `p`)

These don't always match the normalized stats we can obtain by joining `crashes` against the `pedestrians`, `occupants`, and `vehicles` tables.

Here we overwrite `crashes.{parquet,db}` to include the normalized stats inferred from the other tables. We also perform a few checks to identify sources of discrepancies. In most cases, it seems like the "totals" listed on the crash record simply mistake pedestrians for occupants, or vice versa.

In [1]:
from utz import *
from njdot import crashes, vehicles, occupants, pedestrians
from nj_crashes.utils import sql

In [2]:
%%time
# Load the crash records (indexed by `id`). These rows carry the denormalized
# summary columns (tk, ti, pk, pi, tv, severity) that the rest of the notebook
# compares against counts derived from the other tables.
c = crashes.load()
c

Reading /Users/ryan/c/neighbor-ryan/nj-crashes/njdot/data/crashes.parquet


CPU times: user 4.96 s, sys: 1.18 s, total: 6.14 s
Wall time: 4.32 s


Unnamed: 0_level_0,dt,year,cc,mc,case,pdc,pdn,station,tk,ti,pk,pi,severity,Intersection,alcohol,hazmat,crash_type,tv,road,road_direction,route,Route Suffix,sri,mp,road_system,road_character,road_surface,surface_condition,light_condition,env_condition,road_divided,ttcz,cross_street_distance,Unit Of Measurement,Direction From Cross Street,cross_street,Is Ramp,ramp_route,Ramp To/From Route Direction,speed_limit,speed_limit_cross,olat,olon,cell_phone,Other Property Damage,Reporting Badge No.,horizontal_alignment,road_grade,first_harmful_event,occ,reason,ilon,ilat,icc
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1
0,2001-12-21 18:34:00,2001,1,1,#2001-17846,01,Absecon City,MUNICIPAL COMP?,0,0,0,0,p,B,False,False,3,2,CALDERON AVENUE,,,,,,7,2,2,1,6,1,5,1,100,FE,N,RT 30,,,,25,,,,False,NEVER SAW V-1 MINOR DAMAGE - NO INJURIES REPOR...,830,,,,,No MP,,,
1,2001-01-01 09:30:00,2001,1,1,01-00029,1,Absecon,,0,0,0,0,p,B,False,False,6,2,RITZ DRIVE,,,,,,7,1,2,3,6,1,5,1,,,,,,,,25,,,,False,,836,,,,,No MP,,,
2,2001-04-10 14:44:00,2001,1,1,01-004615,1,Absecon,,0,4,0,0,i,I,False,False,3,2,MORTON AVENUE,,,,,,7,1,2,1,1,1,5,1,,AT,,NEW YORK AVENUE,,,,25,,,,False,,836,,,,,No MP,,,
3,2001-04-15 13:56:00,2001,1,1,01-004880,1,Absecon,,0,1,0,0,i,B,False,False,1,2,RT 30,,30,,00000030__,51.099998,2,1,2,1,1,1,5,1,,,,,,,,45,,,,False,WITNESS-PETRIA GIBSON AND ANGELO HERSH,886,,,,,,-74.512308,39.432705,1
4,2001-04-16 10:29:00,2001,1,1,01-004912,1,Absecon,,0,0,0,0,p,I,False,False,1,2,CR 651,,651,,,,5,2,2,2,1,2,5,1,,AT,,CR 630,,,,35,,,,False,*BETH VEHICLES-RELIANCE INSURANCE CO.(215)864-...,836,,,,,No MP,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6077298,2021-05-26 21:08:00,2021,21,23,B150-2021-00117A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,I,False,False,11,1,WARREN COUNTY 620,N,620,,21000620__,0.000000,5,,2,1,6,1,5,1,,AT,,ROUTE 519 / S BRIDGEVILLE RD,,,,50,50,40.80707,-75.07559,False,Damage to 10ft of Country road guardrail.,7837,2,4,46,21,,-75.075607,40.807156,21
6077299,2021-05-25 07:26:00,2021,21,23,B150-2021-00118A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,I,False,False,1,2,MANUNKA CHUNK RD / UPPER SEREPTA RD **,W,,,21231034__,1.730000,7,,2,1,1,1,5,1,,AT,,MASSENATTS RD,,,,40,50,40.84466,-75.05549,False,,8610,1,4,26,21,,-75.056122,40.844345,21
6077300,2021-06-03 20:17:00,2021,21,23,B150-2021-00128A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,B,False,False,12,1,ROUTE 519,S,519,,00000519__,44.000000,5,,2,2,5,4,5,1,,,,,,,,50,,40.80564,-75.07627,False,,8514,3,4,24,21,,-75.015149,40.863108,21
6077301,2021-06-06 00:00:00,2021,21,23,B150-2021-00130A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,B,False,False,12,1,US 46,E,46,,00000046__,7.500000,2,,2,1,7,1,5,1,,,,,,,,50,,40.84149,-75.04968,False,,8365,1,4,24,21,,-75.046404,40.840421,21


In [3]:
# Load the pedestrian records. `crash_id` ties each row to its crash, and
# `condition` is the code used below to count pedestrians killed / injured.
p = pedestrians.load()
p

Reading /Users/ryan/c/neighbor-ryan/nj-crashes/njdot/data/pedestrians.parquet


Unnamed: 0_level_0,crash_id,pn,condition,city,state,zip,dob,age,sex,alc_test_given,alc_test_type,alc_test_results,charge1,summons1,traffic_controls,cir1,cir2,dir,act,inj_loc,inj_type,med_refused,safety_used,hospital,status1,cyclist,other,charge2,summons2,charge3,summons3,charge4,summons4,status2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
0,7,1,3,ABSECON,NJ,08201,07/28/1990,10,M,,,,,,,,,,41,1,5,,,,1,False,False,,,,,,,
1,48,31,3,ABSECON,NJ,08201,01/16/1967,34,M,,,,,,,,,,,1,4,,,,,True,False,,,,,,,
2,76,1,2,EGG HARBOR TWP.,NJ,08234,11/16/1952,48,F,N,,,,,,,,,46,12,3,,,,1,False,False,,,,,,,
3,114,1,4,GALLOWAY TWP.,NJ,08205,03/31/1959,42,F,N,,,SUBPOENA,,,,,,49,7,,,,,1,False,False,,,,,,,
4,236,1,4,ABSECON,NJ,08201,04/18/1953,48,M,N,,,,,,,,,43,1,4,,,,1,False,False,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178516,6076961,1,4,WASHINGTON,NJ,07882,,74,F,N,,,,,5,25,,3,43,6,8,2,1,7102,1,False,False,,,,,,,
178517,6076969,31,3,WASHINGTON,NJ,07882,,17,M,N,,,,,4,25,,2,3,11,5,1,8,,1,True,False,,,,,,,
178518,6076973,1,1,PORT MURRAY,NJ,07865,,59,M,N,,,,,4,73,89,0,36,12,3,,,7102,2,False,False,,,,,,,
178519,6077021,1,2,WASHINGTON,NJ,07882,,65,M,N,,,,,11,89,,3,42,12,3,2,,99,1,False,False,,,,,,,


In [4]:
# Load the vehicle-occupant records. `crash_id` ties each row to its crash, and
# `condition` is the code used below to count occupants killed / injured.
o = occupants.load()
o

Reading /Users/ryan/c/neighbor-ryan/nj-crashes/njdot/data/occupants.parquet


Unnamed: 0_level_0,crash_id,vehicle_id,on,condition,pos,eject,age,sex,inj_loc,inj_type,med_refused,safety_avail,safety_used,airbag,hospital
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,0,0,1,,1,1,38,M,,,,4,4,,
1,0,1,2,,1,1,63,F,,,,4,4,,
2,1,2,1,,,,,,,,,,,,
3,2,4,1,3,1,1,29,F,6,8,,4,4,,
4,2,4,2,3,3,1,7,M,8,5,,4,4,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13756721,6077301,11450823,1,5,1,1,74,M,,,01,11,4,,
13756722,6077302,11450824,1,5,1,1,23,M,,,,11,4,,
13756723,6077302,11450824,2,5,3,1,23,F,,,,11,4,,
13756724,6077302,11450824,3,5,6,1,25,M,,,,11,4,,


In [5]:
# Load the vehicle records. The number of rows per `crash_id` is used below as
# the computed "total vehicles" figure.
v = vehicles.load()
v

Reading /Users/ryan/c/neighbor-ryan/nj-crashes/njdot/data/vehicles.parquet


Unnamed: 0_level_0,crash_id,vn,ins_co,owner_state,make,model,color,vy,state,rm_by,impact_loc,damage_loc,type,use,cargo_type,cir1,cir2,dir,act,ev1,ev2,ev3,ev4,oversize,hit_run,departure,damage,ev
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
0,0,1,426,NJ,NISSAN MAXIMA,,BUR,1991,NJ,,8,7,1,,,25,,1,3,26,,,,,False,1,,
1,0,2,989,NJ,LINCOLN TOWNCAR,,BK,1996,NJ,2,12,12,6,,0,4,,2,3,26,,,,0,False,1,,
2,1,1,962,NJ,TOYOTA 4DR,,GRN,1997,NJ,1,11,,1,,,25,,3,10,28,,,,,False,1,,
3,1,2,,,,,,0,,,0,0,5,,0,2,,1,1,26,,,,0,False,0,,
4,2,1,85,NJ,CHEVY CORSICA,,PUR,1996,NJ,3,8,15,1,,,25,,3,1,26,1,,,,False,6,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11450821,6077299,2,962,NJ,HYUNDAI,SONATA,BK,2012,NJ,2,6,6,1,01,,25,,4,3,26,,,,,False,1,3,26
11450822,6077300,1,134,NJ,SUBARU,FORRESTER,SL,2015,NJ,1,11,11,4,01,,57,,3,1,24,,,,2,False,1,3,24
11450823,6077301,1,148,NJ,TOYOTA,COROLLA,BK,2009,NJ,1,11,11,1,01,,57,,2,1,24,,,,,False,1,3,24
11450824,6077302,1,134,NJ,HYUNDAI,SONATA,BL,2013,NJ,1,1,1,1,01,,4,,4,1,26,,,,,False,1,2,26


In [6]:
%%time
cok = (o[o.condition == 1].crash_id.value_counts()).rename('cok')
coi = (o[(o.condition >= 2) & (o.condition <= 4)].crash_id.value_counts()).rename('coi')
coki = sxs(cok, coi).fillna(0).astype(int)
coki

CPU times: user 430 ms, sys: 216 ms, total: 646 ms
Wall time: 730 ms


Unnamed: 0_level_0,cok,coi
crash_id,Unnamed: 1_level_1,Unnamed: 2_level_1
940642,6,0
5677035,5,0
2553102,5,3
1287068,5,0
2311152,4,1
...,...,...
2268672,0,1
2268675,0,1
2268680,0,1
2268689,0,1


In [7]:
%%time
cpk = (p[p.condition == 1].crash_id.value_counts()).rename('cpk')
cpi = (p[(p.condition >= 2) & (p.condition <= 4)].crash_id.value_counts()).rename('cpi')
cpki = sxs(cpk, cpi).fillna(0).astype(int)
cpki

CPU times: user 44.7 ms, sys: 12.3 ms, total: 56.9 ms
Wall time: 64.9 ms


Unnamed: 0_level_0,cpk,cpi
crash_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3994086,3,0
383116,3,0
355837,3,0
3623003,2,0
4654690,2,1
...,...,...
1791269,0,1
1791249,0,1
1791209,0,1
1791179,0,1


In [8]:
%%time
ctv = (v.crash_id.value_counts()).rename('ctv')
ctv

CPU times: user 215 ms, sys: 79.1 ms, total: 294 ms
Wall time: 326 ms


crash_id
5201624    37
843638     26
1426238    25
4641884    20
670341     20
           ..
2177675     1
994117      1
2177672     1
3563767     1
2819231     1
Name: ctv, Length: 6076552, dtype: int64

In [9]:
# Occupant stats implied by the crash record itself: the recorded totals minus
# the recorded pedestrian share. These can come out negative when a record
# lists more pedestrians than its total.
ok = c['tk'].sub(c['pk']).rename('ok')
oi = c['ti'].sub(c['pi']).rename('oi')

In [10]:
# One row per crash, pairing each recorded stat with its computed counterpart
# (the `c`-prefixed column). Missing values become 0.
cc = (
    sxs(
        c['pk'], cpk,   # pedestrians killed
        c['pi'], cpi,   # pedestrians injured
        ok, cok,        # occupants killed
        oi, coi,        # occupants injured
        c['tv'], ctv,   # vehicles involved
    )
    .fillna(0)
    .astype(int)
)
cc

Unnamed: 0,pk,cpk,pi,cpi,ok,cok,oi,coi,tv,ctv
0,0,0,0,0,0,0,0,0,2,2
1,0,0,0,0,0,0,0,0,2,2
2,0,0,0,0,0,0,4,4,2,2
3,0,0,0,0,0,0,1,1,2,2
4,0,0,0,0,0,0,0,0,2,2
...,...,...,...,...,...,...,...,...,...,...
6077298,0,0,0,0,0,0,0,0,1,1
6077299,0,0,0,0,0,0,0,0,2,2
6077300,0,0,0,0,0,0,0,0,1,1
6077301,0,0,0,0,0,0,0,0,1,1


In [11]:
def kct(k):
    """Cross-tabulate recorded stat `k` against its computed counterpart `c{k}`.

    Both columns are read from the module-level `cc` frame. Rows are the values
    of `cc[k]`, columns are the values of `cc['c' + k]`, and each cell is the
    number of crashes with that combination. Cells equal to 0 are replaced by
    the empty string so that only populated combinations stand out.
    """
    recorded = cc[k]
    computed = cc[f'c{k}']
    counts = pd.crosstab(recorded, computed)
    return counts.replace(0, '')

In [12]:
# Pedestrians killed: recorded `pk` (rows) vs. computed `cpk` (columns).
kct('pk')

cpk,0,1,2,3
pk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,6073663.0,,,
1,,3597.0,,
2,,,40.0,
3,,,,3.0


In [13]:
# Occupants killed: recorded `ok` = tk - pk (rows) vs. computed `cok` (columns).
# Negative row labels mean the record lists more pedestrians killed than total killed.
kct('ok')

cok,0,1,2,3,4,5,6
ok,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
-2,1.0,,,,,,
-1,14.0,,,,,,
0,6068232.0,16.0,,,,,
1,38.0,8347.0,1.0,,,,
2,,,544.0,,,,
3,,,,90.0,,,
4,,,,,16.0,,
5,,,,,,3.0,
6,,,,,,,1.0


In [14]:
# Pedestrians injured: recorded `pi` (rows) vs. computed `cpi` (columns).
kct('pi')

cpi,0,1,2,3,4,5,6,7,8,9,10,16
pi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,5944994.0,,,,,,,,,,,
1,,129557.0,,,,,,,,,,
2,,,2450.0,,,,,,,,,
3,,,,241.0,,,,,,,,
4,,,,,37.0,,,,,,,
5,,,,,,17.0,,,,,,
6,,,,,,,2.0,,,,,
7,,,,,,,,1.0,,,,
8,,,,,,,,,1.0,,,
9,,,,,,,,,,1.0,,


In [15]:
# Occupants injured: recorded `oi` = ti - pi (rows) vs. computed `coi` (columns).
# Off-diagonal cells are crashes where the two sources disagree.
kct('oi')

coi,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,35,36,37,40,42,44,56
oi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
-3,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-1,16.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,4817951.0,75.0,7.0,3.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,221.0,888199.0,56.0,10.0,4.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,24.0,29.0,259170.0,24.0,6.0,3.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,8.0,2.0,10.0,73496.0,7.0,1.0,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,3.0,2.0,1.0,7.0,24515.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,2.0,1.0,1.0,2.0,1.0,9271.0,2.0,2.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,2.0,1.0,,,,1.0,2235.0,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,,,,,,,,951.0,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [16]:
# Vehicles involved: recorded `tv` (rows) vs. computed `ctv` (columns).
kct('tv')

ctv,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,25,26,37
tv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
0,751.0,,,,,,,,,,,,,,,,,,,,,,,
1,,1171481.0,,,,,,,,,,,,,,,,,,,,,,
2,,,4520681.0,,,,,,,,,,,,,,,,,,,,,
3,,,,318517.0,,,,,,,,,,,,,,,,,,,,
4,,,,,52939.0,,,,,,,,,,,,,,,,,,,
5,,,,,,9378.0,,,,,,,,,,,,,,,,,,
6,,,,,,,2361.0,,,,,,,,,,,,,,,,,
7,,,,,,,,681.0,,,,,,,,,,,,,,,,
8,,,,,,,,,251.0,,,,,,,,,,,,,,,
9,,,,,,,,,,115.0,,,,,,,,,,,,,,


In [17]:
# Sanity check on the recorded data: severity is 'f' exactly when tk > 0
# (both off-diagonal cells of this 2x2 table should be 0).
pd.crosstab(c.severity == 'f', c.tk > 0)

tk,False,True
severity,Unnamed: 1_level_1,Unnamed: 2_level_1
False,6064650,0
True,0,12653


In [18]:
# Recorded stats that get replaced by the value computed from the other tables.
# NOTE(review): the occupant stats ('ok', 'oi') are intentionally not listed, so
# the occupant share of tk/ti keeps coming from the crash record - confirm this
# is still the desired behaviour.
cols = ['pk', 'pi', 'tv']

In [19]:
# Build the frame that will replace the stored crashes table.
c2 = c.copy()

# Stats listed in `cols` take the value computed from the other tables (`c<stat>` in `cc`).
for stat in cols:
    c2[stat] = cc[f'c{stat}'].fillna(0).astype(int)

# Totals = pedestrian share + occupant share, both read from the *recorded*
# columns of `cc`.
# NOTE(review): since ok = tk - pk and oi = ti - pi, this reproduces the recorded
# totals (with missing values as 0) rather than the computed ones - confirm intended.
c2['tk'] = cc['pk'] + cc['ok']
c2['ti'] = cc['pi'] + cc['oi']

# Severity: 'p' by default, 'i' if anyone was injured, 'f' if anyone was killed.
# Order matters: the fatal rule is applied last so that it wins over 'i'.
c2['severity'] = 'p'
for code, total in (('i', 'ti'), ('f', 'tk')):
    c2.loc[c2[total] > 0, 'severity'] = code
c2

Unnamed: 0_level_0,dt,year,cc,mc,case,pdc,pdn,station,tk,ti,pk,pi,severity,Intersection,alcohol,hazmat,crash_type,tv,road,road_direction,route,Route Suffix,sri,mp,road_system,road_character,road_surface,surface_condition,light_condition,env_condition,road_divided,ttcz,cross_street_distance,Unit Of Measurement,Direction From Cross Street,cross_street,Is Ramp,ramp_route,Ramp To/From Route Direction,speed_limit,speed_limit_cross,olat,olon,cell_phone,Other Property Damage,Reporting Badge No.,horizontal_alignment,road_grade,first_harmful_event,occ,reason,ilon,ilat,icc
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1
0,2001-12-21 18:34:00,2001,1,1,#2001-17846,01,Absecon City,MUNICIPAL COMP?,0,0,0,0,p,B,False,False,3,2,CALDERON AVENUE,,,,,,7,2,2,1,6,1,5,1,100,FE,N,RT 30,,,,25,,,,False,NEVER SAW V-1 MINOR DAMAGE - NO INJURIES REPOR...,830,,,,,No MP,,,
1,2001-01-01 09:30:00,2001,1,1,01-00029,1,Absecon,,0,0,0,0,p,B,False,False,6,2,RITZ DRIVE,,,,,,7,1,2,3,6,1,5,1,,,,,,,,25,,,,False,,836,,,,,No MP,,,
2,2001-04-10 14:44:00,2001,1,1,01-004615,1,Absecon,,0,4,0,0,i,I,False,False,3,2,MORTON AVENUE,,,,,,7,1,2,1,1,1,5,1,,AT,,NEW YORK AVENUE,,,,25,,,,False,,836,,,,,No MP,,,
3,2001-04-15 13:56:00,2001,1,1,01-004880,1,Absecon,,0,1,0,0,i,B,False,False,1,2,RT 30,,30,,00000030__,51.099998,2,1,2,1,1,1,5,1,,,,,,,,45,,,,False,WITNESS-PETRIA GIBSON AND ANGELO HERSH,886,,,,,,-74.512308,39.432705,1
4,2001-04-16 10:29:00,2001,1,1,01-004912,1,Absecon,,0,0,0,0,p,I,False,False,1,2,CR 651,,651,,,,5,2,2,2,1,2,5,1,,AT,,CR 630,,,,35,,,,False,*BETH VEHICLES-RELIANCE INSURANCE CO.(215)864-...,836,,,,,No MP,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6077298,2021-05-26 21:08:00,2021,21,23,B150-2021-00117A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,I,False,False,11,1,WARREN COUNTY 620,N,620,,21000620__,0.000000,5,,2,1,6,1,5,1,,AT,,ROUTE 519 / S BRIDGEVILLE RD,,,,50,50,40.80707,-75.07559,False,Damage to 10ft of Country road guardrail.,7837,2,4,46,21,,-75.075607,40.807156,21
6077299,2021-05-25 07:26:00,2021,21,23,B150-2021-00118A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,I,False,False,1,2,MANUNKA CHUNK RD / UPPER SEREPTA RD **,W,,,21231034__,1.730000,7,,2,1,1,1,5,1,,AT,,MASSENATTS RD,,,,40,50,40.84466,-75.05549,False,,8610,1,4,26,21,,-75.056122,40.844345,21
6077300,2021-06-03 20:17:00,2021,21,23,B150-2021-00128A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,B,False,False,12,1,ROUTE 519,S,519,,00000519__,44.000000,5,,2,2,5,4,5,1,,,,,,,,50,,40.80564,-75.07627,False,,8514,3,4,24,21,,-75.015149,40.863108,21
6077301,2021-06-06 00:00:00,2021,21,23,B150-2021-00130A,02,New Jersey State Police,WASHINGTON - SQ,0,0,0,0,p,B,False,False,12,1,US 46,E,46,,00000046__,7.500000,2,,2,1,7,1,5,1,,,,,,,,50,,40.84149,-75.04968,False,,8365,1,4,24,21,,-75.046404,40.840421,21


In [20]:
# Per-column check that `c2` still equals `c`, treating a missing value in both
# frames as equal. Any column reported as not-all-equal was changed by the
# overrides above.
((c == c2) | (c.isna() & c2.isna())).all()

dt                              0
year                            0
cc                              0
mc                              0
case                            0
pdc                             0
pdn                             0
station                         0
tk                              0
ti                              0
pk                              0
pi                              0
severity                        0
Intersection                    0
alcohol                         0
hazmat                          0
crash_type                      0
tv                              0
road                            0
road_direction                  0
route                           0
Route Suffix                    0
sri                             0
mp                              0
road_system                     0
road_character                  0
road_surface                    0
surface_condition               0
light_condition                 0
env_condition 

In [21]:
from njdot import CRASHES_PQT, CRASHES_DB

In [22]:
# Persist the updated crashes frame to CRASHES_PQT.
# NOTE(review): presumably this is the same file `crashes.load()` reads at the
# top of the notebook, in which case a re-run starts from already-updated data - confirm.
c2.to_parquet(CRASHES_PQT)

In [23]:
from njdot.load import CRASH_IDXS

In [24]:
%%time
sql.write(
    c2, 'crashes', CRASHES_DB,
    idxs=crash_idxs,
    rm=True,
    replace=False,
    page_size=2**16,
)

Removing /Users/ryan/c/neighbor-ryan/nj-crashes/www/public/njdot/crashes.db
Writing 6077303 rows to /Users/ryan/c/neighbor-ryan/nj-crashes/www/public/njdot/crashes.db
Wrote DB: 1315594240 bytes
After indices: 1932619776 bytes


CPU times: user 2min 55s, sys: 1min 34s, total: 4min 29s
Wall time: 7min 43s


After setting page_size=65536 and vacuum: 1883701248 bytes


In [25]:
# S3 client used to publish the rebuilt crashes DB.
import boto3
s3 = boto3.client('s3')

In [26]:
# Upload the rebuilt DB to the `nj-crashes` bucket under `njdot/data/<DB file name>`.
s3.upload_file(CRASHES_DB, Bucket='nj-crashes', Key=f'njdot/data/{basename(CRASHES_DB)}')