In [1]:
from utz import *
from njdot.data import Data, START_YEAR, END_YEAR, YEARS, YPK, PK, hist
from njdot.codes import *

In [2]:
# Handle to the NJDOT tables; its repr (shown below) lists the available years and record types.
data = Data()
data

Data(years=['2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020'], types=['Crash', 'Driver', 'Occupant', 'Pedestrian', 'Vehicle'], columns=None)

### Load Crashes
The crash dates loaded here are joined onto the Pedestrian and Occupant tables below, to provide those records with dates.

In [3]:
%%time
# Load only the crash columns used below: the crash timestamp, plus the crash-level
# killed/injured totals (overall and pedestrian-only).
cs = data['Crash'].df(['Date', 'Total Killed', 'Total Injured', 'Pedestrians Killed', 'Pedestrians Injured'])
cs

CPU times: user 11.8 s, sys: 5.53 s, total: 17.4 s
Wall time: 17 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Date,Total Killed,Total Injured,Pedestrians Killed,Pedestrians Injured
Year,County Code,Municipality Code,Department Case Number,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2001,01,01,#2001-17846,2001-12-21 18:34:00,0,0,0,0
2001,01,01,01-00029,2001-01-01 09:30:00,0,0,0,0
2001,01,01,01-004615,2001-04-10 14:44:00,0,4,0,0
2001,01,01,01-004880,2001-04-15 13:56:00,0,1,0,0
2001,01,01,01-004912,2001-04-16 10:29:00,0,0,0,0
...,...,...,...,...,...,...,...,...
2020,21,23,B150-2020-00350A,2020-12-09 11:26:00,0,0,0,0
2020,21,23,B150-2020-00354A,2020-12-12 18:45:00,0,0,0,0
2020,21,23,B150-2020-00361A,2020-12-15 06:12:00,1,0,1,0
2020,21,23,B150-2020-00369A,2020-12-19 21:37:00,0,0,0,0


In [4]:
# Crash timestamps keyed by (Year, County Code, Municipality Code, Department Case Number);
# `load_dated_type` slices this per year and joins it onto person-level records.
acd = cs.Date
acd

Year  County Code  Municipality Code  Department Case Number
2001  01           01                 #2001-17846              2001-12-21 18:34:00
                                      01-00029                 2001-01-01 09:30:00
                                      01-004615                2001-04-10 14:44:00
                                      01-004880                2001-04-15 13:56:00
                                      01-004912                2001-04-16 10:29:00
                                                                       ...        
2020  21           23                 B150-2020-00350A         2020-12-09 11:26:00
                                      B150-2020-00354A         2020-12-12 18:45:00
                                      B150-2020-00361A         2020-12-15 06:12:00
                                      B150-2020-00369A         2020-12-19 21:37:00
                                      B150-2020-00383A         2020-12-31 12:47:00
Name: Date, Length: 585034

In [5]:
def load_dated_type(typ, cols):
    """Load `cols` of record type `typ` for every year, with each row's crash `Date` attached.

    For each year in `YEARS`, the year's records are left-joined (index to index) against
    that year's slice of the global crash-date series `acd`.

    Args:
        typ: record type key into `data` (e.g. 'Pedestrian', 'Occupant').
        cols: column names to load from each year's table.

    Returns:
        One DataFrame: the per-year joined frames concatenated in `YEARS` order.

    Raises:
        AssertionError: if any column of a year's joined frame contains a missing value
            (which includes records that found no matching crash date).
    """
    per_year = data[typ]

    def with_dates(year):
        # Left join keeps every record of this type; a record with no matching crash
        # would surface as a NaN `Date` and trip the assertion below.
        joined = per_year[year].df(cols).merge(
            acd.loc[year],
            how='left',
            left_index=True,
            right_index=True,
        )
        assert (joined.isna().sum() == 0).all()
        return joined

    return pd.concat([with_dates(year) for year in YEARS])

## Load Pedestrian / Cyclist Data

In [6]:
%%time
# Pedestrian/cyclist records, each joined with its crash date.
apcd = load_dated_type('Pedestrian', ['Is Bicyclist?', 'Physical Condition'])
# Translate each 'Physical Condition' value through the `physical_condition` lookup (the output
# below shows names such as "FATAL INJURY"). The lookup is a plain subscript, so a value that is
# not a key fails instead of passing through (KeyError, assuming `physical_condition` is a dict).
apcd['Physical Condition'] = apcd['Physical Condition'].apply(lambda v: physical_condition[v])
apcd

CPU times: user 5.92 s, sys: 369 ms, total: 6.29 s
Wall time: 6.42 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Is Bicyclist?,Physical Condition,Date
County Code,Municipality Code,Department Case Number,Year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
01,01,01-007162,2001,False,SUSPECTED MINOR INJURY,2001-06-05 14:41:00
01,01,01-11832,2001,True,SUSPECTED MINOR INJURY,2001-09-11 08:54:00
01,01,01-13335,2001,False,SUSPECTED SERIOUS INJURY,2001-10-09 20:52:00
01,01,01-16145,2001,False,POSSIBLE INJURY,2001-11-29 11:58:00
01,01,01-7828,2001,False,POSSIBLE INJURY,2001-06-20 07:17:00
...,...,...,...,...,...,...
21,21,2020-014845,2020,False,FATAL INJURY,2020-11-04 18:07:00
21,21,2020-016481,2020,False,SUSPECTED MINOR INJURY,2020-12-08 15:12:00
21,22,2020-003616,2020,True,NO APPARENT INJURY,2020-03-10 07:25:00
21,22,2020-005705,2020,True,POSSIBLE INJURY,2020-05-05 15:24:00


In [7]:
# Monthly histogram of pedestrian/cyclist records: one row per (cyclist flag, month),
# one column per physical condition.
pmh = apcd.reset_index(drop=True)
# Truncate each crash date to the first of its month. `to_period('M').dt.to_timestamp()` is used
# instead of `.astype('datetime64[M]')`, which recent pandas versions reject; it also matches
# how `cur_mo` is computed later in this notebook.
pmh['Month'] = pmh.Date.dt.to_period('M').dt.to_timestamp()
pmh = pmh[['Month', 'Is Bicyclist?', 'Physical Condition']].value_counts().sort_index().rename('Count')
pmh = (
    pmh
    .reset_index()
    .pivot(
        index=['Is Bicyclist?', 'Month'],
        columns=['Physical Condition'],
        values='Count',
    )
    .fillna(0)  # (flag, month, condition) combinations that never occur count as 0
    .astype(int)
    .rename(columns=physical_condition2)
)
# Cyclists (`Is Bicyclist?` == True): bk2 = killed per month, bi2 = injured per month.
bki2 = pmh.loc[(True,)]
bk2 = bki2['Fatality'].rename('bk2')
bi2 = bki2[['Serious Injury', 'Minor Injury', 'Possible Injury' ]].sum(1).rename('bi2')
# Pedestrians (`Is Bicyclist?` == False): pk2 = killed per month, pi2 = injured per month.
pki2 = pmh.loc[(False,)]
pk2 = pki2['Fatality'].rename('pk2')
pi2 = pki2[['Serious Injury', 'Minor Injury', 'Possible Injury' ]].sum(1).rename('pi2')
pki2

Physical Condition,Unnamed: 1_level_0,Fatality,No Apparent Injury,Possible Injury,Minor Injury,Serious Injury,Unknown
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-01-01,83,8,0,336,150,34,1
2001-02-01,75,11,0,230,118,19,0
2001-03-01,85,9,0,297,130,40,0
2001-04-01,93,6,0,281,171,38,0
2001-05-01,82,11,0,275,176,42,0
...,...,...,...,...,...,...,...
2020-08-01,52,9,12,84,98,37,0
2020-09-01,44,24,16,97,98,40,0
2020-10-01,65,15,17,102,114,49,0
2020-11-01,45,16,14,96,118,25,0


## Load Occupant (Drivers / Passengers) Data

In [8]:
%%time
# Occupant (driver/passenger) records, each joined with its crash date.
aocd = load_dated_type('Occupant', ['Position In/On Vehicle', 'Ejection Code', 'Physical Condition'])
# Fold the '0?' position code into '00' (presumably both mean "unknown" — TODO confirm
# against the NJDOT code table).
aocd['Position In/On Vehicle'] = aocd['Position In/On Vehicle'].replace('0?', '00')
# 'Physical Condition' is left as raw codes here (unlike the pedestrian table); the codes are
# renamed to labels on the pivoted columns when `omh` is built below.
aocd

CPU times: user 48.2 s, sys: 8.21 s, total: 56.4 s
Wall time: 57.5 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Position In/On Vehicle,Ejection Code,Physical Condition,Date
County Code,Municipality Code,Department Case Number,Year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01,01,#2001-17846,2001,01,01,,2001-12-21 18:34:00
01,01,#2001-17846,2001,01,01,,2001-12-21 18:34:00
01,01,01-00029,2001,,,,2001-01-01 09:30:00
01,01,01-004615,2001,01,01,03,2001-04-10 14:44:00
01,01,01-004615,2001,03,01,03,2001-04-10 14:44:00
...,...,...,...,...,...,...,...
21,23,B150-2020-00361A,2020,01,01,05,2020-12-15 06:12:00
21,23,B150-2020-00369A,2020,01,01,05,2020-12-19 21:37:00
21,23,B150-2020-00369A,2020,03,01,05,2020-12-19 21:37:00
21,23,B150-2020-00383A,2020,01,01,05,2020-12-31 12:47:00


In [9]:
# Occupant record counts per vehicle position, labelled through `vehicle_position`.
hist(aocd['Position In/On Vehicle'], vehicle_position)

                              323463
Unknown                        24022
Driver                       9480728
Passenger (front middle)      115542
Passenger (front right)      1853078
Passenger (row 2 left)        494553
Passenger (row 2 middle)      162452
Passenger (row 2 right)       597021
Passenger (row 3 left)         53737
Passenger (row 3 middle)       47406
Passenger (row 3 right)        57208
Passenger (cargo area)         18747
Riding/Hanging on outside       9294
Bus Passenger                  20333
Name: Position In/On Vehicle, dtype: int64

In [10]:
# Monthly histogram of occupant records: one row per (vehicle position code, month),
# one column per physical condition.
omh = aocd.reset_index(drop=True)
# Truncate each crash date to the first of its month. `to_period('M').dt.to_timestamp()` is used
# instead of `.astype('datetime64[M]')`, which recent pandas versions reject; it also matches
# how `cur_mo` is computed later in this notebook.
omh['Month'] = omh.Date.dt.to_period('M').dt.to_timestamp()
omh = omh[['Month', 'Position In/On Vehicle', 'Physical Condition']].value_counts().sort_index().rename('Count')
omh = (
    omh
    .reset_index()
    .pivot(
        index=['Position In/On Vehicle', 'Month'],
        columns=['Physical Condition'],
        values='Count',
    )
    .fillna(0)  # (position, month, condition) combinations that never occur count as 0
    .astype(int)
    .rename(columns=physical_condition2)
)
# The occupant table still carries raw condition codes, so map code -> name (`physical_condition`)
# and then name -> short label (`physical_condition2`) on the pivoted columns.
omh = omh.rename(columns=physical_condition).rename(columns=physical_condition2)
omh

Unnamed: 0_level_0,Physical Condition,Unnamed: 2_level_0,Unknown,Fatality,Serious Injury,Minor Injury,Possible Injury,No Apparent Injury
Position In/On Vehicle,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,2001-01-01,1678,0,0,1,11,60,0
,2001-02-01,1309,0,0,0,5,18,0
,2001-03-01,1338,0,0,1,5,20,0
,2001-04-01,1323,3,0,0,4,25,0
,2001-05-01,1340,0,0,1,10,40,0
...,...,...,...,...,...,...,...,...
12,2020-08-01,14,0,0,0,8,0,33
12,2020-09-01,48,0,0,0,0,2,57
12,2020-10-01,40,8,0,0,1,11,145
12,2020-11-01,22,0,0,0,5,14,103


### Drivers

In [11]:
# Position code '01' is the driver seat. dk2 = drivers killed per month;
# di2 = drivers injured per month (serious + minor + possible).
dki2 = omh.loc[('01',)]
dk2 = dki2['Fatality'].rename('dk2')
di2 = dki2[['Serious Injury', 'Minor Injury', 'Possible Injury' ]].sum(1).rename('di2')
dki2

Physical Condition,Unnamed: 1_level_0,Unknown,Fatality,Serious Injury,Minor Injury,Possible Injury,No Apparent Injury
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-01-01,37420,14,24,144,1048,5121,0
2001-02-01,30837,10,37,126,938,4054,0
2001-03-01,30542,13,47,128,1007,4436,0
2001-04-01,29509,27,25,141,1107,4337,0
2001-05-01,33962,11,34,149,1244,5299,0
...,...,...,...,...,...,...,...
2020-08-01,10648,387,33,201,1528,2217,14669
2020-09-01,9324,372,35,160,1351,2041,13872
2020-10-01,9743,432,32,186,1450,2465,16452
2020-11-01,8898,329,35,161,1371,2175,16064


### Passengers

In [12]:
# Passengers: every position code from '02' through '12', stacked and then re-summed per month.
# ok2 = passengers killed per month; oi2 = passengers injured per month.
oki2 = (
    pd.concat([omh.loc[(f'{i:02d}',)] for i in range(2, 13)])
    .groupby(level=0)
    .sum()
)
ok2 = oki2['Fatality'].rename('ok2')
oi2 = oki2[['Serious Injury', 'Minor Injury', 'Possible Injury']].sum(axis=1).rename('oi2')
oki2

Physical Condition,Unnamed: 1_level_0,Unknown,Fatality,Serious Injury,Minor Injury,Possible Injury,No Apparent Injury
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-01-01,12935,6,9,50,390,2129,0
2001-02-01,10868,0,11,38,366,1760,0
2001-03-01,11264,0,8,41,424,2083,0
2001-04-01,11732,4,7,33,495,2081,0
2001-05-01,13444,5,19,55,518,2436,0
...,...,...,...,...,...,...,...
2020-08-01,3632,68,13,79,555,989,5433
2020-09-01,2850,27,4,54,431,746,4477
2020-10-01,2881,55,5,38,395,862,5408
2020-11-01,2491,25,8,45,439,754,5123


### Unknown vehicle position

In [13]:
# Unknown position: blank code and '00', stacked and then re-summed per month.
# uk2 = killed per month; ui2 = injured per month.
uki2 = (
    pd.concat([omh.loc[(code,)] for code in ('', '00')])
    .groupby(level=0)
    .sum()
)
uk2 = uki2['Fatality'].rename('uk2')
ui2 = uki2[['Serious Injury', 'Minor Injury', 'Possible Injury']].sum(axis=1).rename('ui2')
uki2

Physical Condition,Unnamed: 1_level_0,Unknown,Fatality,Serious Injury,Minor Injury,Possible Injury,No Apparent Injury
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-01-01,1695,4,2,1,18,69,0
2001-02-01,1314,1,1,1,6,24,0
2001-03-01,1347,1,1,2,5,22,0
2001-04-01,1331,5,0,0,4,30,0
2001-05-01,1349,1,0,1,11,52,0
...,...,...,...,...,...,...,...
2020-08-01,520,34,1,4,12,9,62
2020-09-01,495,19,0,1,3,17,44
2020-10-01,444,15,0,2,4,13,81
2020-11-01,414,15,0,2,4,13,76


In [14]:
# Side-by-side monthly killed (k) / injured (i) counts for drivers (d), passengers (o), unknown position (u).
sxs(dk2, ok2, uk2, di2, oi2, ui2)

Unnamed: 0_level_0,dk2,ok2,uk2,di2,oi2,ui2
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-01-01,24,9,2,6313,2569,88
2001-02-01,37,11,1,5118,2164,31
2001-03-01,47,8,1,5571,2548,29
2001-04-01,25,7,0,5585,2609,34
2001-05-01,34,19,0,6692,3009,64
...,...,...,...,...,...,...
2020-08-01,33,13,1,3946,1623,25
2020-09-01,35,4,0,3552,1231,21
2020-10-01,32,5,0,4101,1295,19
2020-11-01,35,8,0,3707,1238,19


In [15]:
# Overall killed / injured totals among occupants whose vehicle position is unknown.
uk2.sum(), ui2.sum()

(101, 9004)

In [16]:
# Sanity check: the driver / passenger / unknown buckets together account for every occupant fatality.
assert omh.Fatality.sum() == dk2.sum() + ok2.sum() + uk2.sum()

In [17]:
# Distinct position codes present; these should all fall in one of the buckets above
# ('01'; '02'-'12'; '' / '00').
omh.index.get_level_values(0).unique()

Index(['', '00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
       '11', '12'],
      dtype='object', name='Position In/On Vehicle')

## Check KSI stats from "Crashes" table vs. "Pedestrians"/"Occupants" tables

In [18]:
# Monthly pedestrian killed/injured totals as recorded on the crash records themselves
# (pk1 / pi1), for comparison with the person-level counts (pk2 / pi2).
pki = cs.reset_index(drop=True)
pki = (
    pki
    # Month start of each crash date; `.dt.to_timestamp()` replaces the unit-less
    # `.astype('datetime64')` cast, which recent pandas versions reject.
    .assign(Month=pki.Date.dt.to_period('M').dt.to_timestamp())
    [['Month', 'Pedestrians Killed', 'Pedestrians Injured']]
    .groupby('Month').sum()
)
pk1 = pki['Pedestrians Killed'].rename('pk1')
pi1 = pki['Pedestrians Injured'].rename('pi1')
pki

Unnamed: 0_level_0,Pedestrians Killed,Pedestrians Injured
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-01-01,8,520
2001-02-01,11,367
2001-03-01,9,471
2001-04-01,6,504
2001-05-01,11,500
...,...,...
2020-08-01,9,219
2020-09-01,24,235
2020-10-01,15,265
2020-11-01,16,239


In [19]:
# Months where the crash table's pedestrian fatalities (pk1) differ from the Pedestrian table's
# non-cyclist fatalities (pk2); cyclist fatalities (bk2) are shown alongside for reference.
sxs(pk1, pk2, bk2)[pk1 != pk2]

Unnamed: 0_level_0,pk1,pk2,bk2
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-07-01,11,9,5
2006-04-01,7,6,0
2006-05-01,12,13,1
2006-06-01,8,9,1
2006-09-01,15,16,1
2006-11-01,16,15,0
2007-09-01,7,8,1
2008-04-01,12,13,1
2008-07-01,14,13,1
2009-08-01,12,13,1


In [20]:
# Overall totals for the three series compared above.
pk1.sum(), pk2.sum(), bk2.sum()

(3145, 3160, 282)

In [21]:
# Monthly total killed/injured as recorded on the crash records themselves (tk1 / ti1),
# for comparison with the sums rebuilt from the person-level tables (tk2 / ti2).
tki = cs.reset_index(drop=True)
tki = (
    tki
    # Month start of each crash date; `.dt.to_timestamp()` replaces the unit-less
    # `.astype('datetime64')` cast, which recent pandas versions reject.
    .assign(Month=tki.Date.dt.to_period('M').dt.to_timestamp())
    [['Month', 'Total Killed', 'Total Injured']]
    .groupby('Month').sum()
)
tk1 = tki['Total Killed'].rename('tk1')
# Named 'ti1' (was mislabelled 'pi1', colliding with the pedestrian series' name).
ti1 = tki['Total Injured'].rename('ti1')
tki

Unnamed: 0_level_0,Total Killed,Total Injured
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-01-01,44,9549
2001-02-01,61,7733
2001-03-01,65,8696
2001-04-01,41,8902
2001-05-01,68,10483
...,...,...
2020-08-01,57,6004
2020-09-01,64,5261
2020-10-01,54,5827
2020-11-01,62,5313


In [22]:
# Rebuild monthly totals from the person-level tables: pedestrians + passengers + drivers +
# cyclists + unknown-position occupants. Series addition aligns on Month, so a month missing
# from any one series would come out as NaN here.
tk2 = (pk2 + ok2 + dk2 + bk2 + uk2).rename('tk2')
ti2 = (pi2 + oi2 + di2 + bi2 + ui2).rename('ti2')
# Months where the crash-table total killed disagrees with the rebuilt total.
sxs(tk1, tk2)[tk1 != tk2]

Unnamed: 0_level_0,tk1,tk2
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-02-01,40,39
2006-03-01,62,61
2006-04-01,64,63
2006-05-01,68,67
2006-06-01,46,47
2006-07-01,66,65
2006-08-01,67,65
2006-11-01,60,59
2007-09-01,60,59
2008-04-01,55,54


In [23]:
# Overall killed / injured totals: crash table (tk1, ti1) vs. person-level tables (tk2, ti2).
tk1.sum(), tk2.sum(), ti1.sum(), ti2.sum()

(12778, 12773, 1869823, 1869714)

In [24]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.colors import qualitative
default = qualitative.Plotly
# Reorder the default palette to red, purple, blue, green (see the second swatch row below);
# the plot helpers apply these colors to their series in column order.
colors = [default[i] for i in [1, 3, 0, 2]]
from utz.colors import swatches
# Preview the full default palette, then the reordered subset.
swatches(default)
swatches(colors)

<span style="font-family: monospace">#636EFA <span style="color: #636EFA">██████</span></span> <span style="font-family: monospace">#EF553B <span style="color: #EF553B">██████</span></span> <span style="font-family: monospace">#00CC96 <span style="color: #00CC96">██████</span></span> <span style="font-family: monospace">#AB63FA <span style="color: #AB63FA">██████</span></span> <span style="font-family: monospace">#FFA15A <span style="color: #FFA15A">██████</span></span> <span style="font-family: monospace">#19D3F3 <span style="color: #19D3F3">██████</span></span> <span style="font-family: monospace">#FF6692 <span style="color: #FF6692">██████</span></span> <span style="font-family: monospace">#B6E880 <span style="color: #B6E880">██████</span></span> <span style="font-family: monospace">#FF97FF <span style="color: #FF97FF">██████</span></span> <span style="font-family: monospace">#FECB52 <span style="color: #FECB52">██████</span></span>

<span style="font-family: monospace">#EF553B <span style="color: #EF553B">██████</span></span> <span style="font-family: monospace">#AB63FA <span style="color: #AB63FA">██████</span></span> <span style="font-family: monospace">#636EFA <span style="color: #636EFA">██████</span></span> <span style="font-family: monospace">#00CC96 <span style="color: #00CC96">██████</span></span>

In [25]:
# Second crash dataset (presumably NJSP fatal-crash records, given the "NJSP vs. NJDOT" section
# below): per-crash FATAL_D / FATAL_P / FATAL_T / FATAL_B counts and a `dt` timestamp.
spc = read_parquet('../data/crashes.pqt')
spc

Unnamed: 0_level_0,CCODE,CNAME,MCODE,MNAME,HIGHWAY,LOCATION,FATALITIES,INJURIES,STREET,FATAL_D,FATAL_P,FATAL_T,FATAL_B,dt
ACCID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1703,01,Atlantic,0102,Atlantic City,446,State/Interstate Authority 446 S MP 1,1.0,1.0,,,,,,2008-01-01 00:35:00
1681,09,Hudson,0910,Union City,,Bergenline Ave S MP 0 at 6th St,1.0,,Bergenline Ave,,,,,2008-01-01 04:11:00
1659,04,Camden,0415,Gloucester Twsp,42,State Highway 42 N MP 8.2,1.0,1.0,,,,,,2008-01-01 06:46:00
1661,20,Union,2004,Elizabeth City,624,County 624 W MP 2.2 at Ikea Dr,1.0,1.0,,,,,,2008-01-01 12:29:00
1811,07,Essex,0716,Nutley Town,648,County 648 E MP .87 at Franklin Ave,1.0,,,,,,,2008-01-01 18:53:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12406,07,Essex,0714,Newark City,78,Interstate 78 MP 56.1,1.0,,,1.0,0.0,0.0,0.0,2023-04-12 23:17:00
12407,02,Bergen,0205,Carlstadt Boro,17,State Highway 17,1.0,,,1.0,0.0,0.0,0.0,2023-04-13 03:23:00
12408,09,Hudson,0907,Kearny Town,,Bergen Ave,1.0,,Bergen Ave,1.0,0.0,0.0,0.0,2023-04-14 02:00:00
12413,20,Union,2002,Clark Twsp,,Woodcrest Dr,1.0,,Woodcrest Dr,0.0,0.0,1.0,0.0,2023-04-14 09:41:00


In [26]:
# Snapshot "now" once and derive both the current year and the current month start from that
# single reading, so the two values can never straddle a month/year boundary.
cur_dt = to_dt(dt.now())
cur_yr = cur_dt.year
cur_mo = cur_dt.to_period('M').to_timestamp()
cur_yr, cur_mo

(2023, Timestamp('2023-04-01 00:00:00'))

In [27]:
# Monthly sums of the per-crash FATAL_D / FATAL_P / FATAL_T / FATAL_B columns.
# Month start is computed with `.dt.to_timestamp()` rather than the unit-less
# `.astype('datetime64')` cast, which recent pandas versions reject.
spts = spc[[f'FATAL_{t}' for t in 'DPTB' ]].groupby(spc.dt.dt.to_period('M').dt.to_timestamp()).sum()
sp_idx = spts.index.to_series()
# Keep 2020 onward, and drop the current (still incomplete) month.
spts = spts[(sp_idx.dt.year >= 2020) & (sp_idx < cur_mo)].astype(int)
sp_idx = spts.index.to_series()
sp_year = sp_idx.dt.year
spts.index.name = 'Month'
# 2020 rows are compared against the NJDOT counts further down; 2021+ rows are appended to the
# NJDOT series by `ksit`.
spts20 = spts[sp_year == 2020].astype(int)
spts21 = spts[sp_year >= 2021].astype(int)
spts

Unnamed: 0_level_0,FATAL_D,FATAL_P,FATAL_T,FATAL_B
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01,21,8,18,2
2020-02-01,15,2,17,0
2020-03-01,11,8,16,1
2020-04-01,17,2,8,1
2020-05-01,28,9,13,2
2020-06-01,30,9,8,0
2020-07-01,30,8,19,5
2020-08-01,31,14,8,1
2020-09-01,31,5,21,0
2020-10-01,33,5,17,3


In [45]:
def ksit(ksi, njsp_col):
    """Extend a monthly severity table with fatality counts from `spts21`.

    Args:
        ksi: frame containing at least the 'Fatality', 'Serious Injury', 'Minor Injury'
            and 'Possible Injury' columns.
        njsp_col: name of the `spts21` column to append as further 'Fatality' rows.

    Returns:
        The four severity columns of `ksi`, followed by the rows of `spts21[njsp_col]`
        under 'Fatality'. The appended rows have no injury values (NaN).
    """
    severity_cols = ['Fatality', 'Serious Injury', 'Minor Injury', 'Possible Injury']
    base = ksi[severity_cols]
    # Only a fatality count is taken from `spts21`, so the injury columns stay empty
    # for the appended rows.
    appended = spts21[njsp_col].rename('Fatality').to_frame()
    return pd.concat([base, appended])

In [46]:
# Per-victim-type severity tables, each extended with the matching `spts21` fatality column:
# pedestrians <- FATAL_T, cyclists <- FATAL_B, drivers <- FATAL_D, passengers <- FATAL_P.
api = ksit(pki2, 'FATAL_T')
abi = ksit(bki2, 'FATAL_B')
adi = ksit(dki2, 'FATAL_D')
aoi = ksit(oki2, 'FATAL_P')
# Combined overview: prefix each table's columns with its victim type, then place them side by side.
dfs = []
for typ, df in { 'Drivers': adi, 'Passengers': aoi, 'Pedestrians': api, 'Cyclists': abi, }.items():
    # Copy before relabelling so the per-type tables keep their flat column names.
    dfl = df.copy()
    dfl.columns = pd.MultiIndex.from_product([[typ], dfl.columns])
    dfs.append(dfl)
ksia = sxs(*dfs)
ksia

Unnamed: 0_level_0,Drivers,Drivers,Drivers,Drivers,Passengers,Passengers,Passengers,Passengers,Pedestrians,Pedestrians,Pedestrians,Pedestrians,Cyclists,Cyclists,Cyclists,Cyclists
Unnamed: 0_level_1,Fatality,Serious Injury,Minor Injury,Possible Injury,Fatality,Serious Injury,Minor Injury,Possible Injury,Fatality,Serious Injury,Minor Injury,Possible Injury,Fatality,Serious Injury,Minor Injury,Possible Injury
Month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2001-01-01,24,144.0,1048.0,5121.0,9,50.0,390.0,2129.0,8,34.0,150.0,336.0,1,2.0,20.0,37.0
2001-02-01,37,126.0,938.0,4054.0,11,38.0,366.0,1760.0,11,19.0,118.0,230.0,1,3.0,16.0,34.0
2001-03-01,47,128.0,1007.0,4436.0,8,41.0,424.0,2083.0,9,40.0,130.0,297.0,0,7.0,26.0,48.0
2001-04-01,25,141.0,1107.0,4337.0,7,33.0,495.0,2081.0,6,38.0,171.0,281.0,3,13.0,65.0,106.0
2001-05-01,34,149.0,1244.0,5299.0,19,55.0,518.0,2436.0,11,42.0,176.0,275.0,4,9.0,90.0,126.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-11-01,31,,,,11,,,,17,,,,0,,,
2022-12-01,24,,,,6,,,,20,,,,1,,,
2023-01-01,17,,,,5,,,,25,,,,2,,,
2023-02-01,23,,,,4,,,,11,,,,0,,,


In [47]:
def ksim_plot(df, title, name, W=800, H=550, hide_after=None):
    """Plot monthly counts as bars with a rolling-mean line per column, and save the figure.

    Args:
        df: frame with one row per month (its index is the x-axis) and one column per series.
        title: figure title.
        name: file stem; the figure is written to
            `../www/public/plots/njdot/{name}-by-month.json` and `.png`.
        W: width in pixels of the exported PNG.
        H: height in pixels of the exported PNG.
        hide_after: if not None, traces at legend position >= `hide_after` start as
            "legendonly". Legend positions interleave each column with its rolling mean
            (col0, col0 avg, col1, col1 avg, ...), so `hide_after=2` initially shows only
            the first column and its average, and `hide_after=0` initially hides every trace.

    Returns:
        The plotly figure.
    """
    # 12-row rolling mean; with monthly rows that is a trailing 12-month average.
    means = df.rolling(12).mean()
    means = means.rename(columns={
        c: f'{c} (12mo avg)'
        for c in means.columns
    })
    # One x-axis tick per year, from the first data year through the current year.
    years = list(range(int(START_YEAR), cur_yr + 1))

    fig = px.bar(
        df,
        color_discrete_sequence=colors,
        labels={'value': '', 'Month': '', 'variable': '',},
    ).update_layout(
        hovermode='x',
        plot_bgcolor="white",
        xaxis=dict(
            tickmode='array',
            tickvals=years,
            ticktext=years,
            tickangle=-45,
        ),
        legend=dict(orientation='h', x=0.5, xanchor='center'),
        title=dict(
            text=title,
            x=0.5,
        ),
    ).update_xaxes(
        gridcolor='#ccc',
    ).update_yaxes(
        gridcolor='#ccc',
    ).update_traces(
        hovertemplate='%{y:,d}',
    )
    for i, c in enumerate(means.columns):
        # Reuse the matching bar's color; wrap around like `color_discrete_sequence` does,
        # so frames with more columns than palette entries do not raise IndexError.
        line_color = colors[i % len(colors)]
        fig.add_trace(go.Scatter(name=c, x=means.index, y=means[c], line=dict(color=line_color, width=3)))

    # Interleave bar and average trace names so each average sits next to its bar in the legend.
    for idx, col in enumerate([ e for p in zip(df.columns, means.columns) for e in p ]):
        kwargs = dict(legendrank=100+idx)
        # Compare against None explicitly so that hide_after=0 hides all traces
        # instead of being treated as "not set".
        if hide_after is not None and idx >= hide_after:
            kwargs['visible'] = 'legendonly'
        fig.update_traces(
            **kwargs,
            selector=dict(name=col),
        )
    plots_dir = '../www/public/plots/njdot'
    fig.write_json(f'{plots_dir}/{name}-by-month.json')
    fig.write_image(f'{plots_dir}/{name}-by-month.png', width=W, height=H,)
    return fig

In [48]:
def ksiy_plot(df, title, name):
    """Plot yearly totals of `df`'s columns as bars, and save the figure.

    Args:
        df: frame with a datetime index (one row per month) and one column per series.
        title: figure title.
        name: file stem; the figure is written to
            `../www/public/plots/njdot/{name}-by-year.json` and `.png`.

    Returns:
        The plotly figure.
    """
    # Group by the year of this frame's own index. (Previously this used the global `api`
    # frame's index, which only worked while `df` happened to share exactly that index.)
    # Zero totals become NaN — presumably so that years with no data for a column draw no bar.
    df = df.groupby(df.index.to_series().dt.year).sum().replace(0, nan)
    df.index.name = 'Year'
    # Drop the current year, which is still incomplete.
    df = df.loc[df.index < cur_yr]
    fig = px.bar(
        df,
        color_discrete_sequence=colors,
        labels={'value': '', 'Year': '', 'variable': '',},
    ).update_layout(
        hovermode='x',
        plot_bgcolor="white",
        legend=dict(orientation='h', x=0.5, xanchor='center'),
        title=dict(
            text=title,
            x=0.5,
        ),
    ).update_yaxes(
        gridcolor='#ccc',
    ).update_traces(
        hovertemplate='%{y:,d}',
    )

    plots_dir = '../www/public/plots/njdot'
    fig.write_json(f'{plots_dir}/{name}-by-year.json')
    fig.write_image(f'{plots_dir}/{name}-by-year.png')
    return fig

In [49]:
from IPython.display import display

In [50]:
def typ_plots(df, typ):
    """Render and display the monthly and yearly plots for one victim type.

    Three severity groupings are plotted, each as a monthly (`ksim_plot`) and a yearly
    (`ksiy_plot`) figure: fatal + serious ("ks"), fatal + serious + minor ("ksm"), and
    every column of `df` ("ksmp").

    Args:
        df: severity table for the victim type.
        typ: plural display name (e.g. 'Drivers'); lower-cased with its last character
            dropped, it forms the output file stem (e.g. 'driver-injuries-ks').
    """
    n = typ.lower()[:-1]
    # (columns to plot or None for the whole frame, title phrase, file-name suffix)
    variants = (
        (['Fatality', 'Serious Injury'], 'killed and seriously injured', 'ks'),
        (['Fatality', 'Serious Injury', 'Minor Injury'], 'killed and injured', 'ksm'),
        (None, 'killed and injured', 'ksmp'),
    )
    for cols, phrase, suffix in variants:
        title = f'{typ} {phrase} (NJ)'
        stem = f'{n}-injuries-{suffix}'
        for plot in (ksim_plot, ksiy_plot):
            subset = df if cols is None else df[cols]
            display(plot(subset, title, stem))
    

### Drivers

In [51]:
# Displays six figures and writes them as 'driver-injuries-*' JSON/PNG files under ../www/public/plots/njdot.
typ_plots(adi, 'Drivers')

### Passengers

In [52]:
# Displays six figures and writes them as 'passenger-injuries-*' JSON/PNG files under ../www/public/plots/njdot.
typ_plots(aoi, 'Passengers')

### Pedestrians

In [53]:
# Displays six figures and writes them as 'pedestrian-injuries-*' JSON/PNG files under ../www/public/plots/njdot.
typ_plots(api, 'Pedestrians')

### Cyclists

In [54]:
# Displays six figures and writes them as 'cyclist-injuries-*' JSON/PNG files under ../www/public/plots/njdot.
typ_plots(abi, 'Cyclists')

## Check 2020 Fatality counts (NJSP vs. NJDOT)

### Pedestrians

In [39]:
# 2020 monthly pedestrian fatalities from two sources: pk3 from the NJDOT-derived table (`api`),
# pk4 from the `spts20` FATAL_T column. Show only the months where they disagree.
pk3 = api.loc[api.index.to_series().dt.year == 2020, 'Fatality'].rename('pk3')
pk4 = spts20.FATAL_T.rename('pk4')
p_cmp = sxs(pk3, pk4)
p_cmp[pk3 != pk4]

Unnamed: 0_level_0,pk3,pk4
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,16,18
2020-03-01,17,16
2020-04-01,7,8
2020-05-01,12,13
2020-08-01,9,8
2020-09-01,24,21
2020-10-01,15,17
2020-12-01,13,18


In [40]:
# Grouped bars: the two sources' 2020 pedestrian fatality counts, month by month.
px.bar(p_cmp, barmode='group')

In [41]:
# 2020 pedestrian fatality totals per source.
p_cmp.sum()

pk3    173
pk4    179
dtype: int64

### Cyclists

In [42]:
# 2020 monthly cyclist fatalities from two sources: bk3 from the NJDOT-derived table (`abi`),
# bk4 from the `spts20` FATAL_B column. Show only the months where they disagree.
bk3 = abi.loc[abi.index.to_series().dt.year == 2020, 'Fatality'].rename('bk3')
bk4 = spts20.FATAL_B.rename('bk4')
b_cmp = sxs(bk3, bk4)
b_cmp[bk3 != bk4]

Unnamed: 0_level_0,bk3,bk4
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-01,2,1
2020-07-01,3,5
2020-09-01,1,0
2020-10-01,2,3


In [43]:
# Grouped bars: the two sources' 2020 cyclist fatality counts, month by month.
px.bar(b_cmp, barmode='group')

In [44]:
# 2020 cyclist fatality totals per source.
b_cmp.sum()

bk3    17
bk4    18
dtype: int64