In [1]:
from utz import *
from njsp.paths import CRASHES_PQT, PROJECTED_CSV, ROOT_DIR
from njsp.ytc import to_ytc
from njsp.ytd import oldest_commit_rundate_since, projected_roy_deaths
from njsp import Ytd

In [2]:
# Ytd supplies the values the cells below read: prv_rundate, prv_year, cur_year, cur_year_frac.
ytd = Ytd()
# Cutoff passed to oldest_commit_rundate_since() when locating the older data snapshot.
prv_rundate = ytd.prv_rundate
prv_rundate

'2023-02-02'

Find the oldest commit whose rundate is less than 1 year ago, and load the crashes data as of that commit:

In [3]:
# Look up the commit from inside ROOT_DIR (cd presumably switches the working
# directory for the duration of the block — confirm against utz).
with cd(ROOT_DIR):
    prv_commit = oldest_commit_rundate_since(prv_rundate)

# The commit's tree is indexed with the parquet's path relative to ROOT_DIR.
crashes_relpath = relpath(CRASHES_PQT, ROOT_DIR)
prv_crashes_blob = prv_commit.tree[crashes_relpath]
# Read the blob's bytes fully into memory and parse them as parquet, so the
# historical version of the file is loaded straight from the commit.
stream = prv_crashes_blob.data_stream
blob = stream.read()
prv_crashes = read_parquet(BytesIO(blob))
prv_crashes

Searching for oldest commit with rundate ≥2023-02-02
Found rundate 2023-02-01 10:00:07 < 2023-02-02 at commit 38be41e; returning commit d2491ef


Unnamed: 0_level_0,CCODE,CNAME,MCODE,MNAME,HIGHWAY,LOCATION,FATALITIES,INJURIES,STREET,FATAL_D,FATAL_P,FATAL_T,FATAL_B,dt
ACCID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1703,01,Atlantic,0102,Atlantic City,446,State/Interstate Authority 446 S MP 1,1.0,1.0,,,,,,2008-01-01 00:35:00
1681,09,Hudson,0910,Union City,,Bergenline Ave S MP 0 at 6th St,1.0,,Bergenline Ave,,,,,2008-01-01 04:11:00
1659,04,Camden,0415,Gloucester Twsp,42,State Highway 42 N MP 8.2,1.0,1.0,,,,,,2008-01-01 06:46:00
1661,20,Union,2004,Elizabeth City,624,County 624 W MP 2.2 at Ikea Dr,1.0,1.0,,,,,,2008-01-01 12:29:00
1664,07,Essex,0714,Newark City,95,State/Interstate Authority 95 S MP 104,1.0,0.0,,,,,,2008-01-02 09:09:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12295,15,Ocean,1512,Lacey Twsp,9,State Highway 9,1.0,0.0,,1.0,0.0,0.0,0.0,2023-01-30 17:59:00
12294,13,Monmouth,1316,Freehold Twsp,9,State Highway 9,1.0,,,0.0,0.0,1.0,0.0,2023-01-30 18:49:00
12308,07,Essex,0714,Newark City,,North 9th St,1.0,2.0,North 9th St,0.0,0.0,1.0,0.0,2023-01-31 17:50:00
12303,05,Cape May,0504,Dennis Twsp,47,State Highway 47 MP 16.85,1.0,0.0,,1.0,0.0,0.0,0.0,2023-01-31 19:33:00


Load previous year YTD and final counts, and current year YTD counts:

In [4]:
prv_year, cur_year = ytd.prv_year, ytd.cur_year

# Build both tables up front: one from the older snapshot, one from the
# crashes file as it exists now. Each is looked up by year below.
prv_ytc = to_ytc(prv_crashes)
cur_ytc = to_ytc(read_parquet(CRASHES_PQT))

# Previous year as recorded in the older snapshot (its year-to-date counts).
prv_ytd = prv_ytc.loc[prv_year]
# Previous year (final counts) and current year (year-to-date) from the current data.
prv_end, cur_ytd = cur_ytc.loc[prv_year], cur_ytc.loc[cur_year]

cur_ytd

Unnamed: 0_level_0,driver,passenger,pedestrian,cyclist,crashes
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Atlantic,2,0,1,0,3
Bergen,2,0,3,0,5
Burlington,2,1,0,0,3
Camden,0,0,3,0,3
Cumberland,0,0,1,0,1
Essex,0,0,2,0,2
Gloucester,1,0,0,0,1
Hudson,1,0,1,0,2
Hunterdon,0,0,1,0,1
Mercer,2,0,0,0,2


Compute the fraction of the current year that has elapsed (year-to-date; "YTD") and the fraction that still remains (rest-of-year; "ROY"):

In [5]:
# Fraction of the current year already elapsed, as reported by Ytd.
cur_ytd_frac = ytd.cur_year_frac
# Its complement: the fraction of the year still to come.
cur_roy_frac = 1 - cur_ytd_frac
cur_ytd_frac, cur_roy_frac

(0.08857019075086014, 0.9114298092491399)

Combine previous-year YTD and year-end counts with current-year YTD counts:

In [6]:
def melt(df, name):
    """Flatten a wide frame into a long Series called `name`.

    Every column label of `df` is moved into a new innermost index level
    named 'type', appended after `df`'s existing index; the cell values
    become the values of the returned Series.
    """
    # Keep the original index so it survives as the outer level(s).
    long = df.melt(var_name='type', ignore_index=False)
    long = long.set_index('type', append=True)
    values = long['value']
    return values.rename(name)

# Output column name -> count table; dict order fixes the column order.
ytd_frames = {
    'prv_ytd': prv_ytd,
    'prv_end': prv_end,
    'cur_ytd': cur_ytd,
}
# Missing values after combining are replaced with 0 before casting to int.
z = (
    sxs(*[melt(frame, label) for label, frame in ytd_frames.items()])
    .fillna(0)
    .astype(int)
)
z

Unnamed: 0_level_0,Unnamed: 1_level_0,prv_ytd,prv_end,cur_ytd
county,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Atlantic,driver,1,17,2
Bergen,driver,0,21,2
Camden,driver,1,19,0
Cape May,driver,1,4,0
Essex,driver,1,23,0
...,...,...,...,...
Burlington,crashes,0,34,3
Cumberland,crashes,0,20,1
Salem,crashes,0,11,0
Somerset,crashes,0,22,0


In [7]:
def project_roy(r):
    """Return the rest-of-year projection for one row, rounded to an int.

    `r` must expose `prv_ytd`, `prv_end` and `cur_ytd`; these are passed to
    `projected_roy_deaths` together with the notebook-level `cur_ytd_frac`.
    """
    estimate = projected_roy_deaths(r.prv_ytd, r.prv_end, r.cur_ytd, cur_ytd_frac)
    return int(round(estimate))

# One rest-of-year projection per row of `z`.
roy = z.apply(project_roy, axis=1).rename('roy')

# Full-year projection = count so far this year + projected remainder.
projected_long = (z.cur_ytd + roy).rename('projected')

# Move index level 1 ('type') out of the index and spread it across columns.
projected = (
    projected_long
    .reset_index(level=1)
    .pivot(columns='type', values='projected')
)
projected

type,crashes,cyclist,driver,passenger,pedestrian
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Atlantic,36,2,19,6,12
Bergen,44,1,23,4,13
Burlington,37,1,28,5,5
Camden,41,5,16,5,16
Cape May,5,0,3,1,2
Cumberland,20,0,12,5,6
Essex,43,2,20,4,21
Gloucester,31,1,20,5,5
Hudson,25,3,11,3,10
Hunterdon,4,0,3,0,2


In [8]:
# Persist the projected county × type table to PROJECTED_CSV.
projected.to_csv(PROJECTED_CSV)