## Automating Regional Briefings
Authors: Smitha Mahesh, Eric Englin

Purpose: To complete the analyses and visualizations required for the region-level briefings. 

In [17]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import docx

In [21]:
# Project root that every relative path in this notebook is resolved against.
# The default is the original author's local checkout; set the
# REGION_BRIEFING_DIR environment variable to run the notebook on another
# machine without editing this cell.
myworkingdirectory = os.environ.get(
    "REGION_BRIEFING_DIR",
    r"C:\Users\Eric.Englin\DOT OST\volpe-proj-VU16A100 - Transportation Safety Program\Region Briefing",
)
os.chdir(myworkingdirectory)

In [22]:
# Load the 2022 analysis database (path is relative to the working directory
# set in the previous cell).
analysis_csv_path = ".//Data//Output Data//analysis_database_2022.csv"
analysis_2022 = pd.read_csv(analysis_csv_path)

In [4]:
# Inspect the column names available in the analysis database.
analysis_2022.columns

Index(['INCID_NO', 'NUM_OCC', '5_mph', '10_mph', '15_mph', '20_mph', '25_mph',
       '30_mph', '35_mph', '40_mph', '45_mph', '50_mph', '55_mph', '60_mph',
       '65_mph', '70_mph', '75_mph', 'no_posted_speed', 'Crosswalk',
       'Outside a Crosswalk', 'No Injury', 'Possible Injury',
       'Non-incapacitating Injury', 'Incapacitating Injury', 'Fatality',
       'Unknown Injury', 'Num_Fatalities', 'Num_Injuries', 'Injury or Fatal',
       'Pedestrian', 'Bicycle', 'Pedacycle', 'VRU', 'HorseLlama', 'Cow',
       'Deer', 'Elk', 'Moose', 'Buffalo', 'Bear', 'Antelope', 'SheepGoats',
       'OtherWild', 'OtherDomestic', 'Involving Animal', 'LATITUDE',
       'LONGITUDE', 'Park', 'RGN', 'CRASH_DATE', 'CRASH_TIME', 'CRASH_YEAR',
       'database'],
      dtype='object')

In [5]:
# Preview the first rows of the analysis database.
analysis_2022.head()

Unnamed: 0,INCID_NO,NUM_OCC,5_mph,10_mph,15_mph,20_mph,25_mph,30_mph,35_mph,40_mph,...,OtherDomestic,Involving Animal,LATITUDE,LONGITUDE,Park,RGN,CRASH_DATE,CRASH_TIME,CRASH_YEAR,database
0,ABLI070425075000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,,,ABLI,SER,2007-04-25,750,2007.0,CDS
1,ABLI070804175500,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,,,ABLI,SER,2007-08-04,1755,2007.0,CDS
2,ABLI091117170900,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,,,ABLI,SER,2009-11-17,1709,2009.0,CDS
3,ABLI121009110000,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,,,ABLI,SER,2012-10-09,1100,2012.0,CDS
4,ABLI140610163500,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,38.91205,-76.93412,ABLI,SER,2014-06-10,1635,2014.0,CDS


In [10]:
# Per-park totals across all regions, ordered by the summed 'Injury or Fatal'
# column (largest first).
# Each column is listed once (the earlier dict literal repeated
# 'Num_Fatalities' and 'Num_Injuries'), and aggregations are given by name
# ("sum") rather than the builtin `sum`, which newer pandas releases warn about.
park_severity_aggregations = {
    'Num_Fatalities': 'sum',
    'Num_Injuries': 'sum',
    'No Injury': 'sum',
    'Possible Injury': 'sum',
    'Non-incapacitating Injury': 'sum',
    'Incapacitating Injury': 'sum',
    'Fatality': 'sum',
    'Unknown Injury': 'sum',
    'Injury or Fatal': 'sum',
    'INCID_NO': 'count',  # number of non-null incident ids per park
}
(
    analysis_2022
    .groupby("Park")
    .agg(park_severity_aggregations)
    .reset_index()
    .sort_values("Injury or Fatal", ascending=False)
)

Unnamed: 0,Park,Num_Fatalities,Num_Injuries,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,Injury or Fatal,INCID_NO
19,BAWA,189.0,8355.0,206867.0,8737.0,4097.0,1308.0,261.0,741.0,5796.0,28394
128,GWMP,79.0,4511.0,153650.0,4167.0,2232.0,707.0,114.0,470.0,3282.0,23006
200,NATR,177.0,4040.0,35416.0,2887.0,1828.0,1002.0,264.0,321.0,2528.0,7869
27,BLRI,85.0,3292.0,36147.0,1676.0,1877.0,1028.0,108.0,493.0,2413.0,7616
196,NACC,23.0,3137.0,102800.0,2270.0,1272.0,355.0,31.0,603.0,2333.0,14627
...,...,...,...,...,...,...,...,...,...,...,...
160,KEPA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
156,KATM,0.0,0.0,53.0,0.0,0.0,0.0,0.0,5.0,0.0,9
63,CONG,0.0,0.0,44.0,0.0,0.0,0.0,0.0,2.0,0.0,10
151,JOFI,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [31]:
def create_park_table(region, data=None):
    """Build the park-level crash summary table for one region.

    Rows whose ``RGN`` equals ``region`` are grouped by ``Park``; for each
    park the function sums ``Num_Fatalities`` and ``Num_Injuries`` and counts
    the non-null ``INCID_NO`` values.

    Parameters
    ----------
    region : str
        Region code to match against the ``RGN`` column (e.g. ``"IMR"``).
    data : pandas.DataFrame, optional
        Crash records containing the ``RGN``, ``Park``, ``INCID_NO``,
        ``Num_Fatalities`` and ``Num_Injuries`` columns. Defaults to the
        notebook-level ``analysis_2022`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``Park``, ``Number of Fatalities``, ``Number of Injuries``
        and ``Number of Crashes``, sorted by ``Number of Fatalities`` in
        descending order. Parks tied on fatalities keep their alphabetical
        (groupby) order because the sort is stable.
    """
    if data is None:
        data = analysis_2022

    region_crashes = data.loc[data['RGN'] == region]

    # Only the three columns that reach the final table are aggregated.
    # Aggregations are named by string: passing the builtin `sum` triggers a
    # FutureWarning in recent pandas versions.
    park_totals = (
        region_crashes
        .groupby("Park")
        .agg({
            'Num_Fatalities': 'sum',
            'Num_Injuries': 'sum',
            'INCID_NO': 'count',
        })
        .reset_index()
        .rename(columns={
            'INCID_NO': 'Number of Crashes',
            'Num_Fatalities': 'Number of Fatalities',
            # TODO: check if injuries means serious injuries
            'Num_Injuries': 'Number of Injuries',
        })
    )

    df_final = park_totals[
        ['Park', 'Number of Fatalities', 'Number of Injuries', 'Number of Crashes']
    ].sort_values("Number of Fatalities", ascending=False, kind="stable")

    return df_final


In [14]:
# Show each distinct region code found in the RGN column, one per line.
region_codes = analysis_2022["RGN"].unique()
for region_code in region_codes:
    print(region_code)
    

SER
NER
MWR
IMR
NCR
PWR
AKR


In [28]:
# Preview the briefing-document path built for one example region.
# Backslashes are written as '\\' because '\D' and '\R' are not valid escape
# sequences and newer Python versions warn about them; the resulting string
# is the same as before.
region = "IMR"
region_doc_path = './22-23\\Drafted Deliverables\\Regions/' + region + ".docx"
region_doc_path

'./22-23\\Drafted Deliverables\\Regions/IMR.docx'

In [32]:
def make_table(region):
    """Append the park-level injury/fatality table for ``region`` to its Word file.

    The table comes from ``create_park_table(region)``. The function opens
    ``<region>.docx`` in the "Park level tables" figures folder (relative to
    the current working directory), adds a heading and a table with one header
    row plus one row per park, and saves the document back to the same path.

    Because the document is opened from that path, it is expected to exist
    already. Each call adds another heading and table to the file, so running
    this twice for the same region leaves two copies in the document.

    Parameters
    ----------
    region : str
        Region code; used both to filter the data and as the file name stem.
    """
    df = create_park_table(region)

    # Backslashes are escaped explicitly: '\D' and '\F' are not valid escape
    # sequences and newer Python versions warn about them. The path string
    # itself is unchanged.
    doc_name = './22-23\\Drafted Deliverables\\Figures/Park level tables/' + region + ".docx"
    doc = docx.Document(doc_name)
    doc.add_heading("Injury & fatality, at All Region Park Units")

    n_rows, n_cols = df.shape
    t = doc.add_table(n_rows + 1, n_cols)  # +1 for the header row

    # Header row: column names.
    for j in range(n_cols):
        t.cell(0, j).text = df.columns[j]

    # Body rows. `df.values` is taken once up front; accessing it inside the
    # nested loop rebuilt the whole array for every single cell.
    # NOTE(review): float sums are written via str(), e.g. "3.0" — confirm
    # that is the desired presentation in the briefing.
    cell_values = df.values
    for i in range(n_rows):
        for j in range(n_cols):
            t.cell(i + 1, j).text = str(cell_values[i, j])

    doc.save(doc_name)

In [33]:
# Write the park-level table into every region's document, printing each
# region code as it is processed.
region_codes = analysis_2022["RGN"].unique()
for region_code in region_codes:
    print(region_code)
    make_table(region_code)


SER
NER
MWR
IMR
NCR
PWR
AKR
