# Analysis of Crash Data

using Jupyter on https://datascientistworkbench.com/

To see the Python code used in this Notebook, click the following button:

In [1]:
from IPython.display import display, HTML
HTML('''<script> code_show=true;function code_toggle() { if (code_show){$('div.input').hide();}
else {$('div.input').show(); } code_show = !code_show } $( document ).ready(code_toggle); </script>
<form action="javascript:code_toggle()"><input type="submit" value="python code(toggle on/off)."></form>''')

In [2]:
import requests
from zipfile import ZipFile 
import pandas as pd
import io 
from IPython.display import display

## Data Source

In [3]:
API_URL = 'https://www.data.vic.gov.au/api/3/action/package_show?id=crash-stats-data-extract'
print 'API',API_URL 
metadata_file = requests.get(API_URL).json()
data_file_url =  metadata_file['result']['resources'][0]['url']
print 'Data file url:', data_file_url

API https://www.data.vic.gov.au/api/3/action/package_show?id=crash-stats-data-extract
Data file url: https://vicroads-public.sharepoint.com/InformationAccess/Shared%20Documents/Road%20Safety/Crash/Accident/ACCIDENT.zip


## Data Files

Key data files are:

In [4]:
zipfile = requests.get(data_file_url).content
dataset = {}
with ZipFile(io.BytesIO(zipfile))  as z:
    for filename in z.namelist():
        print filename
        with z.open(filename,'r') as c:
            try:
                dataset[filename] = pd.read_csv(c,dtype=object)          
            except:
                pass

ACCIDENT.csv
ACCIDENT_EVENT.csv
ACCIDENT_LOCATION.csv
NODE_ID_COMPLEX_INT_ID.csv
ATMOSPHERIC_COND.csv
NODE.csv
PERSON.csv
ROAD_SURFACE_COND.csv
SUBDCA.csv
VEHICLE.csv
ACCIDENT_CHAINAGE.csv
Statistic Checks.csv


In [7]:
data = {}

## ACCIDENT.CSV

In [8]:
data['ACCIDENT.csv'] = dataset['ACCIDENT.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['ACCIDENT.csv'] = data['ACCIDENT.csv'].set_index(['ACCIDENT_NO'])
print 'index:', data['ACCIDENT.csv'].index.names
data['ACCIDENT.csv'].dtypes

index: [u'ACCIDENT_NO']


ACCIDENTDATE            category
ACCIDENTTIME            category
ACCIDENT_TYPE           category
Accident Type Desc      category
DAY_OF_WEEK             category
Day Week Description    category
DCA_CODE                category
DCA Description         category
DIRECTORY               category
EDITION                 category
PAGE                    category
GRID_REFERENCE_X        category
GRID_REFERENCE_Y        category
LIGHT_CONDITION         category
Light Condition Desc    category
NODE_ID                 category
NO_OF_VEHICLES          category
NO_PERSONS              category
NO_PERSONS_INJ_2        category
NO_PERSONS_INJ_3        category
NO_PERSONS_KILLED       category
NO_PERSONS_NOT_INJ      category
POLICE_ATTEND           category
ROAD_GEOMETRY           category
Road Geometry Desc      category
SEVERITY                category
SPEED_ZONE              category
dtype: object

## ACCIDENT_EVENT.csv

In [9]:
data['ACCIDENT_EVENT.csv'] = dataset['ACCIDENT_EVENT.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['ACCIDENT_EVENT.csv'] = data['ACCIDENT_EVENT.csv'].set_index(['ACCIDENT_NO','EVENT_SEQ_NO'])
print 'index:', data['ACCIDENT_EVENT.csv'].index.names
data['ACCIDENT_EVENT.csv'].dtypes

index: [u'ACCIDENT_NO', u'EVENT_SEQ_NO']


EVENT_TYPE                category
Event Type Desc           category
VEHICLE_1_ID              category
VEHICLE_1_COLL_PT         category
Vehicle 1 Coll Pt Desc    category
VEHICLE_2_ID              category
VEHICLE_2_COLL_PT         category
Vehicle 2 Coll Pt Desc    category
PERSON_ID                 category
OBJECT_TYPE               category
Object Type Desc          category
dtype: object

## ACCIDENT_LOCATION.csv

In [10]:
data['ACCIDENT_LOCATION.csv'] = dataset['ACCIDENT_LOCATION.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['ACCIDENT_LOCATION.csv'] = data['ACCIDENT_LOCATION.csv'].set_index(['ACCIDENT_NO','NODE_ID'])
print 'index:', data['ACCIDENT_LOCATION.csv'].index.names
data['ACCIDENT_LOCATION.csv'].dtypes

index: [u'ACCIDENT_NO', u'NODE_ID']


ROAD_ROUTE_1          category
ROAD_NAME             category
ROAD_TYPE             category
ROAD_NAME_INT         category
ROAD_TYPE_INT         category
DISTANCE_LOCATION     category
DIRECTION_LOCATION    category
NEAREST_KM_POST       category
OFF_ROAD_LOCATION     category
dtype: object

## NODE_ID_COMPLEX_INT_ID.csv

In [11]:
data['NODE_ID_COMPLEX_INT_ID.csv'] = dataset['NODE_ID_COMPLEX_INT_ID.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['NODE_ID_COMPLEX_INT_ID.csv'] = data['NODE_ID_COMPLEX_INT_ID.csv'].set_index(['ACCIDENT_NO','COMPLEX_INT_NO'])
print 'index:', data['NODE_ID_COMPLEX_INT_ID.csv'].index.names
data['NODE_ID_COMPLEX_INT_ID.csv'].dtypes

index: [u'ACCIDENT_NO', u'COMPLEX_INT_NO']


NODE_ID    category
dtype: object

## ATMOSPHERIC_COND.csv

In [12]:
data['ATMOSPHERIC_COND.csv'] = dataset['ATMOSPHERIC_COND.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['ATMOSPHERIC_COND.csv'] = data['ATMOSPHERIC_COND.csv'].set_index(['ACCIDENT_NO','ATMOSPH_COND'])
print data['ATMOSPHERIC_COND.csv'].index.names
print data['ATMOSPHERIC_COND.csv'].dtypes

[u'ACCIDENT_NO', u'ATMOSPH_COND']
ATMOSPH_COND_SEQ     category
Atmosph Cond Desc    category
dtype: object


## NODE.csv

In [13]:
data['NODE.csv'] = dataset['NODE.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['NODE.csv'].dtypes

ACCIDENT_NO       category
NODE_ID           category
NODE_TYPE         category
AMG_X             category
AMG_Y             category
LGA_NAME          category
Lga Name All      category
Region Name       category
Deg Urban Name    category
Lat               category
Long              category
Postcode No       category
dtype: object

## PERSON.csv

In [21]:
data['PERSON.csv'] = dataset['PERSON.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['PERSON.csv'] = dataset['PERSON.csv'].set_index(['ACCIDENT_NO','PERSON_ID','VEHICLE_ID'])
print data['PERSON.csv'].index.names
data['PERSON.csv'].dtypes

[u'ACCIDENT_NO', u'PERSON_ID', u'VEHICLE_ID']


SEX                    object
AGE                    object
Age Group              object
INJ_LEVEL              object
Inj Level Desc         object
SEATING_POSITION       object
HELMET_BELT_WORN       object
ROAD_USER_TYPE         object
Road User Type Desc    object
LICENCE_STATE          object
PEDEST_MOVEMENT        object
POSTCODE               object
TAKEN_HOSPITAL         object
EJECTED_CODE           object
dtype: object

### ROAD_SURFACE_COND.csv

In [15]:
data['ROAD_SURFACE_COND.csv'] = dataset['ROAD_SURFACE_COND.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['ROAD_SURFACE_COND.csv'] = data['ROAD_SURFACE_COND.csv'].set_index(['ACCIDENT_NO','SURFACE_COND'])
print data['ROAD_SURFACE_COND.csv'].index.names
data['ROAD_SURFACE_COND.csv'].dtypes

[u'ACCIDENT_NO', u'SURFACE_COND']


Surface Cond Desc    category
SURFACE_COND_SEQ     category
dtype: object

## SUBDCA.csv

In [16]:
data['SUBDCA.csv'] = dataset['SUBDCA.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['SUBDCA.csv'] = data['SUBDCA.csv'].set_index(['ACCIDENT_NO','SUB_DCA_CODE'])
print 'index', data['SUBDCA.csv'].index.names
data['SUBDCA.csv'].dtypes

index [u'ACCIDENT_NO', u'SUB_DCA_CODE']


SUB_DCA_SEQ          category
Sub Dca Code Desc    category
dtype: object

## VEHICLE.csv

In [17]:
data['VEHICLE.csv'] = dataset['VEHICLE.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['VEHICLE.csv'] = data['VEHICLE.csv'].set_index(['ACCIDENT_NO','VEHICLE_ID'])
print 'index:', data['VEHICLE.csv'].index.names
data['VEHICLE.csv'].dtypes

index: [u'ACCIDENT_NO', u'VEHICLE_ID']


VEHICLE_YEAR_MANUF        category
VEHICLE_DCA_CODE          category
INITIAL_DIRECTION         category
ROAD_SURFACE_TYPE         category
Road Surface Type Desc    category
REG_STATE                 category
VEHICLE_BODY_STYLE        category
VEHICLE_MAKE              category
VEHICLE_MODEL             category
VEHICLE_POWER             category
VEHICLE_TYPE              category
Vehicle Type Desc         category
VEHICLE_WEIGHT            category
CONSTRUCTION_TYPE         category
FUEL_TYPE                 category
NO_OF_WHEELS              category
NO_OF_CYLINDERS           category
SEATING_CAPACITY          category
TARE_WEIGHT               category
TOTAL_NO_OCCUPANTS        category
CARRY_CAPACITY            category
CUBIC_CAPACITY            category
FINAL_DIRECTION           category
DRIVER_INTENT             category
VEHICLE_MOVEMENT          category
TRAILER_TYPE              category
VEHICLE_COLOUR_1          category
VEHICLE_COLOUR_2          category
CAUGHT_FIRE         

## ACCIDENT_CHAINAGE.csv

In [18]:
data['ACCIDENT_CHAINAGE.csv'] = dataset['ACCIDENT_CHAINAGE.csv'].apply(lambda r: pd.Categorical(r,ordered=True))
data['ACCIDENT_CHAINAGE.csv'] = data['ACCIDENT_CHAINAGE.csv'].set_index(['Node Id','Route No','Chainage Seq'])
print 'index', data['ACCIDENT_CHAINAGE.csv'].index.names
data['ACCIDENT_CHAINAGE.csv'].dtypes

index [u'Node Id', u'Route No', u'Chainage Seq']


Route Link No    category
Chainage         category
dtype: object