In [1]:
# package imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd

# Accidents

1. DataFrame - one row per accident
2. Include Date/Time and Location

In [2]:
# import traffic pickle file
df_traffic = pd.read_pickle('../data/interim/traffic_calls.pickle')

In [3]:
# columns of interest
target_columns = [
    'Type_',
    'TypeText',
    'TimeCreate',
    'Location'
]

# select relevant columns
df_traffic = df_traffic[target_columns]

In [4]:
# select only accidents
df_accidents = df_traffic[(df_traffic.Type_.str.slice(0,2) == '20') | (df_traffic.Type_.str.slice(0,3) == '100')]

In [5]:
# split location into lat/lon
df_accidents['location_split'] = df_accidents.Location.str.replace('[()]', '').str.split(',')
df_accidents['lat'] = df_accidents.location_split.str.get(0).astype('float')
df_accidents['lon'] = df_accidents.location_split.str.get(1).astype('float')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [6]:
# remove irrelevant columns
df_accidents.drop(columns=['Type_', 'TypeText', 'Location', 'location_split'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [7]:
# drop rows with missing lat/lon values
df_accidents.drop(df_accidents[df_accidents.lat <= 1].index, inplace=True)
df_accidents.drop(df_accidents[df_accidents.lat.isna()].index, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [8]:
# save accidents dataframe to pickle file
df_accidents.to_pickle('../data/interim/accidents.pickle')

# Locations

1. Street Blocks
2. Census Tracts
3. Neighborhoods

## Street Blocks

In [9]:
# import road centerline shapefile
gdf_streets = gpd.read_file('../data/raw/Road_Centerline/geo_export_c02761a8-1d85-477e-a5cf-01b9f22f4d88.shp')

In [10]:
# target columns
street_columns = [
    'joinid',
    'fromright',
    'centerline',
    'roadclass',
    'fullname',
    'toleft',
    'toright',
    'fedrtetype',
    'fullnameab',
    'objectid',
    'strtetype',
    'fromleft',
    'geometry'
]

In [11]:
# select target columns
gdf_streets = gdf_streets[street_columns]

In [12]:
# set index to objectid
gdf_streets.set_index('objectid', inplace=True)

In [13]:
# write streets to pickle file
gdf_streets.to_pickle('../data/interim/streets.pickle')

## Census Tracts

In [14]:
# import Louisiana Census Tract file
gdf_tracts = gpd.read_file('../data/raw/tl_2017_22_tract/tl_2017_22_tract.shp')

In [15]:
# select Orleans Parish
gdf_tracts = gdf_tracts[gdf_tracts['COUNTYFP'] == '071']

In [16]:
# select target columns
tract_columns = [
    'GEOID',
    'geometry'
]

In [17]:
# remove extraneous columns
gdf_tracts = gdf_tracts[tract_columns].set_index('GEOID')

In [18]:
# save to pickle file
gdf_tracts.to_pickle('../data/interim/tracts.pickle')

## Neighborhoods

In [19]:
# import neighborhood shapefile
gdf_nhoods = gpd.read_file('../data/raw/Neighborhood_Statistical_Areas/Neighborhood_Statistical_Areas.shp')

In [20]:
# identify target columns
nhood_columns = [
    'OBJECTID',
    'GNOCDC_LAB',
    'geometry'
]

In [21]:
# remove extraneous columns
gdf_nhoods = gdf_nhoods[nhood_columns].set_index('OBJECTID')

In [22]:
# save to pickle file
gdf_nhoods.to_pickle('../data/interim/nhoods.pickle')

# Date/Time

1. 15-min increments
2. 30-min increments
3. 1-hr increments
4. 1-day increments

In [23]:
# set start and end datetimes for ranges
range_start = pd.Timestamp('2012-01-01 00:00:00')
range_end   = pd.Timestamp('2018-07-01 00:00:00')

In [24]:
# create interval indexes
date_range_15_min = pd.interval_range(start=range_start, end=range_end, freq='15min', closed='left')
date_range_30_min = pd.interval_range(start=range_start, end=range_end, freq='30min', closed='left')
date_range_60_min = pd.interval_range(start=range_start, end=range_end, freq='60min', closed='left')
date_range_1_day = pd.interval_range(start=range_start, end=range_end, freq='D', closed='left')