# Connecticut, USA

**Source of original dataset:** https://www.ctcrash.uconn.edu/

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Suspected Serious Injury, Suspected Minor Injury, Possible Injury, No Apparent Injury

In [None]:
import pandas as pd
# Show every column and never truncate cell contents when displaying frames.
# The fully qualified key is required: the bare 'max_columns' pattern is
# ambiguous on newer pandas (it also matches 'styler.render.max_columns').
# None (rather than the deprecated -1) means "no limit" for max_colwidth;
# this needs pandas >= 1.0.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj
import glob

Set up input files

In [None]:
import os

# Directory holding the raw CSV exports, relative to this notebook.
# Built with os.path.join so it resolves on any OS; the trailing empty
# component keeps the trailing path separator the original string had.
data_dir = os.path.join("..", "data", "connecticut", "")

# Every export whose name ends in "_0.csv". glob returns matches in arbitrary
# order, so sort them to make the row order of the combined data reproducible.
files = sorted(glob.glob(os.path.join(data_dir, '*_0.csv')))

Read original data

In [None]:
# Load each input CSV into its own DataFrame, echoing the path as progress.
# skiprows=1 drops the first line of every file so that the second line is
# used as the header row.
# "ANSI" is only an alias of the Windows-only "mbcs" codec and raises
# LookupError elsewhere, so the Windows-1252 code page is named explicitly.
# NOTE(review): assumes the exports were written with the Western European
# ANSI code page (cp1252) - confirm against the source files.
data = []
for csv_path in files:
    print(csv_path)
    data.append(pd.read_csv(csv_path, encoding="cp1252", skiprows=1))

In [None]:
# Stack the per-file frames row-wise into one DataFrame with a fresh 0..n-1
# index. pd.concat aligns columns by name (missing ones become NaN) and keeps
# the column dtypes, avoiding the slow, memory-hungry round-trip through one
# dict per row.
list_of_dfs = data
if list_of_dfs:
    data = pd.concat(list_of_dfs, ignore_index=True, sort=False)
else:
    # pd.concat refuses an empty list; fall back to an empty frame instead.
    data = pd.DataFrame()

Create Datetime column

In [None]:
# Build the 'Date' column: take 'Date Of Crash' as text, append a midnight
# time component and parse the result into pandas datetimes.
crash_day_text = data['Date Of Crash'].astype('str')
data['Date'] = pd.to_datetime(crash_day_text + ' 00:00')

Some key statistics

In [None]:
# One 0/1 indicator column per value of 'Most Severe Injury':
# code found in the data -> name of the indicator column it switches on.
severity_columns = {
    'K': 'fatal',
    'A': 'suspected_serious_injury',
    'B': 'suspected_minor_injury',
    'C': 'possible_injury',
    'O': 'no_apparent_injury',
}

# Start every indicator at 0 ...
for flag_column in severity_columns.values():
    data[flag_column] = 0

# ... then set it to 1 on the rows carrying the matching severity code.
for severity_code, flag_column in severity_columns.items():
    data.loc[data['Most Severe Injury'] == severity_code, flag_column] = 1

In [None]:
# Headline figures for the combined dataset.
# Each injury column is a 0/1 flag per row derived from 'Most Severe Injury',
# so every sum below counts crash records of that severity, not people.
print(f"Accidents between {data['Date'].min()} and {data['Date'].max()}")

total_accidents = data.shape[0]
print(f"There are a total of {total_accidents} accidents.")

fatalities = data['fatal'].sum()
print(f"There are a total of {fatalities} fatalities.")

serious_injuries = data["suspected_serious_injury"].sum()
print(f"There are a total of {serious_injuries} seriously injured.")

# The two lines below used to print the same "injured" label for two
# different statistics; the labels now say which severity is being counted.
minor_injuries = data["suspected_minor_injury"].sum()
print(f"There are a total of {minor_injuries} suspected minor injuries.")

possible_injuries = data["possible_injury"].sum()
print(f"There are a total of {possible_injuries} possible injuries.")

Select the bicycle accidents (no filter is applied here: every row of the combined data is kept)

In [None]:
# No filtering happens here: every row of `data` is kept.
# NOTE(review): presumably the input CSVs were already restricted to bicycle
# crashes when they were exported - confirm.
# This only binds a second name to the same DataFrame; no copy is made, so
# later changes through either name affect both.
data_bicycles = data

Save to file

In [None]:
# Report the (rows, columns) shape, then write the frame to a CSV file in the
# current working directory (the index is written as the first column).
output_csv = 'cycling_safety_connecticut.csv'
print(data_bicycles.shape)
data_bicycles.to_csv(output_csv)
print('Wrote file to: ' + output_csv)