# Import data

In [190]:
import pandas
base_data_url = "../datasets/project/road safety/road-safety-data-{}"

In [191]:
accidents_16 = pandas.read_csv(base_data_url.format("accidents-2016.csv"))
accidents_15 = pandas.read_csv(base_data_url.format("accidents-2015.csv"))
accidents_14 = pandas.read_csv(base_data_url.format("accidents-2005-2014.csv"))

In [192]:
accidents = pandas.DataFrame()
accidents = pandas.concat([accidents, accidents_15, accidents_14], axis=0)

In [193]:
# accidents

# Investigate correlations
1. Are serious or fatal accidents more likely in bad weather?
2. Are serious or fatal accidents more likely during the night?
3. Do serious or fatal accidents have, on average, a longer distance from a streetlight?
4. Do any times of day, days of week, or weeks of year have more serious or fatal accidents?

# 1: Bad weather
## Plotting

In [194]:
# Replace the weather with a number: 1 for bad weather, 0 for good
replacement_list = {'Fine + high winds': 1,
  'Fine no high winds': 0,
  'Fog or mist': 1,
  'Other': 0,
  'Raining + high winds': 1,
  'Raining no high winds': 1,
  'Snowing + high winds': 1,
  'Snowing no high winds': 1}

In [195]:
severities = ["Serious", "Slight", "Fatal"]

In [196]:
w_dict = {}
for severity in severities:
    w_dict[severity] = {}
    for weather_type in replacement_list.keys():
        matches = len(accidents[(accidents["Weather_Conditions"] == weather_type) &
                               (accidents["Accident_Severity"] == severity)])
        w_dict[severity][weather_type] = matches

In [197]:
# Plot results
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (40,20)

d_serious = w_dict["Serious"]
d_slight = w_dict["Slight"]
d_fatal = w_dict["Fatal"]


In [198]:
def plot_wtype(wtype, title):
    plt.figure()
    plt.title = title
    plt.bar(range(len(wtype)), list(wtype.values()), align='center')
    plt.xticks(range(len(wtype)), list(wtype.keys()))
    plt.show()

In [199]:
# plot_wtype(d_fatal, "Fatal")

## Hypothesis testing
$H_0$: Prop(bad, (serious|fatal)) = Prop(bad, slight)


$H_1$: Prop(bad, (serious|fatal)) > Prop(bad, slight)

For a p-value of 0.05.

In [200]:
# Replace the weather
accidents["Weather_Conditions"] = accidents["Weather_Conditions"].map(replacement_list)

In [201]:
slight_list = list(accidents[
    (accidents["Accident_Severity"] == "Slight")]
                   .Weather_Conditions)

In [202]:
bad_list = list(accidents[
    (accidents["Accident_Severity"] == ("Serious" or "Fatal"))]
                   .Weather_Conditions)

In [203]:
# Actually do the hypothesis test
from scipy.stats import ttest_ind
ttest_ind(slight_list, bad_list)

Ttest_indResult(statistic=1.5539014982290393, pvalue=0.12033987342840149)

p > 0.05, so the result is not significant and we accept the null hypothesis.

# 2: Times of day


In [205]:
accidents = pandas.concat([accidents, accidents_15, accidents_14], axis=0)

In [248]:
import datetime
from astral import Astral
def sun_at_day(day):
    city_name = 'Edinburgh'
    a = Astral()
    a.solar_depression = 'civil'
    city = a[city_name]
    sun = city.sun(date = day, local=False)
    return sun

def is_day(x):
    time_x = datetime.datetime.strptime(x + " UTC", '%d/%m/%Y %H:%M %Z')
    sun = sun_at_day(time_x)
    
    
    if (sun['sunrise'] < time_x < sun['sunset']):
        return "Daylight"

In [249]:
is_day("26/12/2015 19:50")

2015-12-26 08:43:57+00:00
2015-12-26 19:50:00


TypeError: can't compare offset-naive and offset-aware datetimes

In [209]:
#bad_accidents = accidents[(accidents["Accident_Severity"] == ("Serious" or "Fatal"))]

#bad_accidents = bad_accidents.Time.map(lambda x: hour_x(x))

In [213]:
#bins = [0, 6, 19, 24]

In [219]:
accidents.Date + " " + accidents.Time

0       26/12/2015 19:50
1       25/12/2015 23:40
2       14/12/2015 16:30
3       11/12/2015 15:50
4       03/01/2015 12:06
5        13/01/2015 8:15
6       09/01/2015 19:30
7       09/01/2015 17:40
8        19/01/2015 8:20
9       22/01/2015 17:50
10      25/01/2015 18:15
11      22/01/2015 14:55
12      07/02/2015 17:20
13      09/02/2015 10:30
14      03/02/2015 18:00
15      03/02/2015 16:05
16      17/02/2015 17:25
17      03/01/2015 19:30
18      03/03/2015 12:20
19       06/03/2015 8:35
20      26/02/2015 10:30
21       16/02/2015 9:30
22       06/02/2015 8:50
23      27/02/2015 19:30
24      18/02/2015 14:40
25      18/02/2015 14:00
26      07/02/2015 14:55
27      04/03/2015 17:20
28      05/01/2015 12:10
29      07/01/2015 17:15
              ...       
2269    09/10/2014 08:00
2270    10/10/2014 16:00
2271    12/09/2014 15:10
2272    08/09/2014 17:15
2273    03/10/2014 22:30
2274    16/10/2014 22:20
2275    17/10/2014 15:45
2276    07/11/2014 19:15
2277    12/11/2014 17:30
