# Import data

In [1]:
import pandas
base_data_url = "../datasets/project/road safety/road-safety-data-{}"

In [2]:
accidents_16 = pandas.read_csv(base_data_url.format("accidents-2016.csv"))
accidents_15 = pandas.read_csv(base_data_url.format("accidents-2015.csv"))
accidents_14 = pandas.read_csv(base_data_url.format("accidents-2005-2014.csv"))

In [3]:
accidents = pandas.DataFrame()
accidents = pandas.concat([accidents_16, accidents_15, accidents_14], axis=0)

In [4]:
# accidents

# Investigate correlations
1. Are serious or fatal accidents more likely in bad weather?
2. Are serious or fatal accidents more likely during the night?
3. Do serious or fatal accidents have, on average, a longer distance from a streetlight?
4. Do any times of day, days of week, or weeks of year have more serious or fatal accidents?

# 1: Bad weather
## Plotting

In [5]:
# Replace the weather with a number: 1 for bad weather, 0 for good
replacement_list = {'Fine + high winds': 1,
  'Fine no high winds': 0,
  'Fog or mist': 1,
  'Other': 0,
  'Raining + high winds': 1,
  'Raining no high winds': 1,
  'Snowing + high winds': 1,
  'Snowing no high winds': 1}

In [6]:
severities = ["Serious", "Slight", "Fatal"]

In [7]:
w_dict = {}
for severity in severities:
    w_dict[severity] = {}
    for weather_type in replacement_list.keys():
        matches = len(accidents[(accidents["Weather_Conditions"] == weather_type) &
                               (accidents["Accident_Severity"] == severity)])
        w_dict[severity][weather_type] = matches

In [8]:
# Plot results
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (40,20)

d_serious = w_dict["Serious"]
d_slight = w_dict["Slight"]
d_fatal = w_dict["Fatal"]


In [9]:
def plot_wtype(wtype, title):
    plt.figure()
    plt.title = title
    plt.bar(range(len(wtype)), list(wtype.values()), align='center')
    plt.xticks(range(len(wtype)), list(wtype.keys()))
    plt.show()

In [10]:
# plot_wtype(d_fatal, "Fatal")

## Hypothesis testing
$H_0$: Prop(bad, (serious|fatal)) = Prop(bad, slight)


$H_1$: Prop(bad, (serious|fatal)) > Prop(bad, slight)

For a p-value of 0.05.

In [11]:
# Replace the weather
accidents["Weather_Conditions"] = accidents["Weather_Conditions"].map(replacement_list)

In [12]:
slight_list = list(accidents[
    (accidents["Accident_Severity"] == "Slight")]
                   .Weather_Conditions)

In [13]:
bad_list = list(accidents[
    (accidents["Accident_Severity"] == ("Serious" or "Fatal"))]
                   .Weather_Conditions)

In [14]:
# Actually do the hypothesis test
from scipy.stats import ttest_ind
ttest_ind(slight_list, bad_list)

Ttest_indResult(statistic=1.1418463195447863, pvalue=0.25362583312719567)

p > 0.05, so the result is not significant and we accept the null hypothesis.

# 2: Times of day
$H_0$: Prop(serious, nighttime) = Prop(slight, nighttime)

$H_1$: Prop(serious, nighttime) > Prop(slight, nighttime)

p = 0.05.

In [15]:
accidents = pandas.concat([accidents, accidents_15, accidents_14], axis=0)

In [16]:
import datetime
from astral import Astral
city_name = 'Edinburgh'
a = Astral()
a.solar_depression = 'civil'
city = a[city_name]

def sun_at_day(day):
    sun = city.sun(date = day, local=False)
    return sun

def is_day(x):
    time_x = datetime.datetime.strptime(x + "+0000", '%d/%m/%Y %H:%M%z')
    sun = sun_at_day(time_x)
    
    
    if (sun['sunrise'] < time_x < sun['sunset']):
        return 0
    return 1

In [17]:
bad_accidents = accidents[(accidents["Accident_Severity"] == ("Serious" or "Fatal"))]
slight_accidents = accidents[(accidents["Accident_Severity"] == "Slight")]

In [18]:
bad_dt = bad_accidents.Date + " " + bad_accidents.Time
slight_dt = slight_accidents.Date + " " + slight_accidents.Time

In [19]:
bad_dt = bad_dt.map(lambda x: is_day(x))
slight_dt = slight_dt.map(lambda x: is_day(x))

In [20]:
from scipy.stats import ttest_ind
ttest_ind(bad_dt, slight_dt)

Ttest_indResult(statistic=2.2383240618397853, pvalue=0.025244367725947698)

p < 0.05 so we reject the null hypothesis: the proportion of bad accidents during the night is greater than slight accidents during the night.

4: 

# Pretty plots

In [21]:
import folium

colors = {'Slight': 'green', 'Serious': 'orange', 'Fatal': 'red'}

map_acc = folium.Map(location=[56.4620, -2.9707], zoom_start=12)


In [22]:
accidents = pandas.DataFrame()
# Something is up with _15, _14, so use only _16...
#accidents = pandas.concat([accidents_16, accidents_15, accidents_14], axis=0)
accidents = accidents_16

In [23]:
accidents.apply(lambda row:folium.CircleMarker(location=[row["Latitude"], row["Longitude"]], 
                                               radius=10, 
                                               fill_color=colors[row["Accident_Severity"]],
                                               fill = True,
                                               stroke=False,
                                               fill_opacity=0.5,
                                              popup = row["Accident_Index"])
                                             .add_to(map_acc), axis=1)

map_acc

In [33]:
cameras = pandas.read_csv("../datasets/project/cameras_wgs.csv", delimiter='\t')

In [39]:
cameras_coords = pandas.concat([cameras.Lat, cameras.Lon], axis=1)

In [77]:
def notnan(x):
    try: 
        int(x)
        return True
    except:
        return False

In [82]:
from scipy.spatial import Voronoi, voronoi_plot_2d
cameras_list = [list(x) for x in cameras_coords.values if notnan(x[0])]

vor = Voronoi(cameras_list) 