In [3]:
import pandas as pd
import numpy as np
import copy
import holoviews as hv
import plotly.figure_factory as ff
import time
import matplotlib.pyplot as plt
from collections import Counter

import geopandas as gpd
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter


In [2]:
def fill_chart(time_of_day,tod_name,df,prob = False):
    weather_types = list(df.columns)
    if prob:
        div = np.shape(time_of_day)[0]
    else:
        div = 1
    acc_data = [np.shape(time_of_day[time_of_day[:,2] == weather])[0]/div for weather in weather_types]
    df.loc[tod_name] = acc_data 
    return df

In [3]:
filename = './archive/US_Accidents_Dec20.csv'
data = pd.read_csv(filename,usecols = ['ID','Start_Time','Weather_Condition','Start_Lat','Start_Lng','County'])
data = data.dropna()

In [4]:
data 

Unnamed: 0,ID,Start_Time,Start_Lat,Start_Lng,County,Weather_Condition
0,A-1,2016-02-08 05:46:00,39.865147,-84.058723,Montgomery,Light Rain
1,A-2,2016-02-08 06:07:59,39.928059,-82.831184,Franklin,Light Rain
2,A-3,2016-02-08 06:49:27,39.063148,-84.032608,Clermont,Overcast
3,A-4,2016-02-08 07:23:34,39.747753,-84.205582,Montgomery,Mostly Cloudy
4,A-5,2016-02-08 07:39:07,39.627781,-84.188354,Montgomery,Mostly Cloudy
...,...,...,...,...,...,...
4232536,A-4239402,2019-08-23 18:03:25,34.002480,-117.379360,Riverside,Fair
4232537,A-4239403,2019-08-23 19:11:30,32.766960,-117.148060,San Diego,Fair
4232538,A-4239404,2019-08-23 19:00:21,33.775450,-117.847790,Orange,Partly Cloudy
4232539,A-4239405,2019-08-23 19:00:21,33.992460,-118.403020,Los Angeles,Fair


In [6]:
fips = pd.read_csv('./fips-codes/county_fips_master.csv', encoding='latin-1')
fips_n = fips.to_numpy()
CA_fips = fips_n[fips_n[:,2] == 'CA'][:,:2]
CA_fip_dict = {fc[1].rsplit(' ',1)[0]:fc[0] for fc in CA_fips}

In [7]:
CA_fip_dict

{'Alameda': 6001,
 'Alpine': 6003,
 'Amador': 6005,
 'Butte': 6007,
 'Calaveras': 6009,
 'Colusa': 6011,
 'Contra Costa': 6013,
 'Del Norte': 6015,
 'El Dorado': 6017,
 'Fresno': 6019,
 'Glenn': 6021,
 'Humboldt': 6023,
 'Imperial': 6025,
 'Inyo': 6027,
 'Kern': 6029,
 'Kings': 6031,
 'Lake': 6033,
 'Lassen': 6035,
 'Los Angeles': 6037,
 'Madera': 6039,
 'Marin': 6041,
 'Mariposa': 6043,
 'Mendocino': 6045,
 'Merced': 6047,
 'Modoc': 6049,
 'Mono': 6051,
 'Monterey': 6053,
 'Napa': 6055,
 'Nevada': 6057,
 'Orange': 6059,
 'Placer': 6061,
 'Plumas': 6063,
 'Riverside': 6065,
 'Sacramento': 6067,
 'San Benito': 6069,
 'San Bernardino': 6071,
 'San Diego': 6073,
 'San Francisco': 6075,
 'San Joaquin': 6077,
 'San Luis Obispo': 6079,
 'San Mateo': 6081,
 'Santa Barbara': 6083,
 'Santa Clara': 6085,
 'Santa Cruz': 6087,
 'Shasta': 6089,
 'Sierra': 6091,
 'Siskiyou': 6093,
 'Solano': 6095,
 'Sonoma': 6097,
 'Stanislaus': 6099,
 'Sutter': 6101,
 'Tehama': 6103,
 'Trinity': 6105,
 'Tulare': 61

In [7]:
timestamps = np.array([data.split()[1] for data in data['Start_Time'].to_numpy()])

In [8]:
timestamps

array(['05:46:00', '06:07:59', '06:49:27', ..., '19:00:21', '19:00:21',
       '18:52:06'], dtype='<U18')

In [9]:
res = 1
processed_data = np.array([data['ID'].to_numpy(),timestamps,data['Weather_Condition'],\
                           data['Start_Lat'],data['Start_Lng'],data['County']],dtype = object).T
#processed_data = processed_data[np.arange(0,len(processed_data),res)]

In [10]:
processed_data

array([['A-1', '05:46:00', 'Light Rain', 39.865147, -84.058723,
        'Montgomery'],
       ['A-2', '06:07:59', 'Light Rain', 39.928059, -82.831184,
        'Franklin'],
       ['A-3', '06:49:27', 'Overcast', 39.063148, -84.032608, 'Clermont'],
       ...,
       ['A-4239404', '19:00:21', 'Partly Cloudy', 33.77545, -117.84779,
        'Orange'],
       ['A-4239405', '19:00:21', 'Fair', 33.99246, -118.40302,
        'Los Angeles'],
       ['A-4239406', '18:52:06', 'Fair', 34.13393, -117.23092,
        'San Bernardino']], dtype=object)

In [11]:
rain_terms = ['Rain','Precipitation','Drizzle','Hail','Squalls','Ice','Thunder','Thunderstorm','T-Storm','Showers in the Vicinity','Rain and Sleet']
cloudy_terms = ['Cloudy','Cloud','Clouds','Thunder in the Vicinity']
clear_terms = ['Clear','Fair']
snow_terms = ['Snow','Wintry','Sleet']
dust_terms = ['Dust','Ash','Sand','Tornado']
fog_terms = ['Mist','Fog','Smoke','Haze','Overcast']

In [12]:
update = False
if update:
    grouped_weather = []
    for ind,w_string in enumerate(processed_data[:,2]):
        weather = np.array(w_string.split())
        if w_string in fog_terms or any(word in fog_terms for word in weather):
            group = 'Fog'
        elif w_string in snow_terms or any(word in snow_terms for word in weather):
            group = 'Snow'
        elif w_string in cloudy_terms or any(word in cloudy_terms for word in weather):
            group = 'Cloudy'
        elif w_string in rain_terms or any(word in rain_terms for word in weather):
            group = 'Rain'
        elif w_string in clear_terms or any(word in clear_terms for word in weather):
            group = 'Clear'
        elif w_string in dust_terms or any(word in dust_terms for word in weather):
            group = 'Dust'
        else:
            group = w_string
        processed_data[ind,2] = group


In [44]:
df = pd.DataFrame(processed_data,columns =  ['ID','Start_Time','Weather_Condition','Start_Lat','Start_Lng','County'])
df.to_csv('processed_data.csv')

In [1]:
processed_data = pd.read_csv('processed_data.csv',\
                             usecols = ['ID','Start_Time','Weather_Condition','Start_Lat','Start_Lng','County']).to_numpy()
processed_data

NameError: name 'pd' is not defined

In [8]:
fips = [CA_fip_dict[county] for county in processed_data[:,5] if county in CA_fip_dict]
fips

[6095,
 6001,
 6013,
 6085,
 6085,
 6001,
 6085,
 6075,
 6013,
 6001,
 6097,
 6013,
 6001,
 6085,
 6001,
 6067,
 6067,
 6085,
 6001,
 6085,
 6085,
 6001,
 6085,
 6097,
 6013,
 6097,
 6081,
 6067,
 6001,
 6001,
 6097,
 6013,
 6085,
 6085,
 6097,
 6081,
 6085,
 6075,
 6075,
 6085,
 6085,
 6113,
 6001,
 6001,
 6085,
 6067,
 6113,
 6067,
 6081,
 6081,
 6067,
 6001,
 6067,
 6001,
 6061,
 6001,
 6113,
 6013,
 6085,
 6081,
 6075,
 6001,
 6013,
 6067,
 6057,
 6067,
 6081,
 6095,
 6061,
 6067,
 6067,
 6067,
 6061,
 6067,
 6017,
 6041,
 6001,
 6013,
 6085,
 6001,
 6075,
 6001,
 6077,
 6001,
 6077,
 6045,
 6067,
 6061,
 6061,
 6061,
 6097,
 6085,
 6085,
 6013,
 6095,
 6067,
 6001,
 6057,
 6033,
 6001,
 6013,
 6055,
 6013,
 6085,
 6085,
 6075,
 6001,
 6001,
 6041,
 6081,
 6067,
 6001,
 6017,
 6095,
 6087,
 6001,
 6077,
 6085,
 6013,
 6013,
 6081,
 6067,
 6001,
 6013,
 6001,
 6067,
 6081,
 6053,
 6001,
 6067,
 6041,
 6097,
 6013,
 6013,
 6087,
 6053,
 6001,
 6053,
 6095,
 6001,
 6085,
 6001,
 6013,

In [9]:
fip_hist = Counter(fips)
fip_vals, values = list(fip_hist.keys()),list(fip_hist.values())
prob_values = list(np.array(values)/sum(values))

In [14]:
import plotly.graph_objects as go

In [21]:
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/volcano.csv")
fig = go.Figure()
fig.add_trace(go.Surface(z=df.values.tolist(), colorscale="Viridis"))
# # Update plot sizing
# fig.update_layout(
#     width=800,
#     height=900,
#     autosize=False,
#     margin=dict(t=0, b=0, l=0, r=0),
#     template="plotly_white",
# )
# # Update 3D scene options
# fig.update_scenes(
#     aspectratio=dict(x=1, y=1, z=0.7),
#     aspectmode="manual"
# )

# Add dropdown
fig.update_layout(
    updatemenus=[
        dict(
            buttons=list([
                dict(
                    args=["type", "surface"],
                    label="3D Surface",
                    method="restyle"
                ),
                dict(
                    args=["type", "heatmap"],
                    label="Heatmap",
                    method="restyle"
                )
            ]),
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.1,
            yanchor="top"
        ),
    ]
)

In [20]:
fig = ff.create_choropleth(fips=fip_vals, values=prob_values, scope = ['CA'])

fig.layout.template = None
# Add dropdown
fig.update_layout(
    updatemenus=[
        dict(
            buttons=list([
                dict(
                    args=["type", "surface"],
                    label="3D Surface",
                    method="restyle"
                ),
                dict(
                    args=["type", "heatmap"],
                    label="Heatmap",
                    method="restyle"
                )
            ]),
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.1,
            yanchor="top"
        ),
    ]
)
fig.show()

In [None]:
Drop Down Menu

In [32]:
fip_vals

1038438

In [25]:
a,b = np.array(Counter(fips))

TypeError: iteration over a 0-d array

In [169]:
a = {1:2,3:4}
2 in a

False

In [43]:
time_of_day = []
for time in processed_data[:,1]:
    hour = int(time.split(':')[0])
    if hour >= 6 and hour <= 11:
        time_of_day.append('day')
    elif hour >= 12 and hour <= 17:
        time_of_day.append('noon')
    else:
        time_of_day.append('night')
time_of_day = np.array(time_of_day)

In [45]:
day = processed_data[time_of_day == 'day']
noon = processed_data[time_of_day == 'noon']
night = processed_data[time_of_day == 'night']

In [68]:
columns = list(set(data.Weather_Condition))[1:]
index = ['Day','Noon','Night']
df = pd.DataFrame(columns = columns, index = index)
df = fill_chart(day,'Day',df,prob = True)
df = fill_chart(noon,'Noon',df,prob = True)
df = fill_chart(night,'Night',df,prob = True)
df

Unnamed: 0,Heavy Thunderstorms and Rain,Partly Cloudy,T-Storm / Windy,Sand / Dust Whirlwinds,Heavy Rain,N/A Precipitation,Snow,Snow / Windy,Thunder in the Vicinity,Heavy T-Storm / Windy,...,Drizzle and Fog,Thunder / Windy,Volcanic Ash,Light Snow,Light Snow and Sleet,Heavy Rain Showers,Overcast,Rain,Thunder / Wintry Mix,Snow Grains
Day,0.000352,0.090554,3.3e-05,1e-06,0.004694,0.000142,0.001805,4.3e-05,0.000375,5.1e-05,...,6.7e-05,1.2e-05,4e-06,0.016815,5e-06,0.0,0.114587,0.012172,3e-06,0.0
Noon,0.000818,0.111299,9.1e-05,1.9e-05,0.003951,9.9e-05,0.001646,6.8e-05,0.001799,8.9e-05,...,1.6e-05,5.9e-05,5e-06,0.009742,6e-06,1e-06,0.074906,0.010929,1e-06,3e-06
Night,0.000612,0.077246,6.6e-05,2e-06,0.00423,0.000231,0.000997,0.000131,0.000927,7.9e-05,...,3.3e-05,3.8e-05,7e-06,0.013748,6e-06,5e-06,0.077676,0.011292,0.0,0.0


In [81]:
columns = list(set(data.Weather_Condition))[1:]
index = ['Day','Noon','Night']
df_no_prob = pd.DataFrame(columns = columns, index = index)
df_no_prob = fill_chart(day,'Day',df_no_prob)
df_no_prob = fill_chart(noon,'Noon',df_no_prob)
df_no_prob = fill_chart(night,'Night',df_no_prob)
df_no_prob

Unnamed: 0,Heavy Thunderstorms and Rain,Partly Cloudy,T-Storm / Windy,Sand / Dust Whirlwinds,Heavy Rain,N/A Precipitation,Snow,Snow / Windy,Thunder in the Vicinity,Heavy T-Storm / Windy,...,Drizzle and Fog,Thunder / Windy,Volcanic Ash,Light Snow,Light Snow and Sleet,Heavy Rain Showers,Overcast,Rain,Thunder / Wintry Mix,Snow Grains
Day,550.0,141691.0,52.0,1.0,7345.0,222.0,2824.0,67.0,587.0,80.0,...,105.0,18.0,6.0,26310.0,8.0,0.0,179297.0,19045.0,4.0,0.0
Noon,1193.0,162258.0,133.0,28.0,5760.0,144.0,2400.0,99.0,2623.0,130.0,...,23.0,86.0,7.0,14203.0,9.0,1.0,109202.0,15933.0,2.0,4.0
Night,741.0,93466.0,80.0,2.0,5118.0,280.0,1206.0,158.0,1122.0,96.0,...,40.0,46.0,9.0,16635.0,7.0,6.0,93986.0,13663.0,0.0,0.0


In [79]:
columns = np.array(columns)
for weather in columns:
    print(len(processed_data[processed_data[:,2] == weather]))

2484
397415
265
31
18223
646
6430
324
4332
306
5
9
1
2497
32
19
901
82
204661
5
206389
1703
2
22
1
4
1
2730
3881
144
5019
329
311
3981
1725
808181
2946
19
1
4927
3
1
123
66
24
1001
3510
3
900872
1
4
1
3
3
30
6
19
1
9
6
10
7
6
6
153
571743
618
2215
4
262
349
45912
23
2736
33
5
8327
37
7
2
3
3442
1
549
1
5
2240
11
323340
11396
28
13
151
6272
69
3
2
2
4440
14594
1709
26
46794
23
22
6538
106
1
3071
99
3
4
2
157
151
841
1
139
1
101
27
168
150
22
57148
24
7
382485
48641
6
4


In [90]:
columns

['Heavy Thunderstorms and Rain',
 'Partly Cloudy',
 'T-Storm / Windy',
 'Sand / Dust Whirlwinds',
 'Heavy Rain',
 'N/A Precipitation',
 'Snow',
 'Snow / Windy',
 'Thunder in the Vicinity',
 'Heavy T-Storm / Windy',
 'Low Drifting Snow',
 'Light Snow and Sleet / Windy',
 'Snow and Thunder',
 'Wintry Mix',
 'Squalls / Windy',
 'Funnel Cloud',
 'Rain / Windy',
 'Blowing Snow / Windy',
 'Scattered Clouds',
 'Heavy Freezing Rain',
 'Light Rain',
 'Light Snow / Windy',
 'Sand / Dust Whirlwinds / Windy',
 'Light Sleet',
 'Freezing Rain / Windy',
 'Light Fog',
 'Sand / Dust Whirls Nearby',
 'Heavy T-Storm',
 'Partly Cloudy / Windy',
 'Heavy Snow / Windy',
 'T-Storm',
 'Heavy Drizzle',
 'Haze / Windy',
 'Light Rain with Thunder',
 'Heavy Snow',
 'Clear',
 'Drizzle',
 'Light Freezing Rain / Windy',
 'Heavy Smoke',
 'Light Thunderstorms and Rain',
 'Tornado',
 'Thunder and Hail',
 'Rain Showers',
 'Smoke / Windy',
 'Light Snow Showers',
 'Light Freezing Fog',
 'Thunder',
 'Light Hail',
 'Fair',
 

In [113]:
grouped_weather = []
for w_string in columns:
    weather = np.array(w_string.split())
    if w_string in fog_terms or any(word in fog_terms for word in weather):
        group = 'Fog'
    elif w_string in snow_terms or any(word in snow_terms for word in weather):
        group = 'Snow'
    elif w_string in cloudy_terms or any(word in cloudy_terms for word in weather):
        group = 'Cloudy'
    elif w_string in rain_terms or any(word in rain_terms for word in weather):
        group = 'Rain'
    elif w_string in clear_terms or any(word in clear_terms for word in weather):
        group = 'Clear'
    elif w_string in dust_terms or any(word in dust_terms for word in weather):
        group = 'Dust'
    else:
        group = w_string
    grouped_weather.append(group)
set(grouped_weather)


{'Clear', 'Cloudy', 'Dust', 'Fog', 'Rain', 'Snow'}

ModuleNotFoundError: No module named 'Tkinter'

In [122]:
processed_data[439]

array(['A-440', '17:23:06', nan, 40.200333, -83.027435], dtype=object)

In [76]:
processed_data

array([['A-1', '05:46:00', 'Light Rain'],
       ['A-2', '06:07:59', 'Light Rain'],
       ['A-3', '06:49:27', 'Overcast'],
       ...,
       ['A-4239404', '19:00:21', 'Partly Cloudy'],
       ['A-4239405', '19:00:21', 'Fair'],
       ['A-4239406', '18:52:06', 'Fair']], dtype=object)

In [89]:
day.shape

(716, 3)

In [77]:
df.shape

(3, 131)

In [None]:
day[day.Weather_Condition == 'Clear'].shape[0]


In [None]:
sort weather by time, where day is 6am to 12pm, noon is 12pm to 6pm, night is 6pm to 6am
count number of accidents that occur within these time frames given the weather condition
for now, visualize by charting data