In [1]:
import matplotlib.pyplot as plt
from datetime import datetime
import folium
import numpy as np
import matplotlib as mpl
from folium.plugins import MarkerCluster
import json
import fiona 
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.ticker as ticker
import pandas as pd
from plotly import graph_objs as go
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns

In [2]:
sns.set_theme()
sns.set_palette('colorblind')

In [3]:
#import WAZE data
df=pd.read_csv('MORAalertsTotal.csv', encoding="ISO-8859-1")

In [4]:
df.drop(columns=['Unnamed: 0'], inplace=True)

In [5]:
df.head()

Unnamed: 0,Id,NAME,UNIT_TYPE,Time,Alert,Sub_Alert,Street,State,Lon,Lat,Type,geometry,Park,Region,Entrance
0,40724,MORA,National Park,2017-03-05 12:54:05,ROAD_CLOSED,ROAD_CLOSED_EVENT,SR-410,WA,-121.539917,46.866887,region,POINT (-121.539917 46.866887),1,0,
1,40732,MORA,National Park,2017-03-05 12:54:06,ROAD_CLOSED,ROAD_CLOSED_EVENT,SR-410,WA,-121.520425,46.869337,region,POINT (-121.520425 46.869337),1,0,Cayuse Pass
2,40734,MORA,National Park,2017-03-05 12:53:46,ROAD_CLOSED,ROAD_CLOSED_EVENT,SR-123,WA,-121.519142,46.830834,region,POINT (-121.519142 46.830834),1,0,
3,40735,MORA,National Park,2017-03-05 12:54:33,ROAD_CLOSED,ROAD_CLOSED_EVENT,SR-123,WA,-121.554361,46.785824,region,POINT (-121.554361 46.785824),1,0,
4,40737,MORA,National Park,2017-03-05 12:53:40,ROAD_CLOSED,ROAD_CLOSED_EVENT,SR-123,WA,-121.554361,46.785824,region,POINT (-121.554361 46.785824),1,0,


In [6]:
def label_season(row):
    if row['Time'].month in [1,2,3,4,11,12] or (row['Time'].month==5 and row['Time'].day<=15):
        return('Winter')
    elif row['Time'].month in [6] or (row['Time'].month==5 and row['Time'].day>15) or (row['Time'].month==10 and row['Time'].day<15):
        return('Shoulder')
    return('Peak')

In [7]:
df['Time']=pd.to_datetime(df['Time'])
df['season']=df.apply(lambda row:label_season(row), axis=1)
df['day_of_week'] = df['Time'].dt.day_name()
df['Hour'] = df['Time'].dt.hour

In [8]:
#full dataset season
df_season_grouped=df.groupby(['season']).count()
df_season_grouped=df_season_grouped.reset_index()
df_season_grouped=df_season_grouped[['season','Id']].sort_values(by='season')
df_season_grouped

Unnamed: 0,season,Id
0,Peak,13592
1,Shoulder,12410
2,Winter,92473


In [9]:
#full dataset day of week
df_day_grouped=df.groupby(['day_of_week']).count()
df_day_grouped=df_day_grouped.reset_index()
df_day_grouped=df_day_grouped[['day_of_week','Id']].sort_values(by='day_of_week')
df_day_grouped=df_day_grouped.reindex([1,5,6,4,0,2,3])
df_day_grouped

Unnamed: 0,day_of_week,Id
1,Monday,16218
5,Tuesday,17420
6,Wednesday,17115
4,Thursday,17177
0,Friday,17525
2,Saturday,17391
3,Sunday,15629


In [10]:
#full dataset time dist
df_time_grouped=df.groupby(['Hour']).count()
df_time_grouped=df_time_grouped.reset_index()
df_time_grouped=df_time_grouped[['Hour','Id']].sort_values(by='Hour')
df_time_grouped

Unnamed: 0,Hour,Id
0,0,8850
1,1,6418
2,2,4466
3,3,3907
4,4,3624
5,5,3548
6,6,6889
7,7,3564
8,8,3527
9,9,3334


In [11]:
#dataset without closure alerts 
nonclosureAlerts = df.loc[df['Alert']!='ROAD_CLOSED']

In [12]:
#dataset without closure alerts  season
df_season_grouped=nonclosureAlerts.groupby(['season']).count()
df_season_grouped=df_season_grouped.reset_index()
df_season_grouped=df_season_grouped[['season','Id']].sort_values(by='season')
df_season_grouped

Unnamed: 0,season,Id
0,Peak,12235
1,Shoulder,6270
2,Winter,13617


In [13]:
#dataset without closure alerts day of week
df_day_grouped=nonclosureAlerts.groupby(['day_of_week']).count()
df_day_grouped=df_day_grouped.reset_index()
df_day_grouped=df_day_grouped[['day_of_week','Id']].sort_values(by='day_of_week')
df_day_grouped=df_day_grouped.reindex([1,5,6,4,0,2,3])
df_day_grouped

Unnamed: 0,day_of_week,Id
1,Monday,3952
5,Tuesday,3981
6,Wednesday,4675
4,Thursday,4848
0,Friday,5700
2,Saturday,5424
3,Sunday,3542


In [14]:
#dataset without closure alerts  time dist
df_time_grouped=nonclosureAlerts.groupby(['Hour']).count()
df_time_grouped=df_time_grouped.reset_index()
df_time_grouped=df_time_grouped[['Hour','Id']].sort_values(by='Hour')
df_time_grouped

Unnamed: 0,Hour,Id
0,0,5401
1,1,2781
2,2,949
3,3,585
4,4,302
5,5,218
6,6,134
7,7,79
8,8,37
9,9,35


In [15]:
#open hours 7:30 am - 5:00 pm

In [15]:
#park dataset with closure alerts 
parkWclosed = df.loc[df['Park']==1]

#park dataset without closure alerts 
parkWOclosed = nonclosureAlerts.loc[nonclosureAlerts['Park']==1]

In [16]:
#dataset without closure alerts  season
df_season_grouped=parkWOclosed.groupby(['season']).count()
df_season_grouped=df_season_grouped.reset_index()
df_season_grouped=df_season_grouped[['season','Id']].sort_values(by='season')
df_season_grouped

Unnamed: 0,season,Id
0,Peak,272
1,Shoulder,25
2,Winter,13


In [17]:
#dataset without closure alerts day of week
df_day_grouped=parkWOclosed.groupby(['day_of_week']).count()
df_day_grouped=df_day_grouped.reset_index()
df_day_grouped=df_day_grouped[['day_of_week','Id']].sort_values(by='day_of_week')
df_day_grouped=df_day_grouped.reindex([1,5,6,4,0,2,3])
df_day_grouped

Unnamed: 0,day_of_week,Id
1,Monday,31
5,Tuesday,16
6,Wednesday,27
4,Thursday,30
0,Friday,18
2,Saturday,99
3,Sunday,89


In [18]:
#dataset without closure alerts  time dist
df_time_grouped=parkWOclosed.groupby(['Hour']).count()
df_time_grouped=df_time_grouped.reset_index()
df_time_grouped=df_time_grouped[['Hour','Id']].sort_values(by='Hour')
df_time_grouped

Unnamed: 0,Hour,Id
0,0,9
1,1,4
2,2,2
3,3,1
4,4,1
5,7,1
6,14,3
7,15,17
8,16,21
9,17,40


In [21]:
# entrance dataset 
entOnly=df.dropna(subset=['Entrance'])
entOnlyWOclosed=nonclosureAlerts.dropna(subset=['Entrance'])

In [22]:
#entrance dataset with closure alerts  season
df_season_grouped=entOnly.groupby(['season']).count()
df_season_grouped=df_season_grouped.reset_index()
df_season_grouped=df_season_grouped[['season','Id']].sort_values(by='season')
df_season_grouped

Unnamed: 0,season,Id
0,Peak,1210
1,Shoulder,4522
2,Winter,60178


In [23]:
#entrance dataset with closure alerts day of week
df_day_grouped=entOnly.groupby(['day_of_week']).count()
df_day_grouped=df_day_grouped.reset_index()
df_day_grouped=df_day_grouped[['day_of_week','Id']].sort_values(by='day_of_week')
df_day_grouped=df_day_grouped.reindex([1,5,6,4,0,2,3])
df_day_grouped

Unnamed: 0,day_of_week,Id
1,Monday,9519
5,Tuesday,9583
6,Wednesday,9380
4,Thursday,9413
0,Friday,8914
2,Saturday,9657
3,Sunday,9444


In [24]:
#entrance dataset with closure alerts  time dist
df_time_grouped=entOnly.groupby(['Hour']).count()
df_time_grouped=df_time_grouped.reset_index()
df_time_grouped=df_time_grouped[['Hour','Id']].sort_values(by='Hour')
df_time_grouped

Unnamed: 0,Hour,Id
0,0,2673
1,1,2752
2,2,2713
3,3,2610
4,4,2684
5,5,2642
6,6,3234
7,7,2723
8,8,2609
9,9,2611


In [25]:
#entrance dataset with closure alerts  season
df_season_grouped=entOnlyWOclosed.groupby(['season']).count()
df_season_grouped=df_season_grouped.reset_index()
df_season_grouped=df_season_grouped[['season','Id']].sort_values(by='season')
df_season_grouped

Unnamed: 0,season,Id
0,Peak,1034
1,Shoulder,235
2,Winter,166


In [26]:
#entrance dataset with closure alerts day of week
df_day_grouped=entOnlyWOclosed.groupby(['day_of_week']).count()
df_day_grouped=df_day_grouped.reset_index()
df_day_grouped=df_day_grouped[['day_of_week','Id']].sort_values(by='day_of_week')
df_day_grouped=df_day_grouped.reindex([1,5,6,4,0,2,3])
df_day_grouped

Unnamed: 0,day_of_week,Id
1,Monday,103
5,Tuesday,60
6,Wednesday,53
4,Thursday,75
0,Friday,76
2,Saturday,577
3,Sunday,491


In [27]:
#entrance dataset with closure alerts  time dist
df_time_grouped=entOnlyWOclosed.groupby(['Hour']).count()
df_time_grouped=df_time_grouped.reset_index()
df_time_grouped=df_time_grouped[['Hour','Id']].sort_values(by='Hour')
df_time_grouped

Unnamed: 0,Hour,Id
0,0,46
1,1,24
2,2,8
3,3,7
4,4,5
5,5,1
6,6,1
7,7,2
8,11,1
9,13,4


In [None]:
# entrance analysis: time of day for all nonClosure (line graph), type of alert (set of bars for each entrance), 
    #comparison to RSG (exit/enter/irma), jams by type & time of day (line graph w/ 3 lines)
# park analysis: time of day for all nonClosure(line graph)
    #commonly jammed streets? or ranking of most jammed inside park?

#how to look at different alert types? -hazard from other car

# STRAVA= bike and ped analysis
    # age and time of day dist. graphs--> both on 1 graph
    # gender % diff map--> 2 separate maps
    