# EDA for Station Metadata

**Load**

In [1]:
import pickle

In [2]:
with open('./data/loaded/meta.dat', 'rb') as f:
    meta = pickle.load(f)
meta

Unnamed: 0,ID,Fwy,Dir,Latitude,Longitude,Length,Type,Lanes
0,715898,5,S,33.880183,-118.021787,0.430,ML,3
1,715900,5,S,33.882892,-118.026822,,OR,1
2,715901,5,N,33.883400,-118.027451,,OR,1
3,715903,5,N,33.886992,-118.034125,,OR,1
4,715904,5,S,33.892489,-118.044573,,OR,1
...,...,...,...,...,...,...,...,...
4878,777066,405,S,34.161152,-118.469631,0.455,ML,3
4879,777067,405,N,34.161136,-118.469426,0.790,HV,1
4880,777068,405,S,34.161152,-118.469631,0.790,HV,1
4881,777168,10,E,34.068993,-118.149756,0.450,HV,1


**Build Map Visualizing all Stations and General Statistics**

In [41]:
from folium import Map
import folium.plugins as plugins
import folium

In [79]:
dir_mapper = {'S': 'south', 'N': 'north', 'E': 'east', 'W': 'west'}
m = Map(location=(34.0522,-118.2437), tiles='https://tile.jawg.io/jawg-dark/{z}/{x}/{y}{r}.png?access-token=yxQukjQJyY3mRrF6htcGR22i1QJ6BP6wslSe2Cmq2k4aT8S0wbDtYMEaPhc8s240', attr='<a href="http://jawg.io" title="Tiles Courtesy of Jawg Maps" target="_blank">&copy; <b>Jawg</b>Maps</a> &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', control_scale=True, max_bounds=True, prefer_canvas=True, zoom_start=10)
for ind, row in meta.iterrows():
    icon = folium.features.CustomIcon(f"./data/icons/{(dir_mapper[row['Dir']])}.png", icon_size=(15, 15))
    style = 'font-family: Impact, Haettenschweiler, "Franklin Gothic Bold", Charcoal, "Helvetica Inserat", "Bitstream Vera Sans Bold", "Arial Black", "sans serif";'
    html = f'''<body style="{style}"><div align="center", style="background-color: #BAD6FF; font-family: Arial">''' + row.to_frame().transpose()[['ID', 'Fwy', 'Length', 'Type', 'Lanes']].transpose().to_html(justify="center", header=False, index=True, index_names=False, col_space=300, classes="table-condensed table-responsive table-success") + '</div></body>' 
    popup = folium.Popup(html, max_width=300)                
    folium.Marker(row[['Latitude', 'Longitude']].values.tolist(), icon=icon, popup=popup).add_to(m)

In [81]:
m.save('./plots/station_map.html')

**Downsample Stations Using RDP**

In [4]:
from simplification.cutil import (
    simplify_coords,
    simplify_coords_idx,
    simplify_coords_vw,
    simplify_coords_vw_idx,
    simplify_coords_vwp,
)
import pandas as pd

In [5]:
ds_coords = pd.DataFrame(simplify_coords(meta[["Latitude", 'Longitude']].values, 0.01), columns=['Latitude', 'Longitude'])
ds_coords

Unnamed: 0,Latitude,Longitude
0,33.880183,-118.021787
1,33.916643,-118.079557
2,34.007592,-118.157266
3,34.029033,-118.217141
4,34.080943,-118.222370
...,...,...
1037,34.055122,-118.212293
1038,33.873704,-118.219282
1039,34.024458,-118.239564
1040,34.161152,-118.469631


**Downsample Stations Using Rules**

All Stations

In [None]:
m = Map(location=(34.0522,-118.2437), tiles='https://tile.jawg.io/jawg-dark/{z}/{x}/{y}{r}.png?access-token=yxQukjQJyY3mRrF6htcGR22i1QJ6BP6wslSe2Cmq2k4aT8S0wbDtYMEaPhc8s240', attr='<a href="http://jawg.io" title="Tiles Courtesy of Jawg Maps" target="_blank">&copy; <b>Jawg</b>Maps</a> &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', control_scale=True, max_bounds=True, prefer_canvas=True, zoom_start=10)
for ind, row in meta[meta['Dir'] == 'N'].iterrows():
    icon = folium.features.CustomIcon(f"./data/icons/north.png", icon_size=(15, 15))
    style = 'font-family: Impact, Haettenschweiler, "Franklin Gothic Bold", Charcoal, "Helvetica Inserat", "Bitstream Vera Sans Bold", "Arial Black", "sans serif";'
    html = f'''<body style="{style}"><div align="center", style="background-color: #BAD6FF; font-family: Arial">''' + row.to_frame().transpose()[['ID', 'Fwy', 'Length', 'Type', 'Lanes']].transpose().to_html(justify="center", header=False, index=True, index_names=False, col_space=300, classes="table-condensed table-responsive table-success") + '</div></body>' 
    popup = folium.Popup(html, max_width=300)                
    folium.Marker(row[['Latitude', 'Longitude']].values.tolist(), icon=icon, popup=popup).add_to(m)
m

After Downsampling

In [135]:
# 1 deg lat = 69 mi
# 1 deg long = 54 mi
distance_between_stations = meta[meta['Dir'] == 'N'].sort_values(['Latitude', 'Longitude'])[['Latitude', 'Longitude']].diff()
stations = distance_between_stations[((distance_between_stations['Latitude'] * 69) > 0.1) | ((distance_between_stations['Longitude'] * 54) > 0.1)].index.tolist()
#stations = set(stations).union(set([s - 1 for s in stations]))
len(stations)

333

In [None]:
m = Map(location=(34.0522,-118.2437), tiles='https://tile.jawg.io/jawg-dark/{z}/{x}/{y}{r}.png?access-token=yxQukjQJyY3mRrF6htcGR22i1QJ6BP6wslSe2Cmq2k4aT8S0wbDtYMEaPhc8s240', attr='<a href="http://jawg.io" title="Tiles Courtesy of Jawg Maps" target="_blank">&copy; <b>Jawg</b>Maps</a> &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', control_scale=True, max_bounds=True, prefer_canvas=True, zoom_start=10)
for ind, row in meta[meta.index.isin(stations)].iterrows():
    icon = folium.features.CustomIcon(f"./data/icons/north.png", icon_size=(15, 15))
    style = 'font-family: Impact, Haettenschweiler, "Franklin Gothic Bold", Charcoal, "Helvetica Inserat", "Bitstream Vera Sans Bold", "Arial Black", "sans serif";'
    html = f'''<body style="{style}"><div align="center", style="background-color: #BAD6FF; font-family: Arial">''' + row.to_frame().transpose()[['ID', 'Fwy', 'Length', 'Type', 'Lanes']].transpose().to_html(justify="center", header=False, index=True, index_names=False, col_space=300, classes="table-condensed table-responsive table-success") + '</div></body>' 
    popup = folium.Popup(html, max_width=300)                
    folium.Marker(row[['Latitude', 'Longitude']].values.tolist(), icon=icon, popup=popup).add_to(m)
m

# EDA for Speed Data

**Data Dictionary**

In [20]:
data_dict = pd.read_csv('./data/pems/station_dict.csv', names=['Feature', 'Description', 'Unit']).iloc[1:].reset_index(drop=True)
data_dict

Unnamed: 0,Feature,Description,Unit
0,Timestamp,The date and time of the beginning of the summ...,
1,Station,Unique station identifier. Use this value to c...,
2,District,District #,
3,Freeway #,Freeway #,
4,Direction of Travel,N | S | E | W,
5,Lane Type,A string indicating the type of lane. Possible...,
6,Station Length,Segment length covered by the station in miles...,
7,Samples,Total number of samples received for all lanes.,
8,% Observed,Percentage of individual lane points at this l...,%
9,Total Flow,Sum of flows over the 5-minute period across a...,Veh/5-min


In [21]:
from glob import glob
files = glob('./data/pems/*.gz')
len(files)

365

In [53]:
import gzip
with gzip.open(files[0]) as f:
    df = pd.read_csv(f, index_col=False, names=data_dict['Feature'].values.tolist())[['Timestamp', 'Station', 'Station Length', 'Samples', 'Total Flow', 'Avg Occupancy', 'Avg Speed', 'Lane N Samples', 'Lane N Flow', 'Lane N Avg Occ', 'Lane N Avg Speed', 'Lane N Observed']]
df.head()

Unnamed: 0,Timestamp,Station,Station Length,Samples,Total Flow,Avg Occupancy,Avg Speed,Lane N Samples,Lane N Flow,Lane N Avg Occ,Lane N Avg Speed,Lane N Observed
0,01/05/2021 00:00:00,715898,0.43,0,202.0,0.0357,70.4,0.0,54.0,0.0254,74.2,0
1,01/05/2021 00:00:00,715900,,0,,,,0.0,,,,0
2,01/05/2021 00:00:00,715901,,0,,,,0.0,,,,0
3,01/05/2021 00:00:00,715903,,0,,,,0.0,,,,0
4,01/05/2021 00:00:00,715904,,9,,,,9.0,,,,0


In [49]:
dir_mapper = {'N': 'north', 'E': 'east', 'S': 'south', 'W': 'west'}

**Top 5 Sensors w/ Highest Avg. Speed on 01/05/2021**

In [50]:
high_speed_sensors = df.groupby('Station')['Avg Speed'].mean().dropna().sort_values().tail(5)
high_speed_sensors_meta = meta.merge(high_speed_sensors, left_on='ID', right_index=True)

m = Map(location=(34.0522,-118.2437), tiles='https://tile.jawg.io/jawg-dark/{z}/{x}/{y}{r}.png?access-token=yxQukjQJyY3mRrF6htcGR22i1QJ6BP6wslSe2Cmq2k4aT8S0wbDtYMEaPhc8s240', attr='<a href="http://jawg.io" title="Tiles Courtesy of Jawg Maps" target="_blank">&copy; <b>Jawg</b>Maps</a> &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', control_scale=True, max_bounds=True, prefer_canvas=True, zoom_start=10)
for ind, row in high_speed_sensors_meta.iterrows():
    icon = folium.features.CustomIcon(f"./data/icons/{dir_mapper[row['Dir']]}.png", icon_size=(15, 15))
    style = 'font-family: Impact, Haettenschweiler, "Franklin Gothic Bold", Charcoal, "Helvetica Inserat", "Bitstream Vera Sans Bold", "Arial Black", "sans serif";'
    html = f'''<body style="{style}"><div align="center", style="background-color: #BAD6FF; font-family: Arial">''' + row.to_frame().transpose()[['ID', 'Fwy', 'Length', 'Type', 'Lanes', 'Avg Speed']].transpose().to_html(justify="center", header=False, index=True, index_names=False, col_space=300, classes="table-condensed table-responsive table-success") + '</div></body>' 
    popup = folium.Popup(html, max_width=300)                
    folium.Marker(row[['Latitude', 'Longitude']].values.tolist(), icon=icon, popup=popup).add_to(m)
m


**Top 5 Sensors w/ Lowest Avg. Speed on 01/05/2021**

In [51]:
low_speed_sensors = df.groupby('Station')['Avg Speed'].mean().dropna().sort_values().head(5)
low_speed_sensors_meta = meta.merge(low_speed_sensors, left_on='ID', right_index=True)

m = Map(location=(34.0522,-118.2437), tiles='https://tile.jawg.io/jawg-dark/{z}/{x}/{y}{r}.png?access-token=yxQukjQJyY3mRrF6htcGR22i1QJ6BP6wslSe2Cmq2k4aT8S0wbDtYMEaPhc8s240', attr='<a href="http://jawg.io" title="Tiles Courtesy of Jawg Maps" target="_blank">&copy; <b>Jawg</b>Maps</a> &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', control_scale=True, max_bounds=True, prefer_canvas=True, zoom_start=10)
for ind, row in low_speed_sensors_meta.iterrows():  
    icon = folium.features.CustomIcon(f"./data/icons/{dir_mapper[row['Dir']]}.png", icon_size=(15, 15))
    style = 'font-family: Impact, Haettenschweiler, "Franklin Gothic Bold", Charcoal, "Helvetica Inserat", "Bitstream Vera Sans Bold", "Arial Black", "sans serif";'
    html = f'''<body style="{style}"><div align="center", style="background-color: #BAD6FF; font-family: Arial">''' + row.to_frame().transpose()[['ID', 'Fwy', 'Length', 'Type', 'Lanes', 'Avg Speed']].transpose().to_html(justify="center", header=False, index=True, index_names=False, col_space=300, classes="table-condensed table-responsive table-success") + '</div></body>' 
    popup = folium.Popup(html, max_width=300)                
    folium.Marker(row[['Latitude', 'Longitude']].values.tolist(), icon=icon, popup=popup).add_to(m)
m


**How Does Speed Change Over Time for a Particular Station**

In [65]:
# find station that has most speed readings
station_most_speed_readings = df.groupby('Station')['Avg Speed'].count().idxmax()
df[df['Station'] == station_most_speed_readings].head()

Unnamed: 0,Timestamp,Station,Station Length,Samples,Total Flow,Avg Occupancy,Avg Speed,Lane N Samples,Lane N Flow,Lane N Avg Occ,Lane N Avg Speed,Lane N Observed
0,01/05/2021 00:00:00,715898,0.43,0,202.0,0.0357,70.4,0.0,54.0,0.0254,74.2,0
4888,01/05/2021 00:05:00,715898,0.43,0,198.0,0.0346,70.0,0.0,53.0,0.0246,73.8,0
9776,01/05/2021 00:10:00,715898,0.43,0,193.0,0.0349,69.8,0.0,51.0,0.0248,73.7,0
14664,01/05/2021 00:15:00,715898,0.43,0,188.0,0.0337,69.7,0.0,50.0,0.024,73.5,0
19552,01/05/2021 00:20:00,715898,0.43,0,179.0,0.0322,69.6,0.0,48.0,0.0229,73.4,0


In [68]:
import plotly.express as px

px.line(df[df['Station'] == station_most_speed_readings], x='Timestamp', y='Avg Speed', title=f'Avg Speed vs. Time for Station {station_most_speed_readings}')