# Visualizing Realtime Traffic Data

Assignment: Using the NDW data source below, write:

1. a data retrieval function that automatically downloads, extracts and loads realtime data such as traffic accidents
2. a data visualization function that displays the retrieved data on a map

Data source: http://opendata.ndw.nu

Note that this data source does not offer an easy-to-use API; it only provides gzip-compressed XML files (`.xml.gz`) containing the required data.


## Libraries

In [1]:
# Libraries
import wget                                       # Download file
import gzip                                       # Unzip
import os                                         # File handling
from bs4 import BeautifulSoup                     # Parsing xml
import pandas as pd                               # DataFrames
import geopandas as gpd                           # GeoDataFrames
from geopandas import GeoDataFrame, GeoSeries     # Direct reference to geopandas objects
from shapely.geometry import Point, LineString    # Direct reference to shapely objects
import matplotlib.pyplot as plt                   # Plotting
import mplleaflet                                 # Plotting on interactive map

# Library settings
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline
# Show up to 100 rows and 100 columns when displaying DataFrames
pd.set_option("display.max_rows",100)
pd.set_option("display.max_columns",100)

## Functions

In [2]:
# Functions

def retrieve_data(url):
    """Download a gzip-compressed file and return its decompressed content.

    The file is downloaded to a local path with ``wget``, decompressed in
    memory, and the downloaded file is removed again before returning.

    Args:
        url: Location of the gzip-compressed file to download.

    Returns:
        The decompressed content as ``bytes``.
    """
    # Download
    path = wget.download(url)

    try:
        # Extract; the context manager closes the handle even if read() fails
        with gzip.open(path, 'rb') as file:
            content = file.read()
    finally:
        # Delete downloaded file, also when extraction raised
        os.remove(path)

    return content


def get_subtag_text(tag, subtag):
    """Return the text of the first ``subtag`` element found in ``tag``.

    Args:
        tag: Object exposing ``find(name)``, e.g. a BeautifulSoup tag.
        subtag: Name of the sub-element to look up.

    Returns:
        The result of ``get_text()`` on the match, or ``None`` when there
        is no matching sub-element.
    """
    try:
        text = tag.find(subtag).get_text()
    except AttributeError:
        # find() returned None (no such sub-element), so there is no text.
        # Only this case is handled; unrelated errors are not swallowed.
        text = None

    return text


def parse_incidents(content):
    """Parse incident XML into a GeoDataFrame, one row per situation record.

    Args:
        content: XML document (as returned by ``retrieve_data``) containing
            ``situationRecord`` elements.

    Returns:
        A GeoDataFrame indexed by the record ``id`` attribute with the
        columns ``creationtime``, ``validitystatus``, ``starttime``,
        ``direction``, ``location``, ``offsetdistance``, ``accidenttype``
        and the geometry column ``coordinates`` (a Point built from
        longitude/latitude).
    """
    # Create BeautifulSoup object for parsing
    bs_parser = BeautifulSoup(content, 'lxml-xml')

    # (dataframe column, XML sub-element) pairs extracted from every record
    fields = [
        ('longitude', 'longitude'),
        ('latitude', 'latitude'),
        ('creationtime', 'situationRecordCreationTime'),
        ('validitystatus', 'validityStatus'),
        ('starttime', 'overallStartTime'),
        ('direction', 'alertCDirectionCoded'),
        ('location', 'specificLocation'),
        ('offsetdistance', 'offsetDistance'),
        ('accidenttype', 'accidentType'),
    ]
    columns = ['id'] + [column for column, _ in fields]

    # Collect all rows first and build the dataframe once: appending to a
    # DataFrame inside the loop is quadratic and DataFrame.append no longer
    # exists in pandas 2.x.
    rows = []
    for record in bs_parser.find_all('situationRecord'):
        row = {'id': record.get('id')}
        for column, subtag in fields:
            row[column] = get_subtag_text(record, subtag)
        rows.append(row)

    # Explicit columns keep the casts below working when no records are found
    df_incidents = pd.DataFrame(rows, columns=columns)

    # Type casting
    df_incidents['creationtime'] = pd.to_datetime(df_incidents['creationtime'])
    df_incidents['starttime'] = pd.to_datetime(df_incidents['starttime'])
    df_incidents['longitude'] = df_incidents['longitude'].astype('float64')
    df_incidents['latitude'] = df_incidents['latitude'].astype('float64')
    df_incidents['offsetdistance'] = df_incidents['offsetdistance'].astype('float')

    # Data cleaning: records without an accidentType element become 'other'
    df_incidents['accidenttype'] = df_incidents['accidenttype'].fillna('other')

    # Set dataframe index
    df_incidents = df_incidents.set_index('id')

    # Add column with Point coordinates, convert to GeoDataFrame and drop
    # the original longitude/latitude columns
    df_incidents['coordinates'] = list(zip(df_incidents['longitude'], df_incidents['latitude']))
    df_incidents['coordinates'] = df_incidents['coordinates'].apply(Point)
    gdf_incidents = GeoDataFrame(df_incidents, geometry='coordinates')
    gdf_incidents = gdf_incidents.drop(['longitude', 'latitude'], axis=1)

    return gdf_incidents


def leaflet_plot(gdf, **args):
    """Leaflet plot with zoomable map background.

    Args:
        gdf: GeoDataFrame to plot.
        **args: Extra keyword arguments forwarded to ``gdf.plot``.

    Returns:
        The object returned by ``mplleaflet.display`` for the figure.
    """
    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={'aspect': 'equal'})
    # Plot the frame that was passed in rather than the notebook-global
    # gdf_incidents, so the function works for any GeoDataFrame.
    gdf.plot(ax=ax, **args)
    leaflet = mplleaflet.display(fig=fig)

    return leaflet

## 1. Data Retrieval

In [3]:
# Settings: location of the gzip-compressed incidents XML file
incidents_url = 'http://opendata.ndw.nu/incidents.xml.gz'

# Retrieve (download + decompress) and parse incident data
incidents_content = retrieve_data(url=incidents_url)
gdf_incidents = parse_incidents(incidents_content)

# Inspect data: show the first three rows
gdf_incidents.head(3)

Unnamed: 0_level_0,accidenttype,creationtime,direction,location,offsetdistance,starttime,validitystatus,coordinates
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
RWS03_582163_1,other,2018-12-03 21:28:02,negative,22288,1500.0,2018-12-03 21:27:57,active,POINT (4.344616889953613 51.96369934082031)
RWS03_582160_1,other,2018-12-03 21:26:32,positive,9113,700.0,2018-12-03 21:26:25,active,POINT (4.316531181335449 51.48501968383789)
RWS03_582159_1,accident,2018-12-03 21:26:23,negative,7000,1100.0,2018-12-03 21:26:16,active,POINT (4.960783004760742 52.3497200012207)


## 2. Spatial Visualization

In [4]:
# Zoomable map plot of the incidents; the 'accidenttype' column is passed
# through to the underlying plot call
leaflet_plot(gdf_incidents, column = 'accidenttype')

# In progress: traffic speeds

Note: this dataset takes a very long time to process...

In [5]:
# Settings: location of the gzip-compressed traffic speed XML file
trafficspeed_url = 'http://opendata.ndw.nu/trafficspeed.xml.gz'

# Retrieve traffic speed data (the parsing step below is still disabled)
trafficspeed_content = retrieve_data(url=trafficspeed_url)
#gdf_trafficspeed = parse_trafficspeed(trafficspeed_content)

In [None]:
# from io import StringIO
# import lxml.etree as et
# import pandas as pd

# # Load XML and XSL files
# #doc = et.fromstring(trafficspeed_content)
# doc = et.fromstring(trafficspeed_content)
# xsl = et.parse('./xslt/trafficspeed-selection.xsl')

# # Initialize and run transformation
# transform = et.XSLT(xsl)
# # Convert result to string
# result = str(transform(doc))

# # Import into dataframe
# df = pd.read_csv(StringIO(result))
# df.head()