# OSM Variable extraction

The following provides an overview of progress of extracting OSM data. 

## Import packages

In [1]:
from __future__ import division

# Computational package
import pandas as pd
import numpy as np

# Geographical packages
import geopandas as gpd
from shapely.geometry import LineString
import utm
import re

# Visualization packages
import matplotlib.pyplot as plt
from pandas.tools.plotting import scatter_matrix

%matplotlib inline

## Functions

### Extract data from OSM

Mapzen.com gives the possibility to extract data for specific segments (cities or custom segments). This data includes several .geojson files for, for example, roads, amenities and places. We are interested in the road file and we can import that using geopandas. The data includes valuable data on the type of road and the xistence of tunnels and bridges. 

In [2]:
def extract_OSM_data(file_name):

    network = gpd.read_file(file_name)
    # Plot network
    # SP_roads.plot(figsize=(5,5))

    # Print dataframe
    network.head()
    
    return network

### Read pixel list

This code assumes pixel list with latitude and longitude coordinates is already created. We will read this file and create a bounding box with the following bounds. 
* minx - Latitude lowerbound
* maxx - Latitude upperbound
* miny - Longitude lowerbound
* maxy - Longitude upperbound

Since coordinates are provided in degrees, we first translate those coordinates to UTM before defining the bounds. 

In [3]:
def read_pixel_list(file_name, pixel_width = 1000):
    """
    Specify pixel_width to account for pixel size. For pixels of 1km^2, the pixel_width is 1000. 
    """
    
    SP_pixel = pd.read_excel(file_name)
    SP_pixel.reset_index(drop = True, inplace = True)

    pixel_width = 1000

    # Create empty lists to keep track
    minx_utm = []
    miny_utm = []
    maxx_utm = []
    maxy_utm = []

    # Iterate over all the rows for conversion
    for index, row in SP_pixel.iterrows():

        x_utm, y_utm, zone_letter, zone_number = utm.from_latlon(row['lat'], row['lon'])

        # Append to lists
        minx_utm.append(x_utm - pixel_width / 2 )
        miny_utm.append(y_utm - pixel_width / 2)
        maxx_utm.append(x_utm + pixel_width / 2)
        maxy_utm.append(y_utm + pixel_width / 2)

    # Update dataframe to UTM
    SP_pixel['minx'] = minx_utm
    SP_pixel['miny'] = miny_utm
    SP_pixel['maxx'] = maxx_utm
    SP_pixel['maxy'] = maxy_utm

    SP_pixel.head()
    
    return SP_pixel

### Manipulate geometry

Currently, each road is stored as a Shapely file. For easier processing of the roads, we take the following steps. 

#### Find maximum bounds of roads

We will find the maximum and minimum latitude and longitude for each road, so we can easier match it to the pixels at a later stage. Furthermore, we transform the bounds to UTM for easier processing. 

In [4]:
def find_road_bounds(roads_dataframe):
    # Create lat, lon road bounds
    road_bounds = roads_dataframe['geometry'].bounds

    # Create empty lists to keep track
    minx_utm = []
    miny_utm = []
    maxx_utm = []
    maxy_utm = []

    # Iterate over all the rows for conversion
    for index, row in road_bounds.iterrows():

        # Convert
        minx, miny, zone_letter, zone_number = utm.from_latlon(row['miny'], row['minx'])
        maxx, maxy, zone_letter, zone_number = utm.from_latlon(row['maxy'], row['maxx'])

        # Append to lists
        minx_utm.append(minx)
        miny_utm.append(miny)
        maxx_utm.append(maxx)
        maxy_utm.append(maxy)

    # Update dataframe to UTM
    road_bounds['minx'] = minx_utm
    road_bounds['miny'] = miny_utm
    road_bounds['maxx'] = maxx_utm
    road_bounds['maxy'] = maxy_utm

    # Concatenate dataframe to roads dataframe
    roads = pd.concat((roads_dataframe, road_bounds), axis = 1)
    
    return roads

#### Convert linestring to list of UTM coordinates

Each road is stored as a shapely LineString file. We transform it to a list of UTM coordinates for easier processing. 

In [5]:
def convert_linestring_to_UTM(road_df):

    # Lists to keep track
    road_utm_column = []
    road_length_column = []
    number_of_coord = []

    # Iterate over all roads
    for road_id, road in road_df.iterrows():
            # Translate linestring to list
            road_list = np.array(road['geometry']).tolist()

            # Check and store number of coord
            road_length = len(road_list)
            number_of_coord = road_length
            road_length_column.append(road_length)

            # Convert road to utm
            road_utm = []
            
            # Transform coordinates to UTM
            for coord in road_list:
                    x_utm, y_utm, zone_letter, zone_number = utm.from_latlon(coord[1], coord[0])
                    road_utm.append([x_utm, y_utm])

            # Store road in column
            road_utm_column.append(road_utm)

    # Add column to roads dataframe
    road_df['road_coords'] = road_utm_column
    road_df['number_of_nodes'] = road_length_column

    return road_df  

### Road Connectivity

To determine road connectivity, we want to find all nodes that share coordinates. If two nodes share a coordinate, they indicate an intersection. 

#### Find coordinates of relevant roads

First, we need to identify for what road types we are interested in to find intersections (some roadtypes, like stairs, walkways or bikepaths are not relevant for our analysis since cars cannot drive there). 

In [6]:
def find_coordinates_relevant_road_types(road_df, road_types):
    ''' Road types: List of road types as used in OSM'''
    
    print 'Total number of roads: ', len(road_df)
    road_df = road_df[road_df['type'].isin(road_types)]
    print 'Total number of allowable roads: ', len(road_df)
    
    # Create list to keep track of coordinates
    coordinates = []
    
    # Iterate over roads
    for road_id, road in road_df.iterrows():
            # Translate linestring to list
            road_list = road['road_coords']
            
            # Iterate over coordinates in road list
            # Check if road lenght is bigger than 1, if it is, there is just one point and we want to delete it later
            if len(road_list) >= 1.1:

                # Analyze first coordiante
                for coord in [road_list[0]]:
                    coordinates.append([int(coord[0]), int(coord[1]), (int(coord[0]), int(coord[1])), 1, 0, 0])

                # Analyze center coordinates
                for coord in road_list[1:-1]:
                    coordinates.append([int(coord[0]), int(coord[1]), (int(coord[0]), int(coord[1])), 0, 1, 0])

                # Analyze last coordinate
                for coord in [road_list[-1]]:
                    coordinates.append([int(coord[0]), int(coord[1]), (int(coord[0]), int(coord[1])), 1, 0, 0])

            # If there is only 1 coordinate
            else:
                for coord in road_list:
                    coordinates.append([int(coord[0]), int(coord[1]), (int(coord[0]), int(coord[1])), 0, 0, 1])
    
    # Store individual coordinates for road connectivity
    coord_df = pd.DataFrame(coordinates, columns = ['x', 'y', 'coords', 'connecting_node', 'center_node', 'one_node'])
    
    return coord_df

#### Find intersections

We take the following steps to find intersections.  

1. Search for all unique coordinates
2. Count frequency of unique coordinates
3. Take all coordinates that have frequency of two or more

For the coordinates of the remaining subset, two things could be the case

1. We have an intersection, or at least a point where you can go into three directions
2. The road changes name or type, a point where you can only move in two directions

Obviously, the latter does not point to road connectivity. We can identify this if two nodes 'connecting_node' coincide at the same coordinate. Therefore, we add a fourth filter to our analysis. 

4. Take all the coordinates that have frequency two or more and are not two connecting nodes

In [7]:
def road_connecitivity(coordinates_df):
    # Print number of coordines
    print 'Number of nodes: ', coordinates_df.shape[0]

    # Create pivot table to find nodes with the same coordiantes
    pivot_table = coordinates_df.pivot_table(index = ['x', 'y'], aggfunc=np.count_nonzero)

    #Delete all the nodes with only one coordiante
    pivot_table.drop(['one_node'], axis = 1, inplace = True)

    # Print number of unique coordiantes
    print 'Number of unique nodes: ', pivot_table.shape[0]

    # Filter coords with more than one node
    pivot_table = pivot_table[pivot_table['coords'] > 1]
    print 'Number of coordinates with two or more nodes: ', pivot_table.shape[0]

    # Filter coordinates with only two nodes that are both a connecting node
    pivot_table = pivot_table[(pivot_table['coords'] > 2) | (pivot_table['center_node'] >= 1)]
    print 'Number of connectivity coordinates: ', pivot_table.shape[0]
    
    pivot_table = pivot_table.reset_index()
    return pivot_table 

### Map points to pixel list

Next, we want to map interesections to the pixel list. 

The function we defined is more general and can be used to map any kind of points to the pixel list (think for example about public buildings or customers). However, this is outside the scope of this project. 

In [8]:
def map_points(points_df, pixel_list, column_title):
    """ Points_df: Requires x and y column
        column_title is title of column in pixel_list datafarme"""
    
    #Create list to keep track of frequency of points
    points_frequency = []
    
    # Iterate over pixels
    for pixel_id, pixel in pixel_list.iterrows():
        # Explicitly define bounds of pixel
        pixel_minx = pixel['minx']
        pixel_miny = pixel['miny']
        pixel_maxx = pixel['maxx']
        pixel_maxy = pixel['maxy']
        
        # Filter points df based on bounds
        points_in_pixel = points_df[(points_df['x'] < pixel_maxx) &
                  (points_df['x'] >= pixel_minx) &
                  (points_df['y'] < pixel_maxy) &
                  (points_df['y'] >= pixel_miny)]
        
        # Store frequency for this pixel
        points_frequency.append(points_in_pixel.shape[0])
        
    # Append data to pixel_list
    pixel_list[column_title] = points_frequency
    
    return pixel_list

### Road capacity

To to determine the road capacity in a pixel we have to take three steps. 

\begin{enumerate}
    \item Find the road(segment)s inside a pixel
    \item Find the distance of each segment and multiply by its capacity
    \item Sum over all road(segment)s inside the pixel
\end{enumerate}

2 and 3 are trivial, there are however some issues with 1. The following three situations can occur:

\begin{itemize}
    \item A roadsegment lies completely within the pixel (trivial)
    \item A roadsegment has a starting point in one pixel and an end point in another pixel
    \item A road segment crosses the pixel but has a start and end point in adjacent pixels
\end{itemize}

#### Check if points or roads are in a pixel

Based on the bounding box of each road and each pixel, we can determine if a road or a particular point is in the pixel. 

We define three different functions:

1. Check if a particular point lies in the pixel
2. Check if a line segment lies completely in the pixel
3. Check if a line segment lies partly in a particular pixel

In [9]:
def point_in_pixel(coordinate, minx, maxx, miny, maxy):
    
    # Check if coordinate is in pixel
    if (coordinate[0] >= minx 
        and coordinate[0] <= maxx 
        and coordinate[1] >= miny 
        and coordinate[1] <= maxy):
        
        return True
    
    else:
        return False
def check_completely_in_pixel(road, minx, maxx, miny, maxy):
    
    if (road['minx'] >= minx 
        and road['maxx'] <= maxx 
        and road['miny'] >= miny 
        and road['maxy'] <= maxy):
        
        return True
    
    else:
        
        return False
    
    
def check_partly_in_pixel(road, minx, maxx, miny, maxy):
    
    # Iterate over all coordiantes in road
    for coordinate in road['road_coords']:
        
        # If one of the coordinates is in the pixel, return true
        if (coordinate[0] >= minx 
            and coordinate[0] <= maxx 
            and coordinate[1] >= miny 
            and coordinate[1] <= maxy):

            return True
    
    # If none of the coordinates is in the pixel, return false
    return False

#### Find new road coordinates for segments that are partly in the pixel

If a line segment crosses the pixel boundary, we want to delete the coordinates from the road that are outside the pixel. However, we have to make sure that we allocate a part of the segment that crosses the pixel bounds to that pixel. We do so by finding the intersection between the segment and the bound, we 'determine the midpoint'. We can use this midpoint to 'create new road coordinates'. 

In [10]:
def create_new_road_coordinates(road, minx, maxx, miny, maxy):


    # Find points that are in or out
    node_in_pixel = []

    # Iterate over nodes to check if they're in the pixel
    for i in range(len(road)):
        node_in_pixel.append(point_in_pixel(road[i], minx, maxx, miny, maxy))

    # Create new node list
    node_list = []

    # Delete nodes that are out and for which the next one is out too, otherwise find midpoint
    for i in range(len(road[:-1])):

        # Check if current node is in
        if node_in_pixel[i] == False:

            #If subsequent node is in find midpoint
            if node_in_pixel[i + 1] == True:

                mid_point = determine_midpoint(road[i], road[i+1], minx, maxx, miny, maxy)
                node_list.append(mid_point)
        
        else:
            node_list.append(road[i])
            
             #If subsequent node is out find midpoint
            if node_in_pixel[i + 1] == False:
                mid_point = determine_midpoint(road[i], road[i+1], minx, maxx, miny, maxy)
                node_list.append(mid_point)
        
        # Check if the last node is in
        if i == len(road[:-1]) - 1:
            if node_in_pixel[i + 1] == True:
                node_list.append(road[i+1])

    return node_list

In [11]:
def determine_midpoint(point_1, point_2, minx, maxx, miny, maxy):

    # Determine line
    slope = (point_2[1] - point_1[1]) / (point_2[0] - point_1[0])
    intercept = point_1[1] - slope * point_1[0]

    # Check for each of the bounds if they are on the line segment
    if (point_2[0] <= maxx and point_1[0] >= maxx) or (point_2[0] >= maxx and point_1[0] <= maxx):
        # Find corresponding point
        new_x = maxx
        new_y = slope * new_x + intercept
        new_point = [new_x, new_y]
        
        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            return new_point
            
    if (point_2[0] <= minx and point_1[0] >= minx) or (point_2[0] >= minx and point_1[0] <= minx):
        # Find corresponding point
        new_x = minx
        new_y = slope * new_x + intercept
        new_point = [new_x, new_y]
        
        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            return new_point
            
    if (point_2[1] <= maxy and point_1[1] >= maxy) or (point_2[1] >= maxy and point_1[1] <= maxy):
        # Find corresponding point
        new_y = maxy
        new_x = (new_y - intercept) / slope
        new_point = [new_x, new_y]
        
        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            return new_point
            
    if (point_2[1] <= miny and point_1[1] >= miny) or (point_2[1] >= miny and point_1[1] <= miny):
        # Find corresponding point
        new_y = miny
        new_x = (new_y - intercept) / slope
        new_point = [new_x, new_y]     
        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            return new_point

#### Find new road coordinates for segments that cross the pixel

When the bounding box of a line segment overlaps with the pixel, but none of the endpoints lies within the pixel, we want to check every segment if it crosses the pixel. Therefore, we check for every segment if it intersects with two of the bounds (it always intersects with two, one to go in, one to go out). This function is slightly different than if a road is partly in the pixel, since we are looking for two intersections of the line with the pixel opposite to one. Therefore, we define two functions that are special cases of the once for a road that is partly in the pixel. 

In [12]:
def create_new_road_coordinates_crossing(road, minx, maxx, miny, maxy):

    # Create new node list
    node_list = []

    # We know that none of the nodes is in the pixel, so we iterate over all nodes and check for crossing immediatly
    for i in range(len(road[:-1])):

            # Determine if a segment crosses the boundary
            cross_points = determine_midpoint_crossing(road[i], road[i+1], minx, maxx, miny, maxy)

            # If there are not cross points do nothing
            # If there are two cross points that's the node list
            if cross_points == None:
                return node_list
            else:
                node_list = cross_points
                return node_list

In [13]:
def determine_midpoint_crossing(point_1, point_2, minx, maxx, miny, maxy):


    # List of cross points

    cross_points = []
    # Determine line
    slope = (point_2[1] - point_1[1]) / (point_2[0] - point_1[0])
    intercept = point_1[1] - slope * point_1[0]

    # Check for each of the bounds if they are on the line segment
    if (point_2[0] <= maxx and point_1[0] >= maxx) or (point_2[0] >= maxx and point_1[0] <= maxx):
        # Find corresponding point
        new_x = maxx
        new_y = slope * new_x + intercept
        new_point = [new_x, new_y]

        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            cross_points.append(new_point)

    if (point_2[0] <= minx and point_1[0] >= minx) or (point_2[0] >= minx and point_1[0] <= minx):
        # Find corresponding point
        new_x = minx
        new_y = slope * new_x + intercept
        new_point = [new_x, new_y]

        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            cross_points.append(new_point)

    if (point_2[1] <= maxy and point_1[1] >= maxy) or (point_2[1] >= maxy and point_1[1] <= maxy):
        # Find corresponding point
        new_y = maxy
        new_x = (new_y - intercept) / slope
        new_point = [new_x, new_y]

        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            cross_points.append(new_point)

    if (point_2[1] <= miny and point_1[1] >= miny) or (point_2[1] >= miny and point_1[1] <= miny):
        # Find corresponding point
        new_y = miny
        new_x = (new_y - intercept) / slope
        new_point = [new_x, new_y]     
        # Check if the newpoint is in the pixel, if so, we found our intersect
        if point_in_pixel(new_point, minx, maxx, miny, maxy):
            cross_points.append(new_point)

    return cross_points

#### Compute distance and capacity of segment

Since line segments are always straight, we can easily determine the distance of a linesegment using the Euclidian distance. The sum of the segments of a road in a pixel determine the total distance of that particular road in the pixel.

To determine road capacity, we multiply each road by a specific factor accounting for the number of lanes of that particular road. 

In [14]:
# compute length of road
def compute_distance_road(road):
    
    # Keep track of distance
    distance = 0
    
    # Iterate over road segments
    for i in range(len(road) - 1):
        distance = distance + np.linalg.norm(np.array(road[i]) - np.array(road[i + 1]))
    
    # Return distance in km
    return distance / 1000

# Based on the length of the road and the type of road, we can computed the road capacity
def compute_capacity_road(road, road_type):
    
    # Keep track of distance
    capacity = 0
    
    # Iterate over road segments
    for i in range(len(road) - 1):
        capacity = capacity + np.linalg.norm(np.array(road[i]) - np.array(road[i + 1])) * road_capacity_conversion[road_type]
    
    # Return distance in km
    return capacity / 1000  

#### Determine road capacity for a given type of road

Combining all steps described above, we can define the road capacity and distance for a particular pixel.

In [15]:
def road_capacity_per_pixel(pixel, roads_list, road_types):
    
    # Ignore roads that are not allowed
    roads_list = roads_list[roads_list['type'].isin(road_types)]
    
    # Explicitly define bounds of pixel
    pixel_minx = pixel['minx']
    pixel_miny = pixel['miny']
    pixel_maxx = pixel['maxx']
    pixel_maxy = pixel['maxy']

    # Filter road bounding boxes for overlap 
    roads_in_pixel = roads_list[roads_list['maxx'] >= pixel_minx]
    roads_in_pixel = roads_in_pixel[roads_in_pixel['minx'] <= pixel_maxx]
    roads_in_pixel = roads_in_pixel[roads_in_pixel['miny'] <= pixel_maxy]
    roads_in_pixel = roads_in_pixel[roads_in_pixel['maxy'] >= pixel_miny]
    
    #### Find roads completely in pixel ####
    # Create cleaned dataframe 
    roads_in_pixel_clean = pd.DataFrame(columns = roads_in_pixel.columns)

    # Keep track of road is completely in pixel
    list_completely_in_pixel = []

    # Iterate over roads
    for road_id, road in roads_in_pixel.iterrows():
        #Check if road bounding box falls completely within pixel
        completely_in_pixel = check_completely_in_pixel(road, pixel_minx, pixel_maxx, pixel_miny, pixel_maxy)

        if completely_in_pixel:
            list_completely_in_pixel.append(road_id)

    roads_in_pixel_clean = roads_in_pixel.loc[list_completely_in_pixel]
    roads_not_completely_in_pixel = roads_in_pixel.drop(list_completely_in_pixel, axis = 0)   

    # Remaining roads in pixel are not completely in pixel

    #### Find roads with at least one coordiante in the pixel ####
    # Keep track of roads that have one or more coordinates in pixel
    list_partly_in_pixel = []

    # Iterate over roads partly in pixel
    for road_id, road in roads_not_completely_in_pixel.iterrows():
        partly_in_pixel = check_partly_in_pixel(road, pixel_minx, pixel_maxx, pixel_miny, pixel_maxy)

        if partly_in_pixel:
            list_partly_in_pixel.append(road_id)

    # Update dataframe with roads partly in pixel
    roads_partly_in_pixel = roads_not_completely_in_pixel.loc[list_partly_in_pixel]

    # Create datafarme for roads potentially crossing the pixel
    roads_potentially_crossing_pixel = roads_not_completely_in_pixel.drop(list_partly_in_pixel, axis = 0)

    # Allocate correct portion of roads to pixel
    for road_id, road in roads_partly_in_pixel.iterrows():
        roads_partly_in_pixel.set_value(road_id, 'road_coords', create_new_road_coordinates(road['road_coords'], pixel_minx, pixel_maxx, pixel_miny, pixel_maxy))

    # Update clean pixel list with updated roads
    roads_in_pixel_clean = roads_in_pixel_clean.append(roads_partly_in_pixel)

    ### Find roads that potentially cross the pixel ####
    for road_id, road in roads_potentially_crossing_pixel.iterrows():
        roads_potentially_crossing_pixel.set_value(road_id, 'road_coords', create_new_road_coordinates_crossing(road['road_coords'], pixel_minx, pixel_maxx, pixel_miny, pixel_maxy))

    # Append to clean dataframe
    roads_in_pixel_clean = roads_in_pixel_clean.append(roads_potentially_crossing_pixel)
    
    # Compute road_capacity
    road_capacity_column = []
    road_distance_column = []
    
    # Iterate over all roads
    for index, road in roads_in_pixel.iterrows():
        road_capacity_column.append(compute_capacity_road(road['road_coords'], road['type']))
        road_distance_column.append(compute_distance_road(road['road_coords']))
        
    roads_in_pixel_clean['road_length'] = road_distance_column
    roads_in_pixel_clean['road_capacity'] = road_capacity_column
    
    return roads_in_pixel_clean

### Determine road capacity

To determine the road capacity for each pixel, we iterate over all pixels. We have defined different functions for roads and railroads, since we ware interested in different characteristics. However, the functions defined above are general and can be used for both road and railroad capacity. 

In [16]:
def road_capacity(pixel_list, roads_list, road_types):

    # The new columns we want to store for our pixels
    
    # Road distance
    road_distance_column = []
    road_distance_including_highway_column = []
    
    # Road capcity
    road_capacity_column = []
    road_capacity_including_highway_column = []
    
    # For road capacity one way, we exclude highways and primary roads
    road_capacity_one_way_column = []
    road_distance_one_way_column = []
    
    
    # Binary variabels to see if there is a bridge or a tunnel
    bridge_column = []
    tunnel_column = []
    
    # Highways are motorways or trunks and their links, we store a binary variable
    highway_column = []
    
    for pixel_id, pixel in pixel_list.iterrows():
        roads_in_pixel = road_capacity_per_pixel(pixel, roads_list, road_types)
        
        ####### Time to start collect info about the roads in the pixel and adding it to the pixel_list datafarme #######
        
        road_capacity_column.append(roads_in_pixel[~roads_in_pixel['type'].isin(['motorway', 'motorway_link', 
                                                    'trunk', 'trunk_link'])]['road_capacity'].sum())
        
        # Check if there are highways
        road_capacity_including_highway_column.append(roads_in_pixel['road_capacity'].sum())

        # Road distance
        road_distance_column.append(roads_in_pixel[~roads_in_pixel['type'].isin(['motorway', 'motorway_link', 
                                                    'trunk', 'trunk_link'])]['road_length'].sum())
        
        # Check if there are highways
        road_distance_including_highway_column.append(roads_in_pixel['road_length'].sum())
            
        # For road capacity one way, we exclude highways and primary roads
        # Filter road type
        roads_in_pixel_one_way = roads_in_pixel[~roads_in_pixel['type'].isin(['motorway', 'motorway_link', 
                                                    'trunk', 'trunk_link','primary', 'primary_link'])]
        # Filter one ways
        roads_in_pixel_one_way = roads_in_pixel_one_way[roads_in_pixel_one_way['oneway'] == 1]
        
        road_capacity_one_way_column.append(roads_in_pixel_one_way['road_capacity'].sum())
        road_distance_one_way_column.append(roads_in_pixel_one_way['road_length'].sum())
        
        # Check binary variables
        # Motorway
        if len(roads_in_pixel[roads_in_pixel['type'].isin(['motorway', 'motorway_link', 
                                                    'trunk', 'trunk_link'])]) > 0:
            highway_column.append(1)
        else:
            highway_column.append(0)
        
        # Bridge
        if roads_in_pixel['bridge'].sum() > 0:
            bridge_column.append(1)
        else:
            bridge_column.append(0)
        
        # Tunnel
        if roads_in_pixel['tunnel'].sum() > 0:
            tunnel_column.append(1)
        else:
            tunnel_column.append(0)
        
    ### Append columns to dataframe
    pixel_list['road_capacity'] = road_capacity_column
    pixel_list['road_capacity_incl_highway'] = road_capacity_including_highway_column
    pixel_list['road_capacity_highway'] = pixel_list['road_capacity_incl_highway'] - pixel_list['road_capacity']
    pixel_list['oneway_capacity'] = road_capacity_one_way_column
    pixel_list['oneway_percentage'] = pixel_list['oneway_capacity'] / (pixel_list['road_capacity'] + 0.0001)
    pixel_list['road_distance'] = road_distance_column
    pixel_list['road_distance_incl_highway'] = road_distance_including_highway_column
    pixel_list['road_distance_highway'] = pixel_list['road_distance_incl_highway'] - pixel_list['road_distance']
    pixel_list['oneway_distance'] = road_distance_one_way_column
    pixel_list['oneway_dist_percentage'] = pixel_list['oneway_distance'] / (pixel_list['road_distance'] + 0.0001)
    pixel_list['highway_bin'] = highway_column
    pixel_list['bridge_bin'] = bridge_column
    pixel_list['tunnel_bin'] = tunnel_column

        
    return pixel_list
    
    

In [17]:
def rail_road(pixel_list, roads_list, road_types):

    # Specify column to keep track
    rail_column = []
    
    # Iterate over pixels
    for pixel_id, pixel in pixel_list.iterrows():
        roads_in_pixel = road_capacity_per_pixel(pixel, roads_list, road_types)
    
        # Apply values
        if roads_in_pixel['road_capacity'].sum() > 0:
            rail_column.append(1)
        else:
            rail_column.append(0)
        
    pixel_list['rail_bin'] = rail_column
    
    return pixel_list
    

A similar process is followed for waterways, but we have to remember to read a different file for waterways. 

In [18]:
def waterway(pixel_list, roads_list, road_types):

    # Specify column to keep track
    waterway_column = []
    waterway_distance_column = []
    
    # Iterate over pixels
    for pixel_id, pixel in pixel_list.iterrows():
        roads_in_pixel = road_capacity_per_pixel(pixel, roads_list, road_types)
    
        # Apply values
        if roads_in_pixel['road_capacity'].sum() > 0:
            waterway_column.append(1)
        else:
            waterway_column.append(0)
            
        waterway_distance_column.append(roads_in_pixel['road_length'].sum())
        
    pixel_list['waterway_bin'] = waterway_column
    pixel_list['waterway_distance'] = waterway_distance_column 
    
    return pixel_list
    

## Parameter definition

We need to define several parameters that are used independent of the different cities. 

In [19]:
# Define road capacity conversion table
road_capacity_conversion = {}
road_capacity_conversion['motorway'] = 4
road_capacity_conversion['motorway_link'] = 4
road_capacity_conversion['trunk'] = 4
road_capacity_conversion['trunk_link'] = 4
road_capacity_conversion['primary'] = 3
road_capacity_conversion['primary_link'] = 3
road_capacity_conversion['secondary'] = 3
road_capacity_conversion['secondary_link'] = 3
road_capacity_conversion['tertiary'] = 2
road_capacity_conversion['tertiary_link'] = 2
road_capacity_conversion['unclassified'] = 1
road_capacity_conversion['residential'] = 1
road_capacity_conversion['pedestrian'] = 1
road_capacity_conversion['road'] = 1
road_capacity_conversion['living_street'] = 1
road_capacity_conversion['service'] = 1
road_capacity_conversion['footway'] = 1
road_capacity_conversion['steps'] = 1
road_capacity_conversion['cycleway'] = 1
road_capacity_conversion['subway'] = 1
road_capacity_conversion['rail'] = 1
road_capacity_conversion['track'] = 1
road_capacity_conversion['path'] = 1
road_capacity_conversion['disused'] = 1
road_capacity_conversion['raceway'] = 1
road_capacity_conversion['funicular'] = 1
road_capacity_conversion['tram'] = 1
road_capacity_conversion['pier'] = 1
road_capacity_conversion['bridleway'] = 1
road_capacity_conversion['monorail'] = 1
road_capacity_conversion['preserved'] = 1

# Define waterway capacity conversion table
road_capacity_conversion['river'] = 1
road_capacity_conversion['cannal'] = 1
road_capacity_conversion['drain'] = 0
road_capacity_conversion['stream'] = 0
road_capacity_conversion['ditch'] = 0
road_capacity_conversion['canal'] = 1

# Specify relevant road_types
road_types = ['motorway', 'motorway_link', 'trunk', 'trunk_link', 'primary', 'primary_link',
                           'secondary', 'secondary_link', 'tertiary', 'tertiary_link', 'residential', 'road',
                           'living_street', 'unclassified']


## Run Code

### Read OSM Files and Pixel List

In [20]:
# Read OSM files and pixel list
road_list = extract_OSM_data('00 - City_OSM_files/sao-paulo_brazil/sao-paulo_brazil_roads.geojson')
waterway_list = extract_OSM_data('00 - City_OSM_files/sao-paulo_brazil_colorado/sao-paulo_brazil_waterways.geojson')
pixel_list = read_pixel_list('01 - City_Pixel_list/pixel_list_SaoPaulo.xlsx')


IOError: no such file or directory: '00 - City_OSM_files/sao-paulo_brazil_colorado/sao-paulo_brazil_waterways.geojson'

### Process and translate roads to UTM

In [None]:
# Convert and prepare roads
road_list = find_road_bounds(road_list)
road_list = convert_linestring_to_UTM(road_list)

### Find intersections

In [None]:
# Find intersections
coordinates_df = find_coordinates_relevant_road_types(road_list, road_types)
intersections = road_connecitivity(coordinates_df)
pixel_list = map_points(intersections, pixel_list, 'road_connectivity')

### Find road capacity

In [None]:
#Add roads to pixel
pixel_list = road_capacity(pixel_list, road_list, road_types)   

### Process railroad data

In [None]:
#Add railroads to pixel
pixel_list = rail_road(pixel_list, road_list, ['rail'])

### Process waterway data

In [None]:
# Convert and prepare waterways
waterway_list = find_road_bounds(waterway_list)
waterway_list = convert_linestring_to_UTM(waterway_list)

# Add waterways to pixel
pixel_list = waterway(pixel_list, waterway_list, waterway_list['type'].unique())

### Write output as excel file

In [None]:
pixel_list.to_excel('02 - City_Output/Test.xlsx', columns = ['pixelid', 'population', 'road_connectivity', 'road_capacity', 'road_capacity_incl_highway', 'road_capacity_highway', 
                                                                 'road_distance', 'road_distance_incl_highway', 'road_distance_highway', 'oneway_distance', 'oneway_dist_percentage',
           'oneway_capacity', 'oneway_percentage', 'highway_bin', 'bridge_bin', 'tunnel_bin', 'rail_bin', 
           'waterway_bin', 'waterway_distance'])
